btrfs-progs: check: remove unused argument from create_inode_item
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase currently being reported by the progress task.  The values below
 * TASK_NOTHING are used as indexes into the message table of
 * print_status_check(), so TASK_NOTHING must stay the last enumerator.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* Sentinel: nothing to report; keep it last. */
        TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Selectable check implementations.  CHECK_MODE_UNKNOWN is what
 * parse_check_mode() returns for an unrecognized name; CHECK_MODE_DEFAULT
 * is an alias for the original mode.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect for this run; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; each flag occupies its own bit (bit 0 is not used). */
#define ROOT_DIR_ERROR          (1 << 1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1 << 2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1 << 3)  /* DIR_ITEM found but does not match */
#define INODE_REF_MISSING       (1 << 4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1 << 5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1 << 6)  /* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR       (1 << 7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1 << 8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1 << 10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1 << 11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1 << 12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1 << 13) /* INODE_ITEM without reference */
#define NO_INODE_ITEM           (1 << 14) /* no inode_item */
#define LAST_ITEM               (1 << 15) /* tree traversal is complete */
#define ROOT_REF_MISSING        (1 << 16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1 << 17) /* ROOT_REF found but does not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Value meaning "not decided yet" for extent_record::flag_block_full_backref,
 * to be assigned explicitly when a record is initialized.
 */
enum {
        FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/*
 * Error bits of a single back reference (the "errors" member of
 * inode_backref / root_backref); print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/*
 * Error bits of an inode record (inode_record::errors);
 * print_inode_error() turns them into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are only
 * used internally to classify errors.  Every flag must have its own bit so
 * that two different errors can never be mistaken for each other.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH;
 * it now uses the first free bit instead.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Companion of print_status_check(): emit a newline on stdout and flush it.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; the function is defined further down in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of extent reference information, indexed by tree level.
 * Entries are filled in by update_nodes_refs() and walk_down_tree() so a
 * block that was just looked up does not have to be looked up again.
 */
1860 struct node_refs {
        /* Start of the tree block last looked up at each level */
1861         u64 bytenr[BTRFS_MAX_LEVEL];
        /* Reference count reported for that block by the extent lookup */
1862         u64 refs[BTRFS_MAX_LEVEL];
        /* Non-zero when that block has to be checked in the current root */
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
/*
 * Forward declarations: both functions are defined further down in this
 * file but are already called by process_one_leaf_v2() below.
 */
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
/*
 * Forward declaration of check_inode_item(), which is defined later in this
 * file. An identical declaration already appears above process_one_leaf_v2().
 */
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the DIR_ITEM/DIR_INDEX
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 found;
4703         unsigned int extent_type;
4704         unsigned int is_hole;
4705         int ret;
4706         int err = 0;
4707
4708         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4709
4710         extent_type = btrfs_file_extent_type(node, fi);
4711         /* Skip if file extent is inline */
4712         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4713                 struct btrfs_item *e = btrfs_item_nr(slot);
4714                 u32 item_inline_len;
4715
4716                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4717                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4718                 if (extent_num_bytes == 0 ||
4719                     extent_num_bytes != item_inline_len)
4720                         err |= FILE_EXTENT_ERROR;
4721                 *size += extent_num_bytes;
4722                 return err;
4723         }
4724
4725         /* Check extent type */
4726         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4727                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4728                 err |= FILE_EXTENT_ERROR;
4729                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4730                       root->objectid, fkey->objectid, fkey->offset);
4731                 return err;
4732         }
4733
4734         /* Check REG_EXTENT/PREALLOC_EXTENT */
4735         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4736         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4737         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4738         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4739
4740         /* Check EXTENT_DATA datasum */
4741         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4742         if (found > 0 && nodatasum) {
4743                 err |= ODD_CSUM_ITEM;
4744                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4745                       root->objectid, fkey->objectid, fkey->offset);
4746         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4747                    !is_hole &&
4748                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4749                 err |= CSUM_ITEM_MISSING;
4750                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4753                 err |= ODD_CSUM_ITEM;
4754                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         }
4757
4758         /* Check EXTENT_DATA hole */
4759         if (no_holes && is_hole) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         } else if (!no_holes && *end != fkey->offset) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         }
4768
4769         *end += extent_num_bytes;
4770         if (!is_hole)
4771                 *size += extent_num_bytes;
4772
4773         return err;
4774 }
4775
4776 /*
4777  * Check INODE_ITEM and related ITEMs (the same inode number)
4778  * 1. check link count
4779  * 2. check inode ref/extref
4780  * 3. check dir item/index
4781  *
4782  * @ext_ref:    the EXTENDED_IREF feature
4783  *
4784  * Return 0 if no error occurred.
4785  * Return >0 for error or hit the traversal is done(by error bitmap)
4786  */
4787 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4788                             unsigned int ext_ref)
4789 {
4790         struct extent_buffer *node;
4791         struct btrfs_inode_item *ii;
4792         struct btrfs_key key;
4793         u64 inode_id;
4794         u32 mode;
4795         u64 nlink;
4796         u64 nbytes;
4797         u64 isize;
4798         u64 size = 0;
4799         u64 refs = 0;
4800         u64 extent_end = 0;
4801         u64 extent_size = 0;
4802         unsigned int dir;
4803         unsigned int nodatasum;
4804         int slot;
4805         int ret;
4806         int err = 0;
4807
4808         node = path->nodes[0];
4809         slot = path->slots[0];
4810
4811         btrfs_item_key_to_cpu(node, &key, slot);
4812         inode_id = key.objectid;
4813
4814         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4815                 ret = btrfs_next_item(root, path);
4816                 if (ret > 0)
4817                         err |= LAST_ITEM;
4818                 return err;
4819         }
4820
4821         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4822         isize = btrfs_inode_size(node, ii);
4823         nbytes = btrfs_inode_nbytes(node, ii);
4824         mode = btrfs_inode_mode(node, ii);
4825         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4826         nlink = btrfs_inode_nlink(node, ii);
4827         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4828
4829         while (1) {
4830                 ret = btrfs_next_item(root, path);
4831                 if (ret < 0) {
4832                         /* out will fill 'err' rusing current statistics */
4833                         goto out;
4834                 } else if (ret > 0) {
4835                         err |= LAST_ITEM;
4836                         goto out;
4837                 }
4838
4839                 node = path->nodes[0];
4840                 slot = path->slots[0];
4841                 btrfs_item_key_to_cpu(node, &key, slot);
4842                 if (key.objectid != inode_id)
4843                         goto out;
4844
4845                 switch (key.type) {
4846                 case BTRFS_INODE_REF_KEY:
4847                         ret = check_inode_ref(root, &key, node, slot, &refs,
4848                                               mode);
4849                         err |= ret;
4850                         break;
4851                 case BTRFS_INODE_EXTREF_KEY:
4852                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4853                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4854                                         root->objectid, key.objectid,
4855                                         key.offset);
4856                         ret = check_inode_extref(root, &key, node, slot, &refs,
4857                                                  mode);
4858                         err |= ret;
4859                         break;
4860                 case BTRFS_DIR_ITEM_KEY:
4861                 case BTRFS_DIR_INDEX_KEY:
4862                         if (!dir) {
4863                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4864                                         root->objectid, inode_id,
4865                                         imode_to_type(mode), key.objectid,
4866                                         key.offset);
4867                         }
4868                         ret = check_dir_item(root, &key, node, slot, &size,
4869                                              ext_ref);
4870                         err |= ret;
4871                         break;
4872                 case BTRFS_EXTENT_DATA_KEY:
4873                         if (dir) {
4874                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4875                                         root->objectid, inode_id, key.objectid,
4876                                         key.offset);
4877                         }
4878                         ret = check_file_extent(root, &key, node, slot,
4879                                                 nodatasum, &extent_size,
4880                                                 &extent_end);
4881                         err |= ret;
4882                         break;
4883                 case BTRFS_XATTR_ITEM_KEY:
4884                         break;
4885                 default:
4886                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4887                               key.objectid, key.type, key.offset);
4888                 }
4889         }
4890
4891 out:
4892         /* verify INODE_ITEM nlink/isize/nbytes */
4893         if (dir) {
4894                 if (nlink != 1) {
4895                         err |= LINK_COUNT_ERROR;
4896                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4897                               root->objectid, inode_id, nlink);
4898                 }
4899
4900                 /*
4901                  * Just a warning, as dir inode nbytes is just an
4902                  * instructive value.
4903                  */
4904                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4905                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4906                                 root->objectid, inode_id, root->nodesize);
4907                 }
4908
4909                 if (isize != size) {
4910                         err |= ISIZE_ERROR;
4911                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4912                               root->objectid, inode_id, isize, size);
4913                 }
4914         } else {
4915                 if (nlink != refs) {
4916                         err |= LINK_COUNT_ERROR;
4917                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4918                               root->objectid, inode_id, nlink, refs);
4919                 } else if (!nlink) {
4920                         err |= ORPHAN_ITEM;
4921                 }
4922
4923                 if (!nbytes && !no_holes && extent_end < isize) {
4924                         err |= NBYTES_ERROR;
4925                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4926                               root->objectid, inode_id, isize);
4927                 }
4928
4929                 if (nbytes != extent_size) {
4930                         err |= NBYTES_ERROR;
4931                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4932                               root->objectid, inode_id, nbytes, extent_size);
4933                 }
4934         }
4935
4936         return err;
4937 }
4938
4939 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4940 {
4941         struct btrfs_path path;
4942         struct btrfs_key key;
4943         int err = 0;
4944         int ret;
4945
4946         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4947         key.type = BTRFS_INODE_ITEM_KEY;
4948         key.offset = 0;
4949
4950         /* For root being dropped, we don't need to check first inode */
4951         if (btrfs_root_refs(&root->root_item) == 0 &&
4952             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4953             key.objectid)
4954                 return 0;
4955
4956         btrfs_init_path(&path);
4957
4958         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4959         if (ret < 0)
4960                 goto out;
4961         if (ret > 0) {
4962                 ret = 0;
4963                 err |= INODE_ITEM_MISSING;
4964         }
4965
4966         err |= check_inode_item(root, &path, ext_ref);
4967         err &= ~LAST_ITEM;
4968         if (err && !ret)
4969                 ret = -EIO;
4970 out:
4971         btrfs_release_path(&path);
4972         return ret;
4973 }
4974
4975 /*
4976  * Iterate all item on the tree and call check_inode_item() to check.
4977  *
4978  * @root:       the root of the tree to be checked.
4979  * @ext_ref:    the EXTENDED_IREF feature
4980  *
4981  * Return 0 if no error found.
4982  * Return <0 for error.
4983  */
4984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4985 {
4986         struct btrfs_path path;
4987         struct node_refs nrefs;
4988         struct btrfs_root_item *root_item = &root->root_item;
4989         int ret, wret;
4990         int level;
4991
4992         /*
4993          * We need to manually check the first inode item(256)
4994          * As the following traversal function will only start from
4995          * the first inode item in the leaf, if inode item(256) is missing
4996          * we will just skip it forever.
4997          */
4998         ret = check_fs_first_inode(root, ext_ref);
4999         if (ret < 0)
5000                 return ret;
5001
5002         memset(&nrefs, 0, sizeof(nrefs));
5003         level = btrfs_header_level(root->node);
5004         btrfs_init_path(&path);
5005
5006         if (btrfs_root_refs(root_item) > 0 ||
5007             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5008                 path.nodes[level] = root->node;
5009                 path.slots[level] = 0;
5010                 extent_buffer_get(root->node);
5011         } else {
5012                 struct btrfs_key key;
5013
5014                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5015                 level = root_item->drop_level;
5016                 path.lowest_level = level;
5017                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5018                 if (ret < 0)
5019                         goto out;
5020                 ret = 0;
5021         }
5022
5023         while (1) {
5024                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5025                 if (wret < 0)
5026                         ret = wret;
5027                 if (wret != 0)
5028                         break;
5029
5030                 wret = walk_up_tree_v2(root, &path, &level);
5031                 if (wret < 0)
5032                         ret = wret;
5033                 if (wret != 0)
5034                         break;
5035         }
5036
5037 out:
5038         btrfs_release_path(&path);
5039         return ret;
5040 }
5041
5042 /*
5043  * Find the relative ref for root_ref and root_backref.
5044  *
5045  * @root:       the root of the root tree.
5046  * @ref_key:    the key of the root ref.
5047  *
5048  * Return 0 if no error occurred.
5049  */
5050 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5051                           struct extent_buffer *node, int slot)
5052 {
5053         struct btrfs_path path;
5054         struct btrfs_key key;
5055         struct btrfs_root_ref *ref;
5056         struct btrfs_root_ref *backref;
5057         char ref_name[BTRFS_NAME_LEN] = {0};
5058         char backref_name[BTRFS_NAME_LEN] = {0};
5059         u64 ref_dirid;
5060         u64 ref_seq;
5061         u32 ref_namelen;
5062         u64 backref_dirid;
5063         u64 backref_seq;
5064         u32 backref_namelen;
5065         u32 len;
5066         int ret;
5067         int err = 0;
5068
5069         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5070         ref_dirid = btrfs_root_ref_dirid(node, ref);
5071         ref_seq = btrfs_root_ref_sequence(node, ref);
5072         ref_namelen = btrfs_root_ref_name_len(node, ref);
5073
5074         if (ref_namelen <= BTRFS_NAME_LEN) {
5075                 len = ref_namelen;
5076         } else {
5077                 len = BTRFS_NAME_LEN;
5078                 warning("%s[%llu %llu] ref_name too long",
5079                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5080                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5081                         ref_key->offset);
5082         }
5083         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5084
5085         /* Find relative root_ref */
5086         key.objectid = ref_key->offset;
5087         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5088         key.offset = ref_key->objectid;
5089
5090         btrfs_init_path(&path);
5091         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5092         if (ret) {
5093                 err |= ROOT_REF_MISSING;
5094                 error("%s[%llu %llu] couldn't find relative ref",
5095                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5096                       "ROOT_REF" : "ROOT_BACKREF",
5097                       ref_key->objectid, ref_key->offset);
5098                 goto out;
5099         }
5100
5101         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5102                                  struct btrfs_root_ref);
5103         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5104         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5105         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5106
5107         if (backref_namelen <= BTRFS_NAME_LEN) {
5108                 len = backref_namelen;
5109         } else {
5110                 len = BTRFS_NAME_LEN;
5111                 warning("%s[%llu %llu] ref_name too long",
5112                         key.type == BTRFS_ROOT_REF_KEY ?
5113                         "ROOT_REF" : "ROOT_BACKREF",
5114                         key.objectid, key.offset);
5115         }
5116         read_extent_buffer(path.nodes[0], backref_name,
5117                            (unsigned long)(backref + 1), len);
5118
5119         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5120             ref_namelen != backref_namelen ||
5121             strncmp(ref_name, backref_name, len)) {
5122                 err |= ROOT_REF_MISMATCH;
5123                 error("%s[%llu %llu] mismatch relative ref",
5124                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5125                       "ROOT_REF" : "ROOT_BACKREF",
5126                       ref_key->objectid, ref_key->offset);
5127         }
5128 out:
5129         btrfs_release_path(&path);
5130         return err;
5131 }
5132
5133 /*
5134  * Check all fs/file tree in low_memory mode.
5135  *
5136  * 1. for fs tree root item, call check_fs_root_v2()
5137  * 2. for fs tree root ref/backref, call check_root_ref()
5138  *
5139  * Return 0 if no error occurred.
5140  */
5141 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5142 {
5143         struct btrfs_root *tree_root = fs_info->tree_root;
5144         struct btrfs_root *cur_root = NULL;
5145         struct btrfs_path path;
5146         struct btrfs_key key;
5147         struct extent_buffer *node;
5148         unsigned int ext_ref;
5149         int slot;
5150         int ret;
5151         int err = 0;
5152
5153         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5154
5155         btrfs_init_path(&path);
5156         key.objectid = BTRFS_FS_TREE_OBJECTID;
5157         key.offset = 0;
5158         key.type = BTRFS_ROOT_ITEM_KEY;
5159
5160         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5161         if (ret < 0) {
5162                 err = ret;
5163                 goto out;
5164         } else if (ret > 0) {
5165                 err = -ENOENT;
5166                 goto out;
5167         }
5168
5169         while (1) {
5170                 node = path.nodes[0];
5171                 slot = path.slots[0];
5172                 btrfs_item_key_to_cpu(node, &key, slot);
5173                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5174                         goto out;
5175                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5176                     fs_root_objectid(key.objectid)) {
5177                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5178                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5179                                                                        &key);
5180                         } else {
5181                                 key.offset = (u64)-1;
5182                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5183                         }
5184
5185                         if (IS_ERR(cur_root)) {
5186                                 error("Fail to read fs/subvol tree: %lld",
5187                                       key.objectid);
5188                                 err = -EIO;
5189                                 goto next;
5190                         }
5191
5192                         ret = check_fs_root_v2(cur_root, ext_ref);
5193                         err |= ret;
5194
5195                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5196                                 btrfs_free_fs_root(cur_root);
5197                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5198                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5199                         ret = check_root_ref(tree_root, &key, node, slot);
5200                         err |= ret;
5201                 }
5202 next:
5203                 ret = btrfs_next_item(tree_root, &path);
5204                 if (ret > 0)
5205                         goto out;
5206                 if (ret < 0) {
5207                         err = ret;
5208                         goto out;
5209                 }
5210         }
5211
5212 out:
5213         btrfs_release_path(&path);
5214         return err;
5215 }
5216
5217 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5218 {
5219         struct list_head *cur = rec->backrefs.next;
5220         struct extent_backref *back;
5221         struct tree_backref *tback;
5222         struct data_backref *dback;
5223         u64 found = 0;
5224         int err = 0;
5225
5226         while(cur != &rec->backrefs) {
5227                 back = to_extent_backref(cur);
5228                 cur = cur->next;
5229                 if (!back->found_extent_tree) {
5230                         err = 1;
5231                         if (!print_errs)
5232                                 goto out;
5233                         if (back->is_data) {
5234                                 dback = to_data_backref(back);
5235                                 fprintf(stderr, "Backref %llu %s %llu"
5236                                         " owner %llu offset %llu num_refs %lu"
5237                                         " not found in extent tree\n",
5238                                         (unsigned long long)rec->start,
5239                                         back->full_backref ?
5240                                         "parent" : "root",
5241                                         back->full_backref ?
5242                                         (unsigned long long)dback->parent:
5243                                         (unsigned long long)dback->root,
5244                                         (unsigned long long)dback->owner,
5245                                         (unsigned long long)dback->offset,
5246                                         (unsigned long)dback->num_refs);
5247                         } else {
5248                                 tback = to_tree_backref(back);
5249                                 fprintf(stderr, "Backref %llu parent %llu"
5250                                         " root %llu not found in extent tree\n",
5251                                         (unsigned long long)rec->start,
5252                                         (unsigned long long)tback->parent,
5253                                         (unsigned long long)tback->root);
5254                         }
5255                 }
5256                 if (!back->is_data && !back->found_ref) {
5257                         err = 1;
5258                         if (!print_errs)
5259                                 goto out;
5260                         tback = to_tree_backref(back);
5261                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5262                                 (unsigned long long)rec->start,
5263                                 back->full_backref ? "parent" : "root",
5264                                 back->full_backref ?
5265                                 (unsigned long long)tback->parent :
5266                                 (unsigned long long)tback->root, back);
5267                 }
5268                 if (back->is_data) {
5269                         dback = to_data_backref(back);
5270                         if (dback->found_ref != dback->num_refs) {
5271                                 err = 1;
5272                                 if (!print_errs)
5273                                         goto out;
5274                                 fprintf(stderr, "Incorrect local backref count"
5275                                         " on %llu %s %llu owner %llu"
5276                                         " offset %llu found %u wanted %u back %p\n",
5277                                         (unsigned long long)rec->start,
5278                                         back->full_backref ?
5279                                         "parent" : "root",
5280                                         back->full_backref ?
5281                                         (unsigned long long)dback->parent:
5282                                         (unsigned long long)dback->root,
5283                                         (unsigned long long)dback->owner,
5284                                         (unsigned long long)dback->offset,
5285                                         dback->found_ref, dback->num_refs, back);
5286                         }
5287                         if (dback->disk_bytenr != rec->start) {
5288                                 err = 1;
5289                                 if (!print_errs)
5290                                         goto out;
5291                                 fprintf(stderr, "Backref disk bytenr does not"
5292                                         " match extent record, bytenr=%llu, "
5293                                         "ref bytenr=%llu\n",
5294                                         (unsigned long long)rec->start,
5295                                         (unsigned long long)dback->disk_bytenr);
5296                         }
5297
5298                         if (dback->bytes != rec->nr) {
5299                                 err = 1;
5300                                 if (!print_errs)
5301                                         goto out;
5302                                 fprintf(stderr, "Backref bytes do not match "
5303                                         "extent backref, bytenr=%llu, ref "
5304                                         "bytes=%llu, backref bytes=%llu\n",
5305                                         (unsigned long long)rec->start,
5306                                         (unsigned long long)rec->nr,
5307                                         (unsigned long long)dback->bytes);
5308                         }
5309                 }
5310                 if (!back->is_data) {
5311                         found += 1;
5312                 } else {
5313                         dback = to_data_backref(back);
5314                         found += dback->found_ref;
5315                 }
5316         }
5317         if (found != rec->refs) {
5318                 err = 1;
5319                 if (!print_errs)
5320                         goto out;
5321                 fprintf(stderr, "Incorrect global backref count "
5322                         "on %llu found %llu wanted %llu\n",
5323                         (unsigned long long)rec->start,
5324                         (unsigned long long)found,
5325                         (unsigned long long)rec->refs);
5326         }
5327 out:
5328         return err;
5329 }
5330
5331 static int free_all_extent_backrefs(struct extent_record *rec)
5332 {
5333         struct extent_backref *back;
5334         struct list_head *cur;
5335         while (!list_empty(&rec->backrefs)) {
5336                 cur = rec->backrefs.next;
5337                 back = to_extent_backref(cur);
5338                 list_del(cur);
5339                 free(back);
5340         }
5341         return 0;
5342 }
5343
5344 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5345                                      struct cache_tree *extent_cache)
5346 {
5347         struct cache_extent *cache;
5348         struct extent_record *rec;
5349
5350         while (1) {
5351                 cache = first_cache_extent(extent_cache);
5352                 if (!cache)
5353                         break;
5354                 rec = container_of(cache, struct extent_record, cache);
5355                 remove_cache_extent(extent_cache, cache);
5356                 free_all_extent_backrefs(rec);
5357                 free(rec);
5358         }
5359 }
5360
5361 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5362                                  struct extent_record *rec)
5363 {
5364         if (rec->content_checked && rec->owner_ref_checked &&
5365             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5366             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5367             !rec->bad_full_backref && !rec->crossing_stripes &&
5368             !rec->wrong_chunk_type) {
5369                 remove_cache_extent(extent_cache, &rec->cache);
5370                 free_all_extent_backrefs(rec);
5371                 list_del_init(&rec->list);
5372                 free(rec);
5373         }
5374         return 0;
5375 }
5376
5377 static int check_owner_ref(struct btrfs_root *root,
5378                             struct extent_record *rec,
5379                             struct extent_buffer *buf)
5380 {
5381         struct extent_backref *node;
5382         struct tree_backref *back;
5383         struct btrfs_root *ref_root;
5384         struct btrfs_key key;
5385         struct btrfs_path path;
5386         struct extent_buffer *parent;
5387         int level;
5388         int found = 0;
5389         int ret;
5390
5391         list_for_each_entry(node, &rec->backrefs, list) {
5392                 if (node->is_data)
5393                         continue;
5394                 if (!node->found_ref)
5395                         continue;
5396                 if (node->full_backref)
5397                         continue;
5398                 back = to_tree_backref(node);
5399                 if (btrfs_header_owner(buf) == back->root)
5400                         return 0;
5401         }
5402         BUG_ON(rec->is_root);
5403
5404         /* try to find the block by search corresponding fs tree */
5405         key.objectid = btrfs_header_owner(buf);
5406         key.type = BTRFS_ROOT_ITEM_KEY;
5407         key.offset = (u64)-1;
5408
5409         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5410         if (IS_ERR(ref_root))
5411                 return 1;
5412
5413         level = btrfs_header_level(buf);
5414         if (level == 0)
5415                 btrfs_item_key_to_cpu(buf, &key, 0);
5416         else
5417                 btrfs_node_key_to_cpu(buf, &key, 0);
5418
5419         btrfs_init_path(&path);
5420         path.lowest_level = level + 1;
5421         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5422         if (ret < 0)
5423                 return 0;
5424
5425         parent = path.nodes[level + 1];
5426         if (parent && buf->start == btrfs_node_blockptr(parent,
5427                                                         path.slots[level + 1]))
5428                 found = 1;
5429
5430         btrfs_release_path(&path);
5431         return found ? 0 : 1;
5432 }
5433
5434 static int is_extent_tree_record(struct extent_record *rec)
5435 {
5436         struct list_head *cur = rec->backrefs.next;
5437         struct extent_backref *node;
5438         struct tree_backref *back;
5439         int is_extent = 0;
5440
5441         while(cur != &rec->backrefs) {
5442                 node = to_extent_backref(cur);
5443                 cur = cur->next;
5444                 if (node->is_data)
5445                         return 0;
5446                 back = to_tree_backref(node);
5447                 if (node->full_backref)
5448                         return 0;
5449                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5450                         is_extent = 1;
5451         }
5452         return is_extent;
5453 }
5454
5455
5456 static int record_bad_block_io(struct btrfs_fs_info *info,
5457                                struct cache_tree *extent_cache,
5458                                u64 start, u64 len)
5459 {
5460         struct extent_record *rec;
5461         struct cache_extent *cache;
5462         struct btrfs_key key;
5463
5464         cache = lookup_cache_extent(extent_cache, start, len);
5465         if (!cache)
5466                 return 0;
5467
5468         rec = container_of(cache, struct extent_record, cache);
5469         if (!is_extent_tree_record(rec))
5470                 return 0;
5471
5472         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5473         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5474 }
5475
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two key pointers are swapped as whole
 * structures.  If @slot is 0 the new first key is pushed to the upper
 * levels with btrfs_fixup_low_keys().
 *
 * Leaf: the data of both items is copied out and written back, then the
 * offsets and sizes stored in the two item headers are exchanged, and
 * finally the two keys are exchanged through btrfs_set_item_key_unsafe().
 * path->slots[0] is left at @slot + 1 afterwards.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be
 * allocated (leaf case only).
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /* First key of the node changed, update the parents */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                /*
                 * NOTE(review): fix_item_offset() in this file addresses item
                 * data as btrfs_leaf_data(buf) + offset, while the raw item
                 * offsets are passed here -- confirm which base these extent
                 * buffer helpers expect.
                 */
                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): item1_data holds item1_size bytes but is
                 * written with length item2_size, and item2_data holds
                 * item2_size bytes but is written with length item1_size.
                 * When the two sizes differ the longer write reads past the
                 * shorter allocation -- confirm intended lengths and
                 * destinations.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange the offset/size fields of the two item headers */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Exchange the keys; the helper works on path->slots[0] */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5543
5544 static int fix_key_order(struct btrfs_trans_handle *trans,
5545                          struct btrfs_root *root,
5546                          struct btrfs_path *path)
5547 {
5548         struct extent_buffer *buf;
5549         struct btrfs_key k1, k2;
5550         int i;
5551         int level = path->lowest_level;
5552         int ret = -EIO;
5553
5554         buf = path->nodes[level];
5555         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5556                 if (level) {
5557                         btrfs_node_key_to_cpu(buf, &k1, i);
5558                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5559                 } else {
5560                         btrfs_item_key_to_cpu(buf, &k1, i);
5561                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5562                 }
5563                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5564                         continue;
5565                 ret = swap_values(root, path, buf, i);
5566                 if (ret)
5567                         break;
5568                 btrfs_mark_buffer_dirty(buf);
5569                 i = 0;
5570         }
5571         return ret;
5572 }
5573
5574 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5575                              struct btrfs_root *root,
5576                              struct btrfs_path *path,
5577                              struct extent_buffer *buf, int slot)
5578 {
5579         struct btrfs_key key;
5580         int nritems = btrfs_header_nritems(buf);
5581
5582         btrfs_item_key_to_cpu(buf, &key, slot);
5583
5584         /* These are all the keys we can deal with missing. */
5585         if (key.type != BTRFS_DIR_INDEX_KEY &&
5586             key.type != BTRFS_EXTENT_ITEM_KEY &&
5587             key.type != BTRFS_METADATA_ITEM_KEY &&
5588             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5589             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5590                 return -1;
5591
5592         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5593                (unsigned long long)key.objectid, key.type,
5594                (unsigned long long)key.offset, slot, buf->start);
5595         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5596                               btrfs_item_nr_offset(slot + 1),
5597                               sizeof(struct btrfs_item) *
5598                               (nritems - slot - 1));
5599         btrfs_set_header_nritems(buf, nritems - 1);
5600         if (slot == 0) {
5601                 struct btrfs_disk_key disk_key;
5602
5603                 btrfs_item_key(buf, &disk_key, 0);
5604                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5605         }
5606         btrfs_mark_buffer_dirty(buf);
5607         return 0;
5608 }
5609
5610 static int fix_item_offset(struct btrfs_trans_handle *trans,
5611                            struct btrfs_root *root,
5612                            struct btrfs_path *path)
5613 {
5614         struct extent_buffer *buf;
5615         int i;
5616         int ret = 0;
5617
5618         /* We should only get this for leaves */
5619         BUG_ON(path->lowest_level);
5620         buf = path->nodes[0];
5621 again:
5622         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5623                 unsigned int shift = 0, offset;
5624
5625                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5626                     BTRFS_LEAF_DATA_SIZE(root)) {
5627                         if (btrfs_item_end_nr(buf, i) >
5628                             BTRFS_LEAF_DATA_SIZE(root)) {
5629                                 ret = delete_bogus_item(trans, root, path,
5630                                                         buf, i);
5631                                 if (!ret)
5632                                         goto again;
5633                                 fprintf(stderr, "item is off the end of the "
5634                                         "leaf, can't fix\n");
5635                                 ret = -EIO;
5636                                 break;
5637                         }
5638                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5639                                 btrfs_item_end_nr(buf, i);
5640                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5641                            btrfs_item_offset_nr(buf, i - 1)) {
5642                         if (btrfs_item_end_nr(buf, i) >
5643                             btrfs_item_offset_nr(buf, i - 1)) {
5644                                 ret = delete_bogus_item(trans, root, path,
5645                                                         buf, i);
5646                                 if (!ret)
5647                                         goto again;
5648                                 fprintf(stderr, "items overlap, can't fix\n");
5649                                 ret = -EIO;
5650                                 break;
5651                         }
5652                         shift = btrfs_item_offset_nr(buf, i - 1) -
5653                                 btrfs_item_end_nr(buf, i);
5654                 }
5655                 if (!shift)
5656                         continue;
5657
5658                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5659                        i, shift, (unsigned long long)buf->start);
5660                 offset = btrfs_item_offset_nr(buf, i);
5661                 memmove_extent_buffer(buf,
5662                                       btrfs_leaf_data(buf) + offset + shift,
5663                                       btrfs_leaf_data(buf) + offset,
5664                                       btrfs_item_size_nr(buf, i));
5665                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5666                                       offset + shift);
5667                 btrfs_mark_buffer_dirty(buf);
5668         }
5669
5670         /*
5671          * We may have moved things, in which case we want to exit so we don't
5672          * write those changes out.  Once we have proper abort functionality in
5673          * progs this can be changed to something nicer.
5674          */
5675         BUG_ON(ret);
5676         return ret;
5677 }
5678
5679 /*
5680  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5681  * then just return -EIO.
5682  */
5683 static int try_to_fix_bad_block(struct btrfs_root *root,
5684                                 struct extent_buffer *buf,
5685                                 enum btrfs_tree_block_status status)
5686 {
5687         struct btrfs_trans_handle *trans;
5688         struct ulist *roots;
5689         struct ulist_node *node;
5690         struct btrfs_root *search_root;
5691         struct btrfs_path path;
5692         struct ulist_iterator iter;
5693         struct btrfs_key root_key, key;
5694         int ret;
5695
5696         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5697             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5698                 return -EIO;
5699
5700         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5701         if (ret)
5702                 return -EIO;
5703
5704         btrfs_init_path(&path);
5705         ULIST_ITER_INIT(&iter);
5706         while ((node = ulist_next(roots, &iter))) {
5707                 root_key.objectid = node->val;
5708                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5709                 root_key.offset = (u64)-1;
5710
5711                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5712                 if (IS_ERR(root)) {
5713                         ret = -EIO;
5714                         break;
5715                 }
5716
5717
5718                 trans = btrfs_start_transaction(search_root, 0);
5719                 if (IS_ERR(trans)) {
5720                         ret = PTR_ERR(trans);
5721                         break;
5722                 }
5723
5724                 path.lowest_level = btrfs_header_level(buf);
5725                 path.skip_check_block = 1;
5726                 if (path.lowest_level)
5727                         btrfs_node_key_to_cpu(buf, &key, 0);
5728                 else
5729                         btrfs_item_key_to_cpu(buf, &key, 0);
5730                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5731                 if (ret) {
5732                         ret = -EIO;
5733                         btrfs_commit_transaction(trans, search_root);
5734                         break;
5735                 }
5736                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5737                         ret = fix_key_order(trans, search_root, &path);
5738                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5739                         ret = fix_item_offset(trans, search_root, &path);
5740                 if (ret) {
5741                         btrfs_commit_transaction(trans, search_root);
5742                         break;
5743                 }
5744                 btrfs_release_path(&path);
5745                 btrfs_commit_transaction(trans, search_root);
5746         }
5747         ulist_free(roots);
5748         btrfs_release_path(&path);
5749         return ret;
5750 }
5751
5752 static int check_block(struct btrfs_root *root,
5753                        struct cache_tree *extent_cache,
5754                        struct extent_buffer *buf, u64 flags)
5755 {
5756         struct extent_record *rec;
5757         struct cache_extent *cache;
5758         struct btrfs_key key;
5759         enum btrfs_tree_block_status status;
5760         int ret = 0;
5761         int level;
5762
5763         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5764         if (!cache)
5765                 return 1;
5766         rec = container_of(cache, struct extent_record, cache);
5767         rec->generation = btrfs_header_generation(buf);
5768
5769         level = btrfs_header_level(buf);
5770         if (btrfs_header_nritems(buf) > 0) {
5771
5772                 if (level == 0)
5773                         btrfs_item_key_to_cpu(buf, &key, 0);
5774                 else
5775                         btrfs_node_key_to_cpu(buf, &key, 0);
5776
5777                 rec->info_objectid = key.objectid;
5778         }
5779         rec->info_level = level;
5780
5781         if (btrfs_is_leaf(buf))
5782                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5783         else
5784                 status = btrfs_check_node(root, &rec->parent_key, buf);
5785
5786         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5787                 if (repair)
5788                         status = try_to_fix_bad_block(root, buf, status);
5789                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5790                         ret = -EIO;
5791                         fprintf(stderr, "bad block %llu\n",
5792                                 (unsigned long long)buf->start);
5793                 } else {
5794                         /*
5795                          * Signal to callers we need to start the scan over
5796                          * again since we'll have cowed blocks.
5797                          */
5798                         ret = -EAGAIN;
5799                 }
5800         } else {
5801                 rec->content_checked = 1;
5802                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5803                         rec->owner_ref_checked = 1;
5804                 else {
5805                         ret = check_owner_ref(root, rec, buf);
5806                         if (!ret)
5807                                 rec->owner_ref_checked = 1;
5808                 }
5809         }
5810         if (!ret)
5811                 maybe_free_extent_rec(extent_cache, rec);
5812         return ret;
5813 }
5814
5815 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5816                                                 u64 parent, u64 root)
5817 {
5818         struct list_head *cur = rec->backrefs.next;
5819         struct extent_backref *node;
5820         struct tree_backref *back;
5821
5822         while(cur != &rec->backrefs) {
5823                 node = to_extent_backref(cur);
5824                 cur = cur->next;
5825                 if (node->is_data)
5826                         continue;
5827                 back = to_tree_backref(node);
5828                 if (parent > 0) {
5829                         if (!node->full_backref)
5830                                 continue;
5831                         if (parent == back->parent)
5832                                 return back;
5833                 } else {
5834                         if (node->full_backref)
5835                                 continue;
5836                         if (back->root == root)
5837                                 return back;
5838                 }
5839         }
5840         return NULL;
5841 }
5842
5843 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5844                                                 u64 parent, u64 root)
5845 {
5846         struct tree_backref *ref = malloc(sizeof(*ref));
5847
5848         if (!ref)
5849                 return NULL;
5850         memset(&ref->node, 0, sizeof(ref->node));
5851         if (parent > 0) {
5852                 ref->parent = parent;
5853                 ref->node.full_backref = 1;
5854         } else {
5855                 ref->root = root;
5856                 ref->node.full_backref = 0;
5857         }
5858         list_add_tail(&ref->node.list, &rec->backrefs);
5859
5860         return ref;
5861 }
5862
5863 static struct data_backref *find_data_backref(struct extent_record *rec,
5864                                                 u64 parent, u64 root,
5865                                                 u64 owner, u64 offset,
5866                                                 int found_ref,
5867                                                 u64 disk_bytenr, u64 bytes)
5868 {
5869         struct list_head *cur = rec->backrefs.next;
5870         struct extent_backref *node;
5871         struct data_backref *back;
5872
5873         while(cur != &rec->backrefs) {
5874                 node = to_extent_backref(cur);
5875                 cur = cur->next;
5876                 if (!node->is_data)
5877                         continue;
5878                 back = to_data_backref(node);
5879                 if (parent > 0) {
5880                         if (!node->full_backref)
5881                                 continue;
5882                         if (parent == back->parent)
5883                                 return back;
5884                 } else {
5885                         if (node->full_backref)
5886                                 continue;
5887                         if (back->root == root && back->owner == owner &&
5888                             back->offset == offset) {
5889                                 if (found_ref && node->found_ref &&
5890                                     (back->bytes != bytes ||
5891                                     back->disk_bytenr != disk_bytenr))
5892                                         continue;
5893                                 return back;
5894                         }
5895                 }
5896         }
5897         return NULL;
5898 }
5899
5900 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5901                                                 u64 parent, u64 root,
5902                                                 u64 owner, u64 offset,
5903                                                 u64 max_size)
5904 {
5905         struct data_backref *ref = malloc(sizeof(*ref));
5906
5907         if (!ref)
5908                 return NULL;
5909         memset(&ref->node, 0, sizeof(ref->node));
5910         ref->node.is_data = 1;
5911
5912         if (parent > 0) {
5913                 ref->parent = parent;
5914                 ref->owner = 0;
5915                 ref->offset = 0;
5916                 ref->node.full_backref = 1;
5917         } else {
5918                 ref->root = root;
5919                 ref->owner = owner;
5920                 ref->offset = offset;
5921                 ref->node.full_backref = 0;
5922         }
5923         ref->bytes = max_size;
5924         ref->found_ref = 0;
5925         ref->num_refs = 0;
5926         list_add_tail(&ref->node.list, &rec->backrefs);
5927         if (max_size > rec->max_size)
5928                 rec->max_size = max_size;
5929         return ref;
5930 }
5931
5932 /* Check if the type of extent matches with its chunk */
5933 static void check_extent_type(struct extent_record *rec)
5934 {
5935         struct btrfs_block_group_cache *bg_cache;
5936
5937         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5938         if (!bg_cache)
5939                 return;
5940
5941         /* data extent, check chunk directly*/
5942         if (!rec->metadata) {
5943                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5944                         rec->wrong_chunk_type = 1;
5945                 return;
5946         }
5947
5948         /* metadata extent, check the obvious case first */
5949         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5950                                  BTRFS_BLOCK_GROUP_METADATA))) {
5951                 rec->wrong_chunk_type = 1;
5952                 return;
5953         }
5954
5955         /*
5956          * Check SYSTEM extent, as it's also marked as metadata, we can only
5957          * make sure it's a SYSTEM extent by its backref
5958          */
5959         if (!list_empty(&rec->backrefs)) {
5960                 struct extent_backref *node;
5961                 struct tree_backref *tback;
5962                 u64 bg_type;
5963
5964                 node = to_extent_backref(rec->backrefs.next);
5965                 if (node->is_data) {
5966                         /* tree block shouldn't have data backref */
5967                         rec->wrong_chunk_type = 1;
5968                         return;
5969                 }
5970                 tback = container_of(node, struct tree_backref, node);
5971
5972                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5973                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5974                 else
5975                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5976                 if (!(bg_cache->flags & bg_type))
5977                         rec->wrong_chunk_type = 1;
5978         }
5979 }
5980
5981 /*
5982  * Allocate a new extent record, fill default values from @tmpl and insert int
5983  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5984  * the cache, otherwise it fails.
5985  */
5986 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5987                 struct extent_record *tmpl)
5988 {
5989         struct extent_record *rec;
5990         int ret = 0;
5991
5992         rec = malloc(sizeof(*rec));
5993         if (!rec)
5994                 return -ENOMEM;
5995         rec->start = tmpl->start;
5996         rec->max_size = tmpl->max_size;
5997         rec->nr = max(tmpl->nr, tmpl->max_size);
5998         rec->found_rec = tmpl->found_rec;
5999         rec->content_checked = tmpl->content_checked;
6000         rec->owner_ref_checked = tmpl->owner_ref_checked;
6001         rec->num_duplicates = 0;
6002         rec->metadata = tmpl->metadata;
6003         rec->flag_block_full_backref = FLAG_UNSET;
6004         rec->bad_full_backref = 0;
6005         rec->crossing_stripes = 0;
6006         rec->wrong_chunk_type = 0;
6007         rec->is_root = tmpl->is_root;
6008         rec->refs = tmpl->refs;
6009         rec->extent_item_refs = tmpl->extent_item_refs;
6010         rec->parent_generation = tmpl->parent_generation;
6011         INIT_LIST_HEAD(&rec->backrefs);
6012         INIT_LIST_HEAD(&rec->dups);
6013         INIT_LIST_HEAD(&rec->list);
6014         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6015         rec->cache.start = tmpl->start;
6016         rec->cache.size = tmpl->nr;
6017         ret = insert_cache_extent(extent_cache, &rec->cache);
6018         if (ret) {
6019                 free(rec);
6020                 return ret;
6021         }
6022         bytes_used += rec->nr;
6023
6024         if (tmpl->metadata)
6025                 rec->crossing_stripes = check_crossing_stripes(global_info,
6026                                 rec->start, global_info->tree_root->nodesize);
6027         check_extent_type(rec);
6028         return ret;
6029 }
6030
6031 /*
6032  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6033  * some are hints:
6034  * - refs              - if found, increase refs
6035  * - is_root           - if found, set
6036  * - content_checked   - if found, set
6037  * - owner_ref_checked - if found, set
6038  *
6039  * If not found, create a new one, initialize and insert.
6040  */
6041 static int add_extent_rec(struct cache_tree *extent_cache,
6042                 struct extent_record *tmpl)
6043 {
6044         struct extent_record *rec;
6045         struct cache_extent *cache;
6046         int ret = 0;
6047         int dup = 0;
6048
6049         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6050         if (cache) {
6051                 rec = container_of(cache, struct extent_record, cache);
6052                 if (tmpl->refs)
6053                         rec->refs++;
6054                 if (rec->nr == 1)
6055                         rec->nr = max(tmpl->nr, tmpl->max_size);
6056
6057                 /*
6058                  * We need to make sure to reset nr to whatever the extent
6059                  * record says was the real size, this way we can compare it to
6060                  * the backrefs.
6061                  */
6062                 if (tmpl->found_rec) {
6063                         if (tmpl->start != rec->start || rec->found_rec) {
6064                                 struct extent_record *tmp;
6065
6066                                 dup = 1;
6067                                 if (list_empty(&rec->list))
6068                                         list_add_tail(&rec->list,
6069                                                       &duplicate_extents);
6070
6071                                 /*
6072                                  * We have to do this song and dance in case we
6073                                  * find an extent record that falls inside of
6074                                  * our current extent record but does not have
6075                                  * the same objectid.
6076                                  */
6077                                 tmp = malloc(sizeof(*tmp));
6078                                 if (!tmp)
6079                                         return -ENOMEM;
6080                                 tmp->start = tmpl->start;
6081                                 tmp->max_size = tmpl->max_size;
6082                                 tmp->nr = tmpl->nr;
6083                                 tmp->found_rec = 1;
6084                                 tmp->metadata = tmpl->metadata;
6085                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6086                                 INIT_LIST_HEAD(&tmp->list);
6087                                 list_add_tail(&tmp->list, &rec->dups);
6088                                 rec->num_duplicates++;
6089                         } else {
6090                                 rec->nr = tmpl->nr;
6091                                 rec->found_rec = 1;
6092                         }
6093                 }
6094
6095                 if (tmpl->extent_item_refs && !dup) {
6096                         if (rec->extent_item_refs) {
6097                                 fprintf(stderr, "block %llu rec "
6098                                         "extent_item_refs %llu, passed %llu\n",
6099                                         (unsigned long long)tmpl->start,
6100                                         (unsigned long long)
6101                                                         rec->extent_item_refs,
6102                                         (unsigned long long)tmpl->extent_item_refs);
6103                         }
6104                         rec->extent_item_refs = tmpl->extent_item_refs;
6105                 }
6106                 if (tmpl->is_root)
6107                         rec->is_root = 1;
6108                 if (tmpl->content_checked)
6109                         rec->content_checked = 1;
6110                 if (tmpl->owner_ref_checked)
6111                         rec->owner_ref_checked = 1;
6112                 memcpy(&rec->parent_key, &tmpl->parent_key,
6113                                 sizeof(tmpl->parent_key));
6114                 if (tmpl->parent_generation)
6115                         rec->parent_generation = tmpl->parent_generation;
6116                 if (rec->max_size < tmpl->max_size)
6117                         rec->max_size = tmpl->max_size;
6118
6119                 /*
6120                  * A metadata extent can't cross stripe_len boundary, otherwise
6121                  * kernel scrub won't be able to handle it.
6122                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6123                  * it.
6124                  */
6125                 if (tmpl->metadata)
6126                         rec->crossing_stripes = check_crossing_stripes(
6127                                         global_info, rec->start,
6128                                         global_info->tree_root->nodesize);
6129                 check_extent_type(rec);
6130                 maybe_free_extent_rec(extent_cache, rec);
6131                 return ret;
6132         }
6133
6134         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6135
6136         return ret;
6137 }
6138
6139 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6140                             u64 parent, u64 root, int found_ref)
6141 {
6142         struct extent_record *rec;
6143         struct tree_backref *back;
6144         struct cache_extent *cache;
6145         int ret;
6146
6147         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6148         if (!cache) {
6149                 struct extent_record tmpl;
6150
6151                 memset(&tmpl, 0, sizeof(tmpl));
6152                 tmpl.start = bytenr;
6153                 tmpl.nr = 1;
6154                 tmpl.metadata = 1;
6155
6156                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6157                 if (ret)
6158                         return ret;
6159
6160                 /* really a bug in cache_extent implement now */
6161                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6162                 if (!cache)
6163                         return -ENOENT;
6164         }
6165
6166         rec = container_of(cache, struct extent_record, cache);
6167         if (rec->start != bytenr) {
6168                 /*
6169                  * Several cause, from unaligned bytenr to over lapping extents
6170                  */
6171                 return -EEXIST;
6172         }
6173
6174         back = find_tree_backref(rec, parent, root);
6175         if (!back) {
6176                 back = alloc_tree_backref(rec, parent, root);
6177                 if (!back)
6178                         return -ENOMEM;
6179         }
6180
6181         if (found_ref) {
6182                 if (back->node.found_ref) {
6183                         fprintf(stderr, "Extent back ref already exists "
6184                                 "for %llu parent %llu root %llu \n",
6185                                 (unsigned long long)bytenr,
6186                                 (unsigned long long)parent,
6187                                 (unsigned long long)root);
6188                 }
6189                 back->node.found_ref = 1;
6190         } else {
6191                 if (back->node.found_extent_tree) {
6192                         fprintf(stderr, "Extent back ref already exists "
6193                                 "for %llu parent %llu root %llu \n",
6194                                 (unsigned long long)bytenr,
6195                                 (unsigned long long)parent,
6196                                 (unsigned long long)root);
6197                 }
6198                 back->node.found_extent_tree = 1;
6199         }
6200         check_extent_type(rec);
6201         maybe_free_extent_rec(extent_cache, rec);
6202         return 0;
6203 }
6204
6205 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6206                             u64 parent, u64 root, u64 owner, u64 offset,
6207                             u32 num_refs, int found_ref, u64 max_size)
6208 {
6209         struct extent_record *rec;
6210         struct data_backref *back;
6211         struct cache_extent *cache;
6212         int ret;
6213
6214         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6215         if (!cache) {
6216                 struct extent_record tmpl;
6217
6218                 memset(&tmpl, 0, sizeof(tmpl));
6219                 tmpl.start = bytenr;
6220                 tmpl.nr = 1;
6221                 tmpl.max_size = max_size;
6222
6223                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6224                 if (ret)
6225                         return ret;
6226
6227                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6228                 if (!cache)
6229                         abort();
6230         }
6231
6232         rec = container_of(cache, struct extent_record, cache);
6233         if (rec->max_size < max_size)
6234                 rec->max_size = max_size;
6235
6236         /*
6237          * If found_ref is set then max_size is the real size and must match the
6238          * existing refs.  So if we have already found a ref then we need to
6239          * make sure that this ref matches the existing one, otherwise we need
6240          * to add a new backref so we can notice that the backrefs don't match
6241          * and we need to figure out who is telling the truth.  This is to
6242          * account for that awful fsync bug I introduced where we'd end up with
6243          * a btrfs_file_extent_item that would have its length include multiple
6244          * prealloc extents or point inside of a prealloc extent.
6245          */
6246         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6247                                  bytenr, max_size);
6248         if (!back) {
6249                 back = alloc_data_backref(rec, parent, root, owner, offset,
6250                                           max_size);
6251                 BUG_ON(!back);
6252         }
6253
6254         if (found_ref) {
6255                 BUG_ON(num_refs != 1);
6256                 if (back->node.found_ref)
6257                         BUG_ON(back->bytes != max_size);
6258                 back->node.found_ref = 1;
6259                 back->found_ref += 1;
6260                 back->bytes = max_size;
6261                 back->disk_bytenr = bytenr;
6262                 rec->refs += 1;
6263                 rec->content_checked = 1;
6264                 rec->owner_ref_checked = 1;
6265         } else {
6266                 if (back->node.found_extent_tree) {
6267                         fprintf(stderr, "Extent back ref already exists "
6268                                 "for %llu parent %llu root %llu "
6269                                 "owner %llu offset %llu num_refs %lu\n",
6270                                 (unsigned long long)bytenr,
6271                                 (unsigned long long)parent,
6272                                 (unsigned long long)root,
6273                                 (unsigned long long)owner,
6274                                 (unsigned long long)offset,
6275                                 (unsigned long)num_refs);
6276                 }
6277                 back->num_refs = num_refs;
6278                 back->node.found_extent_tree = 1;
6279         }
6280         maybe_free_extent_rec(extent_cache, rec);
6281         return 0;
6282 }
6283
6284 static int add_pending(struct cache_tree *pending,
6285                        struct cache_tree *seen, u64 bytenr, u32 size)
6286 {
6287         int ret;
6288         ret = add_cache_extent(seen, bytenr, size);
6289         if (ret)
6290                 return ret;
6291         add_cache_extent(pending, bytenr, size);
6292         return 0;
6293 }
6294
6295 static int pick_next_pending(struct cache_tree *pending,
6296                         struct cache_tree *reada,
6297                         struct cache_tree *nodes,
6298                         u64 last, struct block_info *bits, int bits_nr,
6299                         int *reada_bits)
6300 {
6301         unsigned long node_start = last;
6302         struct cache_extent *cache;
6303         int ret;
6304
6305         cache = search_cache_extent(reada, 0);
6306         if (cache) {
6307                 bits[0].start = cache->start;
6308                 bits[0].size = cache->size;
6309                 *reada_bits = 1;
6310                 return 1;
6311         }
6312         *reada_bits = 0;
6313         if (node_start > 32768)
6314                 node_start -= 32768;
6315
6316         cache = search_cache_extent(nodes, node_start);
6317         if (!cache)
6318                 cache = search_cache_extent(nodes, 0);
6319
6320         if (!cache) {
6321                  cache = search_cache_extent(pending, 0);
6322                  if (!cache)
6323                          return 0;
6324                  ret = 0;
6325                  do {
6326                          bits[ret].start = cache->start;
6327                          bits[ret].size = cache->size;
6328                          cache = next_cache_extent(cache);
6329                          ret++;
6330                  } while (cache && ret < bits_nr);
6331                  return ret;
6332         }
6333
6334         ret = 0;
6335         do {
6336                 bits[ret].start = cache->start;
6337                 bits[ret].size = cache->size;
6338                 cache = next_cache_extent(cache);
6339                 ret++;
6340         } while (cache && ret < bits_nr);
6341
6342         if (bits_nr - ret > 8) {
6343                 u64 lookup = bits[0].start + bits[0].size;
6344                 struct cache_extent *next;
6345                 next = search_cache_extent(pending, lookup);
6346                 while(next) {
6347                         if (next->start - lookup > 32768)
6348                                 break;
6349                         bits[ret].start = next->start;
6350                         bits[ret].size = next->size;
6351                         lookup = next->start + next->size;
6352                         ret++;
6353                         if (ret == bits_nr)
6354                                 break;
6355                         next = next_cache_extent(next);
6356                         if (!next)
6357                                 break;
6358                 }
6359         }
6360         return ret;
6361 }
6362
6363 static void free_chunk_record(struct cache_extent *cache)
6364 {
6365         struct chunk_record *rec;
6366
6367         rec = container_of(cache, struct chunk_record, cache);
6368         list_del_init(&rec->list);
6369         list_del_init(&rec->dextents);
6370         free(rec);
6371 }
6372
6373 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6374 {
6375         cache_tree_free_extents(chunk_cache, free_chunk_record);
6376 }
6377
6378 static void free_device_record(struct rb_node *node)
6379 {
6380         struct device_record *rec;
6381
6382         rec = container_of(node, struct device_record, node);
6383         free(rec);
6384 }
6385
6386 FREE_RB_BASED_TREE(device_cache, free_device_record);
6387
6388 int insert_block_group_record(struct block_group_tree *tree,
6389                               struct block_group_record *bg_rec)
6390 {
6391         int ret;
6392
6393         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6394         if (ret)
6395                 return ret;
6396
6397         list_add_tail(&bg_rec->list, &tree->block_groups);
6398         return 0;
6399 }
6400
6401 static void free_block_group_record(struct cache_extent *cache)
6402 {
6403         struct block_group_record *rec;
6404
6405         rec = container_of(cache, struct block_group_record, cache);
6406         list_del_init(&rec->list);
6407         free(rec);
6408 }
6409
6410 void free_block_group_tree(struct block_group_tree *tree)
6411 {
6412         cache_tree_free_extents(&tree->tree, free_block_group_record);
6413 }
6414
6415 int insert_device_extent_record(struct device_extent_tree *tree,
6416                                 struct device_extent_record *de_rec)
6417 {
6418         int ret;
6419
6420         /*
6421          * Device extent is a bit different from the other extents, because
6422          * the extents which belong to the different devices may have the
6423          * same start and size, so we need use the special extent cache
6424          * search/insert functions.
6425          */
6426         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6427         if (ret)
6428                 return ret;
6429
6430         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6431         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6432         return 0;
6433 }
6434
6435 static void free_device_extent_record(struct cache_extent *cache)
6436 {
6437         struct device_extent_record *rec;
6438
6439         rec = container_of(cache, struct device_extent_record, cache);
6440         if (!list_empty(&rec->chunk_list))
6441                 list_del_init(&rec->chunk_list);
6442         if (!list_empty(&rec->device_list))
6443                 list_del_init(&rec->device_list);
6444         free(rec);
6445 }
6446
6447 void free_device_extent_tree(struct device_extent_tree *tree)
6448 {
6449         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6450 }
6451
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found at @slot of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs; all others as data backrefs carrying the v0 ref count.  In
 * both cases key.objectid is the extent bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6472
6473 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6474                                             struct btrfs_key *key,
6475                                             int slot)
6476 {
6477         struct btrfs_chunk *ptr;
6478         struct chunk_record *rec;
6479         int num_stripes, i;
6480
6481         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6482         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6483
6484         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6485         if (!rec) {
6486                 fprintf(stderr, "memory allocation failed\n");
6487                 exit(-1);
6488         }
6489
6490         INIT_LIST_HEAD(&rec->list);
6491         INIT_LIST_HEAD(&rec->dextents);
6492         rec->bg_rec = NULL;
6493
6494         rec->cache.start = key->offset;
6495         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6496
6497         rec->generation = btrfs_header_generation(leaf);
6498
6499         rec->objectid = key->objectid;
6500         rec->type = key->type;
6501         rec->offset = key->offset;
6502
6503         rec->length = rec->cache.size;
6504         rec->owner = btrfs_chunk_owner(leaf, ptr);
6505         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6506         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6507         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6508         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6509         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6510         rec->num_stripes = num_stripes;
6511         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6512
6513         for (i = 0; i < rec->num_stripes; ++i) {
6514                 rec->stripes[i].devid =
6515                         btrfs_stripe_devid_nr(leaf, ptr, i);
6516                 rec->stripes[i].offset =
6517                         btrfs_stripe_offset_nr(leaf, ptr, i);
6518                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6519                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6520                                 BTRFS_UUID_SIZE);
6521         }
6522
6523         return rec;
6524 }
6525
6526 static int process_chunk_item(struct cache_tree *chunk_cache,
6527                               struct btrfs_key *key, struct extent_buffer *eb,
6528                               int slot)
6529 {
6530         struct chunk_record *rec;
6531         struct btrfs_chunk *chunk;
6532         int ret = 0;
6533
6534         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6535         /*
6536          * Do extra check for this chunk item,
6537          *
6538          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6539          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6540          * and owner<->key_type check.
6541          */
6542         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6543                                       key->offset);
6544         if (ret < 0) {
6545                 error("chunk(%llu, %llu) is not valid, ignore it",
6546                       key->offset, btrfs_chunk_length(eb, chunk));
6547                 return 0;
6548         }
6549         rec = btrfs_new_chunk_record(eb, key, slot);
6550         ret = insert_cache_extent(chunk_cache, &rec->cache);
6551         if (ret) {
6552                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6553                         rec->offset, rec->length);
6554                 free(rec);
6555         }
6556
6557         return ret;
6558 }
6559
6560 static int process_device_item(struct rb_root *dev_cache,
6561                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6562 {
6563         struct btrfs_dev_item *ptr;
6564         struct device_record *rec;
6565         int ret = 0;
6566
6567         ptr = btrfs_item_ptr(eb,
6568                 slot, struct btrfs_dev_item);
6569
6570         rec = malloc(sizeof(*rec));
6571         if (!rec) {
6572                 fprintf(stderr, "memory allocation failed\n");
6573                 return -ENOMEM;
6574         }
6575
6576         rec->devid = key->offset;
6577         rec->generation = btrfs_header_generation(eb);
6578
6579         rec->objectid = key->objectid;
6580         rec->type = key->type;
6581         rec->offset = key->offset;
6582
6583         rec->devid = btrfs_device_id(eb, ptr);
6584         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6585         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6586
6587         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6588         if (ret) {
6589                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6590                 free(rec);
6591         }
6592
6593         return ret;
6594 }
6595
6596 struct block_group_record *
6597 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6598                              int slot)
6599 {
6600         struct btrfs_block_group_item *ptr;
6601         struct block_group_record *rec;
6602
6603         rec = calloc(1, sizeof(*rec));
6604         if (!rec) {
6605                 fprintf(stderr, "memory allocation failed\n");
6606                 exit(-1);
6607         }
6608
6609         rec->cache.start = key->objectid;
6610         rec->cache.size = key->offset;
6611
6612         rec->generation = btrfs_header_generation(leaf);
6613
6614         rec->objectid = key->objectid;
6615         rec->type = key->type;
6616         rec->offset = key->offset;
6617
6618         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6619         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6620
6621         INIT_LIST_HEAD(&rec->list);
6622
6623         return rec;
6624 }
6625
6626 static int process_block_group_item(struct block_group_tree *block_group_cache,
6627                                     struct btrfs_key *key,
6628                                     struct extent_buffer *eb, int slot)
6629 {
6630         struct block_group_record *rec;
6631         int ret = 0;
6632
6633         rec = btrfs_new_block_group_record(eb, key, slot);
6634         ret = insert_block_group_record(block_group_cache, rec);
6635         if (ret) {
6636                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6637                         rec->objectid, rec->offset);
6638                 free(rec);
6639         }
6640
6641         return ret;
6642 }
6643
6644 struct device_extent_record *
6645 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6646                                struct btrfs_key *key, int slot)
6647 {
6648         struct device_extent_record *rec;
6649         struct btrfs_dev_extent *ptr;
6650
6651         rec = calloc(1, sizeof(*rec));
6652         if (!rec) {
6653                 fprintf(stderr, "memory allocation failed\n");
6654                 exit(-1);
6655         }
6656
6657         rec->cache.objectid = key->objectid;
6658         rec->cache.start = key->offset;
6659
6660         rec->generation = btrfs_header_generation(leaf);
6661
6662         rec->objectid = key->objectid;
6663         rec->type = key->type;
6664         rec->offset = key->offset;
6665
6666         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6667         rec->chunk_objecteid =
6668                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6669         rec->chunk_offset =
6670                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6671         rec->length = btrfs_dev_extent_length(leaf, ptr);
6672         rec->cache.size = rec->length;
6673
6674         INIT_LIST_HEAD(&rec->chunk_list);
6675         INIT_LIST_HEAD(&rec->device_list);
6676
6677         return rec;
6678 }
6679
6680 static int
6681 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6682                            struct btrfs_key *key, struct extent_buffer *eb,
6683                            int slot)
6684 {
6685         struct device_extent_record *rec;
6686         int ret;
6687
6688         rec = btrfs_new_device_extent_record(eb, key, slot);
6689         ret = insert_device_extent_record(dev_extent_cache, rec);
6690         if (ret) {
6691                 fprintf(stderr,
6692                         "Device extent[%llu, %llu, %llu] existed.\n",
6693                         rec->objectid, rec->offset, rec->length);
6694                 free(rec);
6695         }
6696
6697         return ret;
6698 }
6699
6700 static int process_extent_item(struct btrfs_root *root,
6701                                struct cache_tree *extent_cache,
6702                                struct extent_buffer *eb, int slot)
6703 {
6704         struct btrfs_extent_item *ei;
6705         struct btrfs_extent_inline_ref *iref;
6706         struct btrfs_extent_data_ref *dref;
6707         struct btrfs_shared_data_ref *sref;
6708         struct btrfs_key key;
6709         struct extent_record tmpl;
6710         unsigned long end;
6711         unsigned long ptr;
6712         int ret;
6713         int type;
6714         u32 item_size = btrfs_item_size_nr(eb, slot);
6715         u64 refs = 0;
6716         u64 offset;
6717         u64 num_bytes;
6718         int metadata = 0;
6719
6720         btrfs_item_key_to_cpu(eb, &key, slot);
6721
6722         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6723                 metadata = 1;
6724                 num_bytes = root->nodesize;
6725         } else {
6726                 num_bytes = key.offset;
6727         }
6728
6729         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6730                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6731                       key.objectid, root->sectorsize);
6732                 return -EIO;
6733         }
6734         if (item_size < sizeof(*ei)) {
6735 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6736                 struct btrfs_extent_item_v0 *ei0;
6737                 BUG_ON(item_size != sizeof(*ei0));
6738                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6739                 refs = btrfs_extent_refs_v0(eb, ei0);
6740 #else
6741                 BUG();
6742 #endif
6743                 memset(&tmpl, 0, sizeof(tmpl));
6744                 tmpl.start = key.objectid;
6745                 tmpl.nr = num_bytes;
6746                 tmpl.extent_item_refs = refs;
6747                 tmpl.metadata = metadata;
6748                 tmpl.found_rec = 1;
6749                 tmpl.max_size = num_bytes;
6750
6751                 return add_extent_rec(extent_cache, &tmpl);
6752         }
6753
6754         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6755         refs = btrfs_extent_refs(eb, ei);
6756         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6757                 metadata = 1;
6758         else
6759                 metadata = 0;
6760         if (metadata && num_bytes != root->nodesize) {
6761                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6762                       num_bytes, root->nodesize);
6763                 return -EIO;
6764         }
6765         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6766                 error("ignore invalid data extent, length %llu is not aligned to %u",
6767                       num_bytes, root->sectorsize);
6768                 return -EIO;
6769         }
6770
6771         memset(&tmpl, 0, sizeof(tmpl));
6772         tmpl.start = key.objectid;
6773         tmpl.nr = num_bytes;
6774         tmpl.extent_item_refs = refs;
6775         tmpl.metadata = metadata;
6776         tmpl.found_rec = 1;
6777         tmpl.max_size = num_bytes;
6778         add_extent_rec(extent_cache, &tmpl);
6779
6780         ptr = (unsigned long)(ei + 1);
6781         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6782             key.type == BTRFS_EXTENT_ITEM_KEY)
6783                 ptr += sizeof(struct btrfs_tree_block_info);
6784
6785         end = (unsigned long)ei + item_size;
6786         while (ptr < end) {
6787                 iref = (struct btrfs_extent_inline_ref *)ptr;
6788                 type = btrfs_extent_inline_ref_type(eb, iref);
6789                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6790                 switch (type) {
6791                 case BTRFS_TREE_BLOCK_REF_KEY:
6792                         ret = add_tree_backref(extent_cache, key.objectid,
6793                                         0, offset, 0);
6794                         if (ret < 0)
6795                                 error("add_tree_backref failed: %s",
6796                                       strerror(-ret));
6797                         break;
6798                 case BTRFS_SHARED_BLOCK_REF_KEY:
6799                         ret = add_tree_backref(extent_cache, key.objectid,
6800                                         offset, 0, 0);
6801                         if (ret < 0)
6802                                 error("add_tree_backref failed: %s",
6803                                       strerror(-ret));
6804                         break;
6805                 case BTRFS_EXTENT_DATA_REF_KEY:
6806                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6807                         add_data_backref(extent_cache, key.objectid, 0,
6808                                         btrfs_extent_data_ref_root(eb, dref),
6809                                         btrfs_extent_data_ref_objectid(eb,
6810                                                                        dref),
6811                                         btrfs_extent_data_ref_offset(eb, dref),
6812                                         btrfs_extent_data_ref_count(eb, dref),
6813                                         0, num_bytes);
6814                         break;
6815                 case BTRFS_SHARED_DATA_REF_KEY:
6816                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6817                         add_data_backref(extent_cache, key.objectid, offset,
6818                                         0, 0, 0,
6819                                         btrfs_shared_data_ref_count(eb, sref),
6820                                         0, num_bytes);
6821                         break;
6822                 default:
6823                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6824                                 key.objectid, key.type, num_bytes);
6825                         goto out;
6826                 }
6827                 ptr += btrfs_extent_inline_ref_size(type);
6828         }
6829         WARN_ON(ptr > end);
6830 out:
6831         return 0;
6832 }
6833
6834 static int check_cache_range(struct btrfs_root *root,
6835                              struct btrfs_block_group_cache *cache,
6836                              u64 offset, u64 bytes)
6837 {
6838         struct btrfs_free_space *entry;
6839         u64 *logical;
6840         u64 bytenr;
6841         int stripe_len;
6842         int i, nr, ret;
6843
6844         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6845                 bytenr = btrfs_sb_offset(i);
6846                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6847                                        cache->key.objectid, bytenr, 0,
6848                                        &logical, &nr, &stripe_len);
6849                 if (ret)
6850                         return ret;
6851
6852                 while (nr--) {
6853                         if (logical[nr] + stripe_len <= offset)
6854                                 continue;
6855                         if (offset + bytes <= logical[nr])
6856                                 continue;
6857                         if (logical[nr] == offset) {
6858                                 if (stripe_len >= bytes) {
6859                                         free(logical);
6860                                         return 0;
6861                                 }
6862                                 bytes -= stripe_len;
6863                                 offset += stripe_len;
6864                         } else if (logical[nr] < offset) {
6865                                 if (logical[nr] + stripe_len >=
6866                                     offset + bytes) {
6867                                         free(logical);
6868                                         return 0;
6869                                 }
6870                                 bytes = (offset + bytes) -
6871                                         (logical[nr] + stripe_len);
6872                                 offset = logical[nr] + stripe_len;
6873                         } else {
6874                                 /*
6875                                  * Could be tricky, the super may land in the
6876                                  * middle of the area we're checking.  First
6877                                  * check the easiest case, it's at the end.
6878                                  */
6879                                 if (logical[nr] + stripe_len >=
6880                                     bytes + offset) {
6881                                         bytes = logical[nr] - offset;
6882                                         continue;
6883                                 }
6884
6885                                 /* Check the left side */
6886                                 ret = check_cache_range(root, cache,
6887                                                         offset,
6888                                                         logical[nr] - offset);
6889                                 if (ret) {
6890                                         free(logical);
6891                                         return ret;
6892                                 }
6893
6894                                 /* Now we continue with the right side */
6895                                 bytes = (offset + bytes) -
6896                                         (logical[nr] + stripe_len);
6897                                 offset = logical[nr] + stripe_len;
6898                         }
6899                 }
6900
6901                 free(logical);
6902         }
6903
6904         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6905         if (!entry) {
6906                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6907                         offset, offset+bytes);
6908                 return -EINVAL;
6909         }
6910
6911         if (entry->offset != offset) {
6912                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6913                         entry->offset);
6914                 return -EINVAL;
6915         }
6916
6917         if (entry->bytes != bytes) {
6918                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6919                         bytes, entry->bytes, offset);
6920                 return -EINVAL;
6921         }
6922
6923         unlink_free_space(cache->free_space_ctl, entry);
6924         free(entry);
6925         return 0;
6926 }
6927
6928 static int verify_space_cache(struct btrfs_root *root,
6929                               struct btrfs_block_group_cache *cache)
6930 {
6931         struct btrfs_path path;
6932         struct extent_buffer *leaf;
6933         struct btrfs_key key;
6934         u64 last;
6935         int ret = 0;
6936
6937         root = root->fs_info->extent_root;
6938
6939         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6940
6941         btrfs_init_path(&path);
6942         key.objectid = last;
6943         key.offset = 0;
6944         key.type = BTRFS_EXTENT_ITEM_KEY;
6945         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6946         if (ret < 0)
6947                 goto out;
6948         ret = 0;
6949         while (1) {
6950                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6951                         ret = btrfs_next_leaf(root, &path);
6952                         if (ret < 0)
6953                                 goto out;
6954                         if (ret > 0) {
6955                                 ret = 0;
6956                                 break;
6957                         }
6958                 }
6959                 leaf = path.nodes[0];
6960                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6961                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6962                         break;
6963                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6964                     key.type != BTRFS_METADATA_ITEM_KEY) {
6965                         path.slots[0]++;
6966                         continue;
6967                 }
6968
6969                 if (last == key.objectid) {
6970                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6971                                 last = key.objectid + key.offset;
6972                         else
6973                                 last = key.objectid + root->nodesize;
6974                         path.slots[0]++;
6975                         continue;
6976                 }
6977
6978                 ret = check_cache_range(root, cache, last,
6979                                         key.objectid - last);
6980                 if (ret)
6981                         break;
6982                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6983                         last = key.objectid + key.offset;
6984                 else
6985                         last = key.objectid + root->nodesize;
6986                 path.slots[0]++;
6987         }
6988
6989         if (last < cache->key.objectid + cache->key.offset)
6990                 ret = check_cache_range(root, cache, last,
6991                                         cache->key.objectid +
6992                                         cache->key.offset - last);
6993
6994 out:
6995         btrfs_release_path(&path);
6996
6997         if (!ret &&
6998             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6999                 fprintf(stderr, "There are still entries left in the space "
7000                         "cache\n");
7001                 ret = -EINVAL;
7002         }
7003
7004         return ret;
7005 }
7006
7007 static int check_space_cache(struct btrfs_root *root)
7008 {
7009         struct btrfs_block_group_cache *cache;
7010         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7011         int ret;
7012         int error = 0;
7013
7014         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7015             btrfs_super_generation(root->fs_info->super_copy) !=
7016             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7017                 printf("cache and super generation don't match, space cache "
7018                        "will be invalidated\n");
7019                 return 0;
7020         }
7021
7022         if (ctx.progress_enabled) {
7023                 ctx.tp = TASK_FREE_SPACE;
7024                 task_start(ctx.info);
7025         }
7026
7027         while (1) {
7028                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7029                 if (!cache)
7030                         break;
7031
7032                 start = cache->key.objectid + cache->key.offset;
7033                 if (!cache->free_space_ctl) {
7034                         if (btrfs_init_free_space_ctl(cache,
7035                                                       root->sectorsize)) {
7036                                 ret = -ENOMEM;
7037                                 break;
7038                         }
7039                 } else {
7040                         btrfs_remove_free_space_cache(cache);
7041                 }
7042
7043                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7044                         ret = exclude_super_stripes(root, cache);
7045                         if (ret) {
7046                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7047                                         strerror(-ret));
7048                                 error++;
7049                                 continue;
7050                         }
7051                         ret = load_free_space_tree(root->fs_info, cache);
7052                         free_excluded_extents(root, cache);
7053                         if (ret < 0) {
7054                                 fprintf(stderr, "could not load free space tree: %s\n",
7055                                         strerror(-ret));
7056                                 error++;
7057                                 continue;
7058                         }
7059                         error += ret;
7060                 } else {
7061                         ret = load_free_space_cache(root->fs_info, cache);
7062                         if (!ret)
7063                                 continue;
7064                 }
7065
7066                 ret = verify_space_cache(root, cache);
7067                 if (ret) {
7068                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7069                                 cache->key.objectid);
7070                         error++;
7071                 }
7072         }
7073
7074         task_stop(ctx.info);
7075
7076         return error ? -EINVAL : 0;
7077 }
7078
7079 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7080                         u64 num_bytes, unsigned long leaf_offset,
7081                         struct extent_buffer *eb) {
7082
7083         u64 offset = 0;
7084         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7085         char *data;
7086         unsigned long csum_offset;
7087         u32 csum;
7088         u32 csum_expected;
7089         u64 read_len;
7090         u64 data_checked = 0;
7091         u64 tmp;
7092         int ret = 0;
7093         int mirror;
7094         int num_copies;
7095
7096         if (num_bytes % root->sectorsize)
7097                 return -EINVAL;
7098
7099         data = malloc(num_bytes);
7100         if (!data)
7101                 return -ENOMEM;
7102
7103         while (offset < num_bytes) {
7104                 mirror = 0;
7105 again:
7106                 read_len = num_bytes - offset;
7107                 /* read as much space once a time */
7108                 ret = read_extent_data(root, data + offset,
7109                                 bytenr + offset, &read_len, mirror);
7110                 if (ret)
7111                         goto out;
7112                 data_checked = 0;
7113                 /* verify every 4k data's checksum */
7114                 while (data_checked < read_len) {
7115                         csum = ~(u32)0;
7116                         tmp = offset + data_checked;
7117
7118                         csum = btrfs_csum_data((char *)data + tmp,
7119                                                csum, root->sectorsize);
7120                         btrfs_csum_final(csum, (u8 *)&csum);
7121
7122                         csum_offset = leaf_offset +
7123                                  tmp / root->sectorsize * csum_size;
7124                         read_extent_buffer(eb, (char *)&csum_expected,
7125                                            csum_offset, csum_size);
7126                         /* try another mirror */
7127                         if (csum != csum_expected) {
7128                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7129                                                 mirror, bytenr + tmp,
7130                                                 csum, csum_expected);
7131                                 num_copies = btrfs_num_copies(
7132                                                 &root->fs_info->mapping_tree,
7133                                                 bytenr, num_bytes);
7134                                 if (mirror < num_copies - 1) {
7135                                         mirror += 1;
7136                                         goto again;
7137                                 }
7138                         }
7139                         data_checked += root->sectorsize;
7140                 }
7141                 offset += read_len;
7142         }
7143 out:
7144         free(data);
7145         return ret;
7146 }
7147
7148 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7149                                u64 num_bytes)
7150 {
7151         struct btrfs_path path;
7152         struct extent_buffer *leaf;
7153         struct btrfs_key key;
7154         int ret;
7155
7156         btrfs_init_path(&path);
7157         key.objectid = bytenr;
7158         key.type = BTRFS_EXTENT_ITEM_KEY;
7159         key.offset = (u64)-1;
7160
7161 again:
7162         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7163                                 0, 0);
7164         if (ret < 0) {
7165                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7166                 btrfs_release_path(&path);
7167                 return ret;
7168         } else if (ret) {
7169                 if (path.slots[0] > 0) {
7170                         path.slots[0]--;
7171                 } else {
7172                         ret = btrfs_prev_leaf(root, &path);
7173                         if (ret < 0) {
7174                                 goto out;
7175                         } else if (ret > 0) {
7176                                 ret = 0;
7177                                 goto out;
7178                         }
7179                 }
7180         }
7181
7182         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7183
7184         /*
7185          * Block group items come before extent items if they have the same
7186          * bytenr, so walk back one more just in case.  Dear future traveller,
7187          * first congrats on mastering time travel.  Now if it's not too much
7188          * trouble could you go back to 2006 and tell Chris to make the
7189          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7190          * EXTENT_ITEM_KEY please?
7191          */
7192         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7193                 if (path.slots[0] > 0) {
7194                         path.slots[0]--;
7195                 } else {
7196                         ret = btrfs_prev_leaf(root, &path);
7197                         if (ret < 0) {
7198                                 goto out;
7199                         } else if (ret > 0) {
7200                                 ret = 0;
7201                                 goto out;
7202                         }
7203                 }
7204                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7205         }
7206
7207         while (num_bytes) {
7208                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7209                         ret = btrfs_next_leaf(root, &path);
7210                         if (ret < 0) {
7211                                 fprintf(stderr, "Error going to next leaf "
7212                                         "%d\n", ret);
7213                                 btrfs_release_path(&path);
7214                                 return ret;
7215                         } else if (ret) {
7216                                 break;
7217                         }
7218                 }
7219                 leaf = path.nodes[0];
7220                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7221                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7222                         path.slots[0]++;
7223                         continue;
7224                 }
7225                 if (key.objectid + key.offset < bytenr) {
7226                         path.slots[0]++;
7227                         continue;
7228                 }
7229                 if (key.objectid > bytenr + num_bytes)
7230                         break;
7231
7232                 if (key.objectid == bytenr) {
7233                         if (key.offset >= num_bytes) {
7234                                 num_bytes = 0;
7235                                 break;
7236                         }
7237                         num_bytes -= key.offset;
7238                         bytenr += key.offset;
7239                 } else if (key.objectid < bytenr) {
7240                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7241                                 num_bytes = 0;
7242                                 break;
7243                         }
7244                         num_bytes = (bytenr + num_bytes) -
7245                                 (key.objectid + key.offset);
7246                         bytenr = key.objectid + key.offset;
7247                 } else {
7248                         if (key.objectid + key.offset < bytenr + num_bytes) {
7249                                 u64 new_start = key.objectid + key.offset;
7250                                 u64 new_bytes = bytenr + num_bytes - new_start;
7251
7252                                 /*
7253                                  * Weird case, the extent is in the middle of
7254                                  * our range, we'll have to search one side
7255                                  * and then the other.  Not sure if this happens
7256                                  * in real life, but no harm in coding it up
7257                                  * anyway just in case.
7258                                  */
7259                                 btrfs_release_path(&path);
7260                                 ret = check_extent_exists(root, new_start,
7261                                                           new_bytes);
7262                                 if (ret) {
7263                                         fprintf(stderr, "Right section didn't "
7264                                                 "have a record\n");
7265                                         break;
7266                                 }
7267                                 num_bytes = key.objectid - bytenr;
7268                                 goto again;
7269                         }
7270                         num_bytes = key.objectid - bytenr;
7271                 }
7272                 path.slots[0]++;
7273         }
7274         ret = 0;
7275
7276 out:
7277         if (num_bytes && !ret) {
7278                 fprintf(stderr, "There are no extents for csum range "
7279                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7280                 ret = 1;
7281         }
7282
7283         btrfs_release_path(&path);
7284         return ret;
7285 }
7286
7287 static int check_csums(struct btrfs_root *root)
7288 {
7289         struct btrfs_path path;
7290         struct extent_buffer *leaf;
7291         struct btrfs_key key;
7292         u64 offset = 0, num_bytes = 0;
7293         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7294         int errors = 0;
7295         int ret;
7296         u64 data_len;
7297         unsigned long leaf_offset;
7298
7299         root = root->fs_info->csum_root;
7300         if (!extent_buffer_uptodate(root->node)) {
7301                 fprintf(stderr, "No valid csum tree found\n");
7302                 return -ENOENT;
7303         }
7304
7305         btrfs_init_path(&path);
7306         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7307         key.type = BTRFS_EXTENT_CSUM_KEY;
7308         key.offset = 0;
7309         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7310         if (ret < 0) {
7311                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7312                 btrfs_release_path(&path);
7313                 return ret;
7314         }
7315
7316         if (ret > 0 && path.slots[0])
7317                 path.slots[0]--;
7318         ret = 0;
7319
7320         while (1) {
7321                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7322                         ret = btrfs_next_leaf(root, &path);
7323                         if (ret < 0) {
7324                                 fprintf(stderr, "Error going to next leaf "
7325                                         "%d\n", ret);
7326                                 break;
7327                         }
7328                         if (ret)
7329                                 break;
7330                 }
7331                 leaf = path.nodes[0];
7332
7333                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7334                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7335                         path.slots[0]++;
7336                         continue;
7337                 }
7338
7339                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7340                               csum_size) * root->sectorsize;
7341                 if (!check_data_csum)
7342                         goto skip_csum_check;
7343                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7344                 ret = check_extent_csums(root, key.offset, data_len,
7345                                          leaf_offset, leaf);
7346                 if (ret)
7347                         break;
7348 skip_csum_check:
7349                 if (!num_bytes) {
7350                         offset = key.offset;
7351                 } else if (key.offset != offset + num_bytes) {
7352                         ret = check_extent_exists(root, offset, num_bytes);
7353                         if (ret) {
7354                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7355                                         "there is no extent record\n",
7356                                         offset, offset+num_bytes);
7357                                 errors++;
7358                         }
7359                         offset = key.offset;
7360                         num_bytes = 0;
7361                 }
7362                 num_bytes += data_len;
7363                 path.slots[0]++;
7364         }
7365
7366         btrfs_release_path(&path);
7367         return errors;
7368 }
7369
7370 static int is_dropped_key(struct btrfs_key *key,
7371                           struct btrfs_key *drop_key) {
7372         if (key->objectid < drop_key->objectid)
7373                 return 1;
7374         else if (key->objectid == drop_key->objectid) {
7375                 if (key->type < drop_key->type)
7376                         return 1;
7377                 else if (key->type == drop_key->type) {
7378                         if (key->offset < drop_key->offset)
7379                                 return 1;
7380                 }
7381         }
7382         return 0;
7383 }
7384
7385 /*
7386  * Here are the rules for FULL_BACKREF.
7387  *
7388  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7389  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7390  *      FULL_BACKREF set.
7391  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7392  *    if it happened after the relocation occurred since we'll have dropped the
7393  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7394  *    have no real way to know for sure.
7395  *
7396  * We process the blocks one root at a time, and we start from the lowest root
7397  * objectid and go to the highest.  So we can just lookup the owner backref for
7398  * the record and if we don't find it then we know it doesn't exist and we have
7399  * a FULL BACKREF.
7400  *
7401  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7402  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7403  * be set or not and then we can check later once we've gathered all the refs.
7404  */
7405 static int calc_extent_flag(struct btrfs_root *root,
7406                            struct cache_tree *extent_cache,
7407                            struct extent_buffer *buf,
7408                            struct root_item_record *ri,
7409                            u64 *flags)
7410 {
7411         struct extent_record *rec;
7412         struct cache_extent *cache;
7413         struct tree_backref *tback;
7414         u64 owner = 0;
7415
7416         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7417         /* we have added this extent before */
7418         if (!cache)
7419                 return -ENOENT;
7420
7421         rec = container_of(cache, struct extent_record, cache);
7422
7423         /*
7424          * Except file/reloc tree, we can not have
7425          * FULL BACKREF MODE
7426          */
7427         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7428                 goto normal;
7429         /*
7430          * root node
7431          */
7432         if (buf->start == ri->bytenr)
7433                 goto normal;
7434
7435         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7436                 goto full_backref;
7437
7438         owner = btrfs_header_owner(buf);
7439         if (owner == ri->objectid)
7440                 goto normal;
7441
7442         tback = find_tree_backref(rec, 0, owner);
7443         if (!tback)
7444                 goto full_backref;
7445 normal:
7446         *flags = 0;
7447         if (rec->flag_block_full_backref != FLAG_UNSET &&
7448             rec->flag_block_full_backref != 0)
7449                 rec->bad_full_backref = 1;
7450         return 0;
7451 full_backref:
7452         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7453         if (rec->flag_block_full_backref != FLAG_UNSET &&
7454             rec->flag_block_full_backref != 1)
7455                 rec->bad_full_backref = 1;
7456         return 0;
7457 }
7458
7459 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7460 {
7461         fprintf(stderr, "Invalid key type(");
7462         print_key_type(stderr, 0, key_type);
7463         fprintf(stderr, ") found in root(");
7464         print_objectid(stderr, rootid, 0);
7465         fprintf(stderr, ")\n");
7466 }
7467
7468 /*
7469  * Check if the key is valid with its extent buffer.
7470  *
7471  * This is a early check in case invalid key exists in a extent buffer
7472  * This is not comprehensive yet, but should prevent wrong key/item passed
7473  * further
7474  */
7475 static int check_type_with_root(u64 rootid, u8 key_type)
7476 {
7477         switch (key_type) {
7478         /* Only valid in chunk tree */
7479         case BTRFS_DEV_ITEM_KEY:
7480         case BTRFS_CHUNK_ITEM_KEY:
7481                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7482                         goto err;
7483                 break;
7484         /* valid in csum and log tree */
7485         case BTRFS_CSUM_TREE_OBJECTID:
7486                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7487                       is_fstree(rootid)))
7488                         goto err;
7489                 break;
7490         case BTRFS_EXTENT_ITEM_KEY:
7491         case BTRFS_METADATA_ITEM_KEY:
7492         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7493                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7494                         goto err;
7495                 break;
7496         case BTRFS_ROOT_ITEM_KEY:
7497                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7498                         goto err;
7499                 break;
7500         case BTRFS_DEV_EXTENT_KEY:
7501                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7502                         goto err;
7503                 break;
7504         }
7505         return 0;
7506 err:
7507         report_mismatch_key_root(key_type, rootid);
7508         return -EINVAL;
7509 }
7510
7511 static int run_next_block(struct btrfs_root *root,
7512                           struct block_info *bits,
7513                           int bits_nr,
7514                           u64 *last,
7515                           struct cache_tree *pending,
7516                           struct cache_tree *seen,
7517                           struct cache_tree *reada,
7518                           struct cache_tree *nodes,
7519                           struct cache_tree *extent_cache,
7520                           struct cache_tree *chunk_cache,
7521                           struct rb_root *dev_cache,
7522                           struct block_group_tree *block_group_cache,
7523                           struct device_extent_tree *dev_extent_cache,
7524                           struct root_item_record *ri)
7525 {
7526         struct extent_buffer *buf;
7527         struct extent_record *rec = NULL;
7528         u64 bytenr;
7529         u32 size;
7530         u64 parent;
7531         u64 owner;
7532         u64 flags;
7533         u64 ptr;
7534         u64 gen = 0;
7535         int ret = 0;
7536         int i;
7537         int nritems;
7538         struct btrfs_key key;
7539         struct cache_extent *cache;
7540         int reada_bits;
7541
7542         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7543                                     bits_nr, &reada_bits);
7544         if (nritems == 0)
7545                 return 1;
7546
7547         if (!reada_bits) {
7548                 for(i = 0; i < nritems; i++) {
7549                         ret = add_cache_extent(reada, bits[i].start,
7550                                                bits[i].size);
7551                         if (ret == -EEXIST)
7552                                 continue;
7553
7554                         /* fixme, get the parent transid */
7555                         readahead_tree_block(root, bits[i].start,
7556                                              bits[i].size, 0);
7557                 }
7558         }
7559         *last = bits[0].start;
7560         bytenr = bits[0].start;
7561         size = bits[0].size;
7562
7563         cache = lookup_cache_extent(pending, bytenr, size);
7564         if (cache) {
7565                 remove_cache_extent(pending, cache);
7566                 free(cache);
7567         }
7568         cache = lookup_cache_extent(reada, bytenr, size);
7569         if (cache) {
7570                 remove_cache_extent(reada, cache);
7571                 free(cache);
7572         }
7573         cache = lookup_cache_extent(nodes, bytenr, size);
7574         if (cache) {
7575                 remove_cache_extent(nodes, cache);
7576                 free(cache);
7577         }
7578         cache = lookup_cache_extent(extent_cache, bytenr, size);
7579         if (cache) {
7580                 rec = container_of(cache, struct extent_record, cache);
7581                 gen = rec->parent_generation;
7582         }
7583
7584         /* fixme, get the real parent transid */
7585         buf = read_tree_block(root, bytenr, size, gen);
7586         if (!extent_buffer_uptodate(buf)) {
7587                 record_bad_block_io(root->fs_info,
7588                                     extent_cache, bytenr, size);
7589                 goto out;
7590         }
7591
7592         nritems = btrfs_header_nritems(buf);
7593
7594         flags = 0;
7595         if (!init_extent_tree) {
7596                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7597                                        btrfs_header_level(buf), 1, NULL,
7598                                        &flags);
7599                 if (ret < 0) {
7600                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7601                         if (ret < 0) {
7602                                 fprintf(stderr, "Couldn't calc extent flags\n");
7603                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7604                         }
7605                 }
7606         } else {
7607                 flags = 0;
7608                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7609                 if (ret < 0) {
7610                         fprintf(stderr, "Couldn't calc extent flags\n");
7611                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7612                 }
7613         }
7614
7615         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7616                 if (ri != NULL &&
7617                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7618                     ri->objectid == btrfs_header_owner(buf)) {
7619                         /*
7620                          * Ok we got to this block from it's original owner and
7621                          * we have FULL_BACKREF set.  Relocation can leave
7622                          * converted blocks over so this is altogether possible,
7623                          * however it's not possible if the generation > the
7624                          * last snapshot, so check for this case.
7625                          */
7626                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7627                             btrfs_header_generation(buf) > ri->last_snapshot) {
7628                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7629                                 rec->bad_full_backref = 1;
7630                         }
7631                 }
7632         } else {
7633                 if (ri != NULL &&
7634                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7635                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7636                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7637                         rec->bad_full_backref = 1;
7638                 }
7639         }
7640
7641         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7642                 rec->flag_block_full_backref = 1;
7643                 parent = bytenr;
7644                 owner = 0;
7645         } else {
7646                 rec->flag_block_full_backref = 0;
7647                 parent = 0;
7648                 owner = btrfs_header_owner(buf);
7649         }
7650
7651         ret = check_block(root, extent_cache, buf, flags);
7652         if (ret)
7653                 goto out;
7654
7655         if (btrfs_is_leaf(buf)) {
7656                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7657                 for (i = 0; i < nritems; i++) {
7658                         struct btrfs_file_extent_item *fi;
7659                         btrfs_item_key_to_cpu(buf, &key, i);
7660                         /*
7661                          * Check key type against the leaf owner.
7662                          * Could filter quite a lot of early error if
7663                          * owner is correct
7664                          */
7665                         if (check_type_with_root(btrfs_header_owner(buf),
7666                                                  key.type)) {
7667                                 fprintf(stderr, "ignoring invalid key\n");
7668                                 continue;
7669                         }
7670                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7671                                 process_extent_item(root, extent_cache, buf,
7672                                                     i);
7673                                 continue;
7674                         }
7675                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7676                                 process_extent_item(root, extent_cache, buf,
7677                                                     i);
7678                                 continue;
7679                         }
7680                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7681                                 total_csum_bytes +=
7682                                         btrfs_item_size_nr(buf, i);
7683                                 continue;
7684                         }
7685                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7686                                 process_chunk_item(chunk_cache, &key, buf, i);
7687                                 continue;
7688                         }
7689                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7690                                 process_device_item(dev_cache, &key, buf, i);
7691                                 continue;
7692                         }
7693                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7694                                 process_block_group_item(block_group_cache,
7695                                         &key, buf, i);
7696                                 continue;
7697                         }
7698                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7699                                 process_device_extent_item(dev_extent_cache,
7700                                         &key, buf, i);
7701                                 continue;
7702
7703                         }
7704                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7705 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7706                                 process_extent_ref_v0(extent_cache, buf, i);
7707 #else
7708                                 BUG();
7709 #endif
7710                                 continue;
7711                         }
7712
7713                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7714                                 ret = add_tree_backref(extent_cache,
7715                                                 key.objectid, 0, key.offset, 0);
7716                                 if (ret < 0)
7717                                         error("add_tree_backref failed: %s",
7718                                               strerror(-ret));
7719                                 continue;
7720                         }
7721                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7722                                 ret = add_tree_backref(extent_cache,
7723                                                 key.objectid, key.offset, 0, 0);
7724                                 if (ret < 0)
7725                                         error("add_tree_backref failed: %s",
7726                                               strerror(-ret));
7727                                 continue;
7728                         }
7729                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7730                                 struct btrfs_extent_data_ref *ref;
7731                                 ref = btrfs_item_ptr(buf, i,
7732                                                 struct btrfs_extent_data_ref);
7733                                 add_data_backref(extent_cache,
7734                                         key.objectid, 0,
7735                                         btrfs_extent_data_ref_root(buf, ref),
7736                                         btrfs_extent_data_ref_objectid(buf,
7737                                                                        ref),
7738                                         btrfs_extent_data_ref_offset(buf, ref),
7739                                         btrfs_extent_data_ref_count(buf, ref),
7740                                         0, root->sectorsize);
7741                                 continue;
7742                         }
7743                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7744                                 struct btrfs_shared_data_ref *ref;
7745                                 ref = btrfs_item_ptr(buf, i,
7746                                                 struct btrfs_shared_data_ref);
7747                                 add_data_backref(extent_cache,
7748                                         key.objectid, key.offset, 0, 0, 0,
7749                                         btrfs_shared_data_ref_count(buf, ref),
7750                                         0, root->sectorsize);
7751                                 continue;
7752                         }
7753                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7754                                 struct bad_item *bad;
7755
7756                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7757                                         continue;
7758                                 if (!owner)
7759                                         continue;
7760                                 bad = malloc(sizeof(struct bad_item));
7761                                 if (!bad)
7762                                         continue;
7763                                 INIT_LIST_HEAD(&bad->list);
7764                                 memcpy(&bad->key, &key,
7765                                        sizeof(struct btrfs_key));
7766                                 bad->root_id = owner;
7767                                 list_add_tail(&bad->list, &delete_items);
7768                                 continue;
7769                         }
7770                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7771                                 continue;
7772                         fi = btrfs_item_ptr(buf, i,
7773                                             struct btrfs_file_extent_item);
7774                         if (btrfs_file_extent_type(buf, fi) ==
7775                             BTRFS_FILE_EXTENT_INLINE)
7776                                 continue;
7777                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7778                                 continue;
7779
7780                         data_bytes_allocated +=
7781                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7782                         if (data_bytes_allocated < root->sectorsize) {
7783                                 abort();
7784                         }
7785                         data_bytes_referenced +=
7786                                 btrfs_file_extent_num_bytes(buf, fi);
7787                         add_data_backref(extent_cache,
7788                                 btrfs_file_extent_disk_bytenr(buf, fi),
7789                                 parent, owner, key.objectid, key.offset -
7790                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7791                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7792                 }
7793         } else {
7794                 int level;
7795                 struct btrfs_key first_key;
7796
7797                 first_key.objectid = 0;
7798
7799                 if (nritems > 0)
7800                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7801                 level = btrfs_header_level(buf);
7802                 for (i = 0; i < nritems; i++) {
7803                         struct extent_record tmpl;
7804
7805                         ptr = btrfs_node_blockptr(buf, i);
7806                         size = root->nodesize;
7807                         btrfs_node_key_to_cpu(buf, &key, i);
7808                         if (ri != NULL) {
7809                                 if ((level == ri->drop_level)
7810                                     && is_dropped_key(&key, &ri->drop_key)) {
7811                                         continue;
7812                                 }
7813                         }
7814
7815                         memset(&tmpl, 0, sizeof(tmpl));
7816                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7817                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7818                         tmpl.start = ptr;
7819                         tmpl.nr = size;
7820                         tmpl.refs = 1;
7821                         tmpl.metadata = 1;
7822                         tmpl.max_size = size;
7823                         ret = add_extent_rec(extent_cache, &tmpl);
7824                         if (ret < 0)
7825                                 goto out;
7826
7827                         ret = add_tree_backref(extent_cache, ptr, parent,
7828                                         owner, 1);
7829                         if (ret < 0) {
7830                                 error("add_tree_backref failed: %s",
7831                                       strerror(-ret));
7832                                 continue;
7833                         }
7834
7835                         if (level > 1) {
7836                                 add_pending(nodes, seen, ptr, size);
7837                         } else {
7838                                 add_pending(pending, seen, ptr, size);
7839                         }
7840                 }
7841                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7842                                       nritems) * sizeof(struct btrfs_key_ptr);
7843         }
7844         total_btree_bytes += buf->len;
7845         if (fs_root_objectid(btrfs_header_owner(buf)))
7846                 total_fs_tree_bytes += buf->len;
7847         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7848                 total_extent_tree_bytes += buf->len;
7849         if (!found_old_backref &&
7850             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7851             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7852             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7853                 found_old_backref = 1;
7854 out:
7855         free_extent_buffer(buf);
7856         return ret;
7857 }
7858
7859 static int add_root_to_pending(struct extent_buffer *buf,
7860                                struct cache_tree *extent_cache,
7861                                struct cache_tree *pending,
7862                                struct cache_tree *seen,
7863                                struct cache_tree *nodes,
7864                                u64 objectid)
7865 {
7866         struct extent_record tmpl;
7867         int ret;
7868
7869         if (btrfs_header_level(buf) > 0)
7870                 add_pending(nodes, seen, buf->start, buf->len);
7871         else
7872                 add_pending(pending, seen, buf->start, buf->len);
7873
7874         memset(&tmpl, 0, sizeof(tmpl));
7875         tmpl.start = buf->start;
7876         tmpl.nr = buf->len;
7877         tmpl.is_root = 1;
7878         tmpl.refs = 1;
7879         tmpl.metadata = 1;
7880         tmpl.max_size = buf->len;
7881         add_extent_rec(extent_cache, &tmpl);
7882
7883         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7884             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7885                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7886                                 0, 1);
7887         else
7888                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7889                                 1);
7890         return ret;
7891 }
7892
7893 /* as we fix the tree, we might be deleting blocks that
7894  * we're tracking for repair.  This hook makes sure we
7895  * remove any backrefs for blocks as we are fixing them.
7896  */
7897 static int free_extent_hook(struct btrfs_trans_handle *trans,
7898                             struct btrfs_root *root,
7899                             u64 bytenr, u64 num_bytes, u64 parent,
7900                             u64 root_objectid, u64 owner, u64 offset,
7901                             int refs_to_drop)
7902 {
7903         struct extent_record *rec;
7904         struct cache_extent *cache;
7905         int is_data;
7906         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7907
7908         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7909         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7910         if (!cache)
7911                 return 0;
7912
7913         rec = container_of(cache, struct extent_record, cache);
7914         if (is_data) {
7915                 struct data_backref *back;
7916                 back = find_data_backref(rec, parent, root_objectid, owner,
7917                                          offset, 1, bytenr, num_bytes);
7918                 if (!back)
7919                         goto out;
7920                 if (back->node.found_ref) {
7921                         back->found_ref -= refs_to_drop;
7922                         if (rec->refs)
7923                                 rec->refs -= refs_to_drop;
7924                 }
7925                 if (back->node.found_extent_tree) {
7926                         back->num_refs -= refs_to_drop;
7927                         if (rec->extent_item_refs)
7928                                 rec->extent_item_refs -= refs_to_drop;
7929                 }
7930                 if (back->found_ref == 0)
7931                         back->node.found_ref = 0;
7932                 if (back->num_refs == 0)
7933                         back->node.found_extent_tree = 0;
7934
7935                 if (!back->node.found_extent_tree && back->node.found_ref) {
7936                         list_del(&back->node.list);
7937                         free(back);
7938                 }
7939         } else {
7940                 struct tree_backref *back;
7941                 back = find_tree_backref(rec, parent, root_objectid);
7942                 if (!back)
7943                         goto out;
7944                 if (back->node.found_ref) {
7945                         if (rec->refs)
7946                                 rec->refs--;
7947                         back->node.found_ref = 0;
7948                 }
7949                 if (back->node.found_extent_tree) {
7950                         if (rec->extent_item_refs)
7951                                 rec->extent_item_refs--;
7952                         back->node.found_extent_tree = 0;
7953                 }
7954                 if (!back->node.found_extent_tree && back->node.found_ref) {
7955                         list_del(&back->node.list);
7956                         free(back);
7957                 }
7958         }
7959         maybe_free_extent_rec(extent_cache, rec);
7960 out:
7961         return 0;
7962 }
7963
7964 static int delete_extent_records(struct btrfs_trans_handle *trans,
7965                                  struct btrfs_root *root,
7966                                  struct btrfs_path *path,
7967                                  u64 bytenr)
7968 {
7969         struct btrfs_key key;
7970         struct btrfs_key found_key;
7971         struct extent_buffer *leaf;
7972         int ret;
7973         int slot;
7974
7975
7976         key.objectid = bytenr;
7977         key.type = (u8)-1;
7978         key.offset = (u64)-1;
7979
7980         while(1) {
7981                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7982                                         &key, path, 0, 1);
7983                 if (ret < 0)
7984                         break;
7985
7986                 if (ret > 0) {
7987                         ret = 0;
7988                         if (path->slots[0] == 0)
7989                                 break;
7990                         path->slots[0]--;
7991                 }
7992                 ret = 0;
7993
7994                 leaf = path->nodes[0];
7995                 slot = path->slots[0];
7996
7997                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7998                 if (found_key.objectid != bytenr)
7999                         break;
8000
8001                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8002                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8003                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8004                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8005                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8006                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8007                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8008                         btrfs_release_path(path);
8009                         if (found_key.type == 0) {
8010                                 if (found_key.offset == 0)
8011                                         break;
8012                                 key.offset = found_key.offset - 1;
8013                                 key.type = found_key.type;
8014                         }
8015                         key.type = found_key.type - 1;
8016                         key.offset = (u64)-1;
8017                         continue;
8018                 }
8019
8020                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8021                         found_key.objectid, found_key.type, found_key.offset);
8022
8023                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8024                 if (ret)
8025                         break;
8026                 btrfs_release_path(path);
8027
8028                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8029                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8030                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8031                                 found_key.offset : root->nodesize;
8032
8033                         ret = btrfs_update_block_group(trans, root, bytenr,
8034                                                        bytes, 0, 0);
8035                         if (ret)
8036                                 break;
8037                 }
8038         }
8039
8040         btrfs_release_path(path);
8041         return ret;
8042 }
8043
8044 /*
8045  * for a single backref, this will allocate a new extent
8046  * and add the backref to it.
8047  */
8048 static int record_extent(struct btrfs_trans_handle *trans,
8049                          struct btrfs_fs_info *info,
8050                          struct btrfs_path *path,
8051                          struct extent_record *rec,
8052                          struct extent_backref *back,
8053                          int allocated, u64 flags)
8054 {
8055         int ret = 0;
8056         struct btrfs_root *extent_root = info->extent_root;
8057         struct extent_buffer *leaf;
8058         struct btrfs_key ins_key;
8059         struct btrfs_extent_item *ei;
8060         struct data_backref *dback;
8061         struct btrfs_tree_block_info *bi;
8062
8063         if (!back->is_data)
8064                 rec->max_size = max_t(u64, rec->max_size,
8065                                     info->extent_root->nodesize);
8066
8067         if (!allocated) {
8068                 u32 item_size = sizeof(*ei);
8069
8070                 if (!back->is_data)
8071                         item_size += sizeof(*bi);
8072
8073                 ins_key.objectid = rec->start;
8074                 ins_key.offset = rec->max_size;
8075                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8076
8077                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8078                                         &ins_key, item_size);
8079                 if (ret)
8080                         goto fail;
8081
8082                 leaf = path->nodes[0];
8083                 ei = btrfs_item_ptr(leaf, path->slots[0],
8084                                     struct btrfs_extent_item);
8085
8086                 btrfs_set_extent_refs(leaf, ei, 0);
8087                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8088
8089                 if (back->is_data) {
8090                         btrfs_set_extent_flags(leaf, ei,
8091                                                BTRFS_EXTENT_FLAG_DATA);
8092                 } else {
8093                         struct btrfs_disk_key copy_key;;
8094
8095                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8096                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8097                                              sizeof(*bi));
8098
8099                         btrfs_set_disk_key_objectid(&copy_key,
8100                                                     rec->info_objectid);
8101                         btrfs_set_disk_key_type(&copy_key, 0);
8102                         btrfs_set_disk_key_offset(&copy_key, 0);
8103
8104                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8105                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8106
8107                         btrfs_set_extent_flags(leaf, ei,
8108                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8109                 }
8110
8111                 btrfs_mark_buffer_dirty(leaf);
8112                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8113                                                rec->max_size, 1, 0);
8114                 if (ret)
8115                         goto fail;
8116                 btrfs_release_path(path);
8117         }
8118
8119         if (back->is_data) {
8120                 u64 parent;
8121                 int i;
8122
8123                 dback = to_data_backref(back);
8124                 if (back->full_backref)
8125                         parent = dback->parent;
8126                 else
8127                         parent = 0;
8128
8129                 for (i = 0; i < dback->found_ref; i++) {
8130                         /* if parent != 0, we're doing a full backref
8131                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8132                          * just makes the backref allocator create a data
8133                          * backref
8134                          */
8135                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8136                                                    rec->start, rec->max_size,
8137                                                    parent,
8138                                                    dback->root,
8139                                                    parent ?
8140                                                    BTRFS_FIRST_FREE_OBJECTID :
8141                                                    dback->owner,
8142                                                    dback->offset);
8143                         if (ret)
8144                                 break;
8145                 }
8146                 fprintf(stderr, "adding new data backref"
8147                                 " on %llu %s %llu owner %llu"
8148                                 " offset %llu found %d\n",
8149                                 (unsigned long long)rec->start,
8150                                 back->full_backref ?
8151                                 "parent" : "root",
8152                                 back->full_backref ?
8153                                 (unsigned long long)parent :
8154                                 (unsigned long long)dback->root,
8155                                 (unsigned long long)dback->owner,
8156                                 (unsigned long long)dback->offset,
8157                                 dback->found_ref);
8158         } else {
8159                 u64 parent;
8160                 struct tree_backref *tback;
8161
8162                 tback = to_tree_backref(back);
8163                 if (back->full_backref)
8164                         parent = tback->parent;
8165                 else
8166                         parent = 0;
8167
8168                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8169                                            rec->start, rec->max_size,
8170                                            parent, tback->root, 0, 0);
8171                 fprintf(stderr, "adding new tree backref on "
8172                         "start %llu len %llu parent %llu root %llu\n",
8173                         rec->start, rec->max_size, parent, tback->root);
8174         }
8175 fail:
8176         btrfs_release_path(path);
8177         return ret;
8178 }
8179
8180 static struct extent_entry *find_entry(struct list_head *entries,
8181                                        u64 bytenr, u64 bytes)
8182 {
8183         struct extent_entry *entry = NULL;
8184
8185         list_for_each_entry(entry, entries, list) {
8186                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8187                         return entry;
8188         }
8189
8190         return NULL;
8191 }
8192
8193 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8194 {
8195         struct extent_entry *entry, *best = NULL, *prev = NULL;
8196
8197         list_for_each_entry(entry, entries, list) {
8198                 /*
8199                  * If there are as many broken entries as entries then we know
8200                  * not to trust this particular entry.
8201                  */
8202                 if (entry->broken == entry->count)
8203                         continue;
8204
8205                 /*
8206                  * Special case, when there are only two entries and 'best' is
8207                  * the first one
8208                  */
8209                 if (!prev) {
8210                         best = entry;
8211                         prev = entry;
8212                         continue;
8213                 }
8214
8215                 /*
8216                  * If our current entry == best then we can't be sure our best
8217                  * is really the best, so we need to keep searching.
8218                  */
8219                 if (best && best->count == entry->count) {
8220                         prev = entry;
8221                         best = NULL;
8222                         continue;
8223                 }
8224
8225                 /* Prev == entry, not good enough, have to keep searching */
8226                 if (!prev->broken && prev->count == entry->count)
8227                         continue;
8228
8229                 if (!best)
8230                         best = (prev->count > entry->count) ? prev : entry;
8231                 else if (best->count < entry->count)
8232                         best = entry;
8233                 prev = entry;
8234         }
8235
8236         return best;
8237 }
8238
8239 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8240                       struct data_backref *dback, struct extent_entry *entry)
8241 {
8242         struct btrfs_trans_handle *trans;
8243         struct btrfs_root *root;
8244         struct btrfs_file_extent_item *fi;
8245         struct extent_buffer *leaf;
8246         struct btrfs_key key;
8247         u64 bytenr, bytes;
8248         int ret, err;
8249
8250         key.objectid = dback->root;
8251         key.type = BTRFS_ROOT_ITEM_KEY;
8252         key.offset = (u64)-1;
8253         root = btrfs_read_fs_root(info, &key);
8254         if (IS_ERR(root)) {
8255                 fprintf(stderr, "Couldn't find root for our ref\n");
8256                 return -EINVAL;
8257         }
8258
8259         /*
8260          * The backref points to the original offset of the extent if it was
8261          * split, so we need to search down to the offset we have and then walk
8262          * forward until we find the backref we're looking for.
8263          */
8264         key.objectid = dback->owner;
8265         key.type = BTRFS_EXTENT_DATA_KEY;
8266         key.offset = dback->offset;
8267         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8268         if (ret < 0) {
8269                 fprintf(stderr, "Error looking up ref %d\n", ret);
8270                 return ret;
8271         }
8272
8273         while (1) {
8274                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8275                         ret = btrfs_next_leaf(root, path);
8276                         if (ret) {
8277                                 fprintf(stderr, "Couldn't find our ref, next\n");
8278                                 return -EINVAL;
8279                         }
8280                 }
8281                 leaf = path->nodes[0];
8282                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8283                 if (key.objectid != dback->owner ||
8284                     key.type != BTRFS_EXTENT_DATA_KEY) {
8285                         fprintf(stderr, "Couldn't find our ref, search\n");
8286                         return -EINVAL;
8287                 }
8288                 fi = btrfs_item_ptr(leaf, path->slots[0],
8289                                     struct btrfs_file_extent_item);
8290                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8291                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8292
8293                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8294                         break;
8295                 path->slots[0]++;
8296         }
8297
8298         btrfs_release_path(path);
8299
8300         trans = btrfs_start_transaction(root, 1);
8301         if (IS_ERR(trans))
8302                 return PTR_ERR(trans);
8303
8304         /*
8305          * Ok we have the key of the file extent we want to fix, now we can cow
8306          * down to the thing and fix it.
8307          */
8308         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8309         if (ret < 0) {
8310                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8311                         key.objectid, key.type, key.offset, ret);
8312                 goto out;
8313         }
8314         if (ret > 0) {
8315                 fprintf(stderr, "Well that's odd, we just found this key "
8316                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8317                         key.offset);
8318                 ret = -EINVAL;
8319                 goto out;
8320         }
8321         leaf = path->nodes[0];
8322         fi = btrfs_item_ptr(leaf, path->slots[0],
8323                             struct btrfs_file_extent_item);
8324
8325         if (btrfs_file_extent_compression(leaf, fi) &&
8326             dback->disk_bytenr != entry->bytenr) {
8327                 fprintf(stderr, "Ref doesn't match the record start and is "
8328                         "compressed, please take a btrfs-image of this file "
8329                         "system and send it to a btrfs developer so they can "
8330                         "complete this functionality for bytenr %Lu\n",
8331                         dback->disk_bytenr);
8332                 ret = -EINVAL;
8333                 goto out;
8334         }
8335
8336         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8337                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8338         } else if (dback->disk_bytenr > entry->bytenr) {
8339                 u64 off_diff, offset;
8340
8341                 off_diff = dback->disk_bytenr - entry->bytenr;
8342                 offset = btrfs_file_extent_offset(leaf, fi);
8343                 if (dback->disk_bytenr + offset +
8344                     btrfs_file_extent_num_bytes(leaf, fi) >
8345                     entry->bytenr + entry->bytes) {
8346                         fprintf(stderr, "Ref is past the entry end, please "
8347                                 "take a btrfs-image of this file system and "
8348                                 "send it to a btrfs developer, ref %Lu\n",
8349                                 dback->disk_bytenr);
8350                         ret = -EINVAL;
8351                         goto out;
8352                 }
8353                 offset += off_diff;
8354                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8355                 btrfs_set_file_extent_offset(leaf, fi, offset);
8356         } else if (dback->disk_bytenr < entry->bytenr) {
8357                 u64 offset;
8358
8359                 offset = btrfs_file_extent_offset(leaf, fi);
8360                 if (dback->disk_bytenr + offset < entry->bytenr) {
8361                         fprintf(stderr, "Ref is before the entry start, please"
8362                                 " take a btrfs-image of this file system and "
8363                                 "send it to a btrfs developer, ref %Lu\n",
8364                                 dback->disk_bytenr);
8365                         ret = -EINVAL;
8366                         goto out;
8367                 }
8368
8369                 offset += dback->disk_bytenr;
8370                 offset -= entry->bytenr;
8371                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8372                 btrfs_set_file_extent_offset(leaf, fi, offset);
8373         }
8374
8375         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8376
8377         /*
8378          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8379          * only do this if we aren't using compression, otherwise it's a
8380          * trickier case.
8381          */
8382         if (!btrfs_file_extent_compression(leaf, fi))
8383                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8384         else
8385                 printf("ram bytes may be wrong?\n");
8386         btrfs_mark_buffer_dirty(leaf);
8387 out:
8388         err = btrfs_commit_transaction(trans, root);
8389         btrfs_release_path(path);
8390         return ret ? ret : err;
8391 }
8392
8393 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8394                            struct extent_record *rec)
8395 {
8396         struct extent_backref *back;
8397         struct data_backref *dback;
8398         struct extent_entry *entry, *best = NULL;
8399         LIST_HEAD(entries);
8400         int nr_entries = 0;
8401         int broken_entries = 0;
8402         int ret = 0;
8403         short mismatch = 0;
8404
8405         /*
8406          * Metadata is easy and the backrefs should always agree on bytenr and
8407          * size, if not we've got bigger issues.
8408          */
8409         if (rec->metadata)
8410                 return 0;
8411
8412         list_for_each_entry(back, &rec->backrefs, list) {
8413                 if (back->full_backref || !back->is_data)
8414                         continue;
8415
8416                 dback = to_data_backref(back);
8417
8418                 /*
8419                  * We only pay attention to backrefs that we found a real
8420                  * backref for.
8421                  */
8422                 if (dback->found_ref == 0)
8423                         continue;
8424
8425                 /*
8426                  * For now we only catch when the bytes don't match, not the
8427                  * bytenr.  We can easily do this at the same time, but I want
8428                  * to have a fs image to test on before we just add repair
8429                  * functionality willy-nilly so we know we won't screw up the
8430                  * repair.
8431                  */
8432
8433                 entry = find_entry(&entries, dback->disk_bytenr,
8434                                    dback->bytes);
8435                 if (!entry) {
8436                         entry = malloc(sizeof(struct extent_entry));
8437                         if (!entry) {
8438                                 ret = -ENOMEM;
8439                                 goto out;
8440                         }
8441                         memset(entry, 0, sizeof(*entry));
8442                         entry->bytenr = dback->disk_bytenr;
8443                         entry->bytes = dback->bytes;
8444                         list_add_tail(&entry->list, &entries);
8445                         nr_entries++;
8446                 }
8447
8448                 /*
8449                  * If we only have on entry we may think the entries agree when
8450                  * in reality they don't so we have to do some extra checking.
8451                  */
8452                 if (dback->disk_bytenr != rec->start ||
8453                     dback->bytes != rec->nr || back->broken)
8454                         mismatch = 1;
8455
8456                 if (back->broken) {
8457                         entry->broken++;
8458                         broken_entries++;
8459                 }
8460
8461                 entry->count++;
8462         }
8463
8464         /* Yay all the backrefs agree, carry on good sir */
8465         if (nr_entries <= 1 && !mismatch)
8466                 goto out;
8467
8468         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8469                 "%Lu\n", rec->start);
8470
8471         /*
8472          * First we want to see if the backrefs can agree amongst themselves who
8473          * is right, so figure out which one of the entries has the highest
8474          * count.
8475          */
8476         best = find_most_right_entry(&entries);
8477
8478         /*
8479          * Ok so we may have an even split between what the backrefs think, so
8480          * this is where we use the extent ref to see what it thinks.
8481          */
8482         if (!best) {
8483                 entry = find_entry(&entries, rec->start, rec->nr);
8484                 if (!entry && (!broken_entries || !rec->found_rec)) {
8485                         fprintf(stderr, "Backrefs don't agree with each other "
8486                                 "and extent record doesn't agree with anybody,"
8487                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8488                                 rec->start, rec->nr);
8489                         ret = -EINVAL;
8490                         goto out;
8491                 } else if (!entry) {
8492                         /*
8493                          * Ok our backrefs were broken, we'll assume this is the
8494                          * correct value and add an entry for this range.
8495                          */
8496                         entry = malloc(sizeof(struct extent_entry));
8497                         if (!entry) {
8498                                 ret = -ENOMEM;
8499                                 goto out;
8500                         }
8501                         memset(entry, 0, sizeof(*entry));
8502                         entry->bytenr = rec->start;
8503                         entry->bytes = rec->nr;
8504                         list_add_tail(&entry->list, &entries);
8505                         nr_entries++;
8506                 }
8507                 entry->count++;
8508                 best = find_most_right_entry(&entries);
8509                 if (!best) {
8510                         fprintf(stderr, "Backrefs and extent record evenly "
8511                                 "split on who is right, this is going to "
8512                                 "require user input to fix bytenr %Lu bytes "
8513                                 "%Lu\n", rec->start, rec->nr);
8514                         ret = -EINVAL;
8515                         goto out;
8516                 }
8517         }
8518
8519         /*
8520          * I don't think this can happen currently as we'll abort() if we catch
8521          * this case higher up, but in case somebody removes that we still can't
8522          * deal with it properly here yet, so just bail out of that's the case.
8523          */
8524         if (best->bytenr != rec->start) {
8525                 fprintf(stderr, "Extent start and backref starts don't match, "
8526                         "please use btrfs-image on this file system and send "
8527                         "it to a btrfs developer so they can make fsck fix "
8528                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8529                         rec->start, rec->nr);
8530                 ret = -EINVAL;
8531                 goto out;
8532         }
8533
8534         /*
8535          * Ok great we all agreed on an extent record, let's go find the real
8536          * references and fix up the ones that don't match.
8537          */
8538         list_for_each_entry(back, &rec->backrefs, list) {
8539                 if (back->full_backref || !back->is_data)
8540                         continue;
8541
8542                 dback = to_data_backref(back);
8543
8544                 /*
8545                  * Still ignoring backrefs that don't have a real ref attached
8546                  * to them.
8547                  */
8548                 if (dback->found_ref == 0)
8549                         continue;
8550
8551                 if (dback->bytes == best->bytes &&
8552                     dback->disk_bytenr == best->bytenr)
8553                         continue;
8554
8555                 ret = repair_ref(info, path, dback, best);
8556                 if (ret)
8557                         goto out;
8558         }
8559
8560         /*
8561          * Ok we messed with the actual refs, which means we need to drop our
8562          * entire cache and go back and rescan.  I know this is a huge pain and
8563          * adds a lot of extra work, but it's the only way to be safe.  Once all
8564          * the backrefs agree we may not need to do anything to the extent
8565          * record itself.
8566          */
8567         ret = -EAGAIN;
8568 out:
8569         while (!list_empty(&entries)) {
8570                 entry = list_entry(entries.next, struct extent_entry, list);
8571                 list_del_init(&entry->list);
8572                 free(entry);
8573         }
8574         return ret;
8575 }
8576
8577 static int process_duplicates(struct btrfs_root *root,
8578                               struct cache_tree *extent_cache,
8579                               struct extent_record *rec)
8580 {
8581         struct extent_record *good, *tmp;
8582         struct cache_extent *cache;
8583         int ret;
8584
8585         /*
8586          * If we found a extent record for this extent then return, or if we
8587          * have more than one duplicate we are likely going to need to delete
8588          * something.
8589          */
8590         if (rec->found_rec || rec->num_duplicates > 1)
8591                 return 0;
8592
8593         /* Shouldn't happen but just in case */
8594         BUG_ON(!rec->num_duplicates);
8595
8596         /*
8597          * So this happens if we end up with a backref that doesn't match the
8598          * actual extent entry.  So either the backref is bad or the extent
8599          * entry is bad.  Either way we want to have the extent_record actually
8600          * reflect what we found in the extent_tree, so we need to take the
8601          * duplicate out and use that as the extent_record since the only way we
8602          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8603          */
8604         remove_cache_extent(extent_cache, &rec->cache);
8605
8606         good = to_extent_record(rec->dups.next);
8607         list_del_init(&good->list);
8608         INIT_LIST_HEAD(&good->backrefs);
8609         INIT_LIST_HEAD(&good->dups);
8610         good->cache.start = good->start;
8611         good->cache.size = good->nr;
8612         good->content_checked = 0;
8613         good->owner_ref_checked = 0;
8614         good->num_duplicates = 0;
8615         good->refs = rec->refs;
8616         list_splice_init(&rec->backrefs, &good->backrefs);
8617         while (1) {
8618                 cache = lookup_cache_extent(extent_cache, good->start,
8619                                             good->nr);
8620                 if (!cache)
8621                         break;
8622                 tmp = container_of(cache, struct extent_record, cache);
8623
8624                 /*
8625                  * If we find another overlapping extent and it's found_rec is
8626                  * set then it's a duplicate and we need to try and delete
8627                  * something.
8628                  */
8629                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8630                         if (list_empty(&good->list))
8631                                 list_add_tail(&good->list,
8632                                               &duplicate_extents);
8633                         good->num_duplicates += tmp->num_duplicates + 1;
8634                         list_splice_init(&tmp->dups, &good->dups);
8635                         list_del_init(&tmp->list);
8636                         list_add_tail(&tmp->list, &good->dups);
8637                         remove_cache_extent(extent_cache, &tmp->cache);
8638                         continue;
8639                 }
8640
8641                 /*
8642                  * Ok we have another non extent item backed extent rec, so lets
8643                  * just add it to this extent and carry on like we did above.
8644                  */
8645                 good->refs += tmp->refs;
8646                 list_splice_init(&tmp->backrefs, &good->backrefs);
8647                 remove_cache_extent(extent_cache, &tmp->cache);
8648                 free(tmp);
8649         }
8650         ret = insert_cache_extent(extent_cache, &good->cache);
8651         BUG_ON(ret);
8652         free(rec);
8653         return good->num_duplicates ? 0 : 1;
8654 }
8655
8656 static int delete_duplicate_records(struct btrfs_root *root,
8657                                     struct extent_record *rec)
8658 {
8659         struct btrfs_trans_handle *trans;
8660         LIST_HEAD(delete_list);
8661         struct btrfs_path path;
8662         struct extent_record *tmp, *good, *n;
8663         int nr_del = 0;
8664         int ret = 0, err;
8665         struct btrfs_key key;
8666
8667         btrfs_init_path(&path);
8668
8669         good = rec;
8670         /* Find the record that covers all of the duplicates. */
8671         list_for_each_entry(tmp, &rec->dups, list) {
8672                 if (good->start < tmp->start)
8673                         continue;
8674                 if (good->nr > tmp->nr)
8675                         continue;
8676
8677                 if (tmp->start + tmp->nr < good->start + good->nr) {
8678                         fprintf(stderr, "Ok we have overlapping extents that "
8679                                 "aren't completely covered by each other, this "
8680                                 "is going to require more careful thought.  "
8681                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8682                                 tmp->start, tmp->nr, good->start, good->nr);
8683                         abort();
8684                 }
8685                 good = tmp;
8686         }
8687
8688         if (good != rec)
8689                 list_add_tail(&rec->list, &delete_list);
8690
8691         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8692                 if (tmp == good)
8693                         continue;
8694                 list_move_tail(&tmp->list, &delete_list);
8695         }
8696
8697         root = root->fs_info->extent_root;
8698         trans = btrfs_start_transaction(root, 1);
8699         if (IS_ERR(trans)) {
8700                 ret = PTR_ERR(trans);
8701                 goto out;
8702         }
8703
8704         list_for_each_entry(tmp, &delete_list, list) {
8705                 if (tmp->found_rec == 0)
8706                         continue;
8707                 key.objectid = tmp->start;
8708                 key.type = BTRFS_EXTENT_ITEM_KEY;
8709                 key.offset = tmp->nr;
8710
8711                 /* Shouldn't happen but just in case */
8712                 if (tmp->metadata) {
8713                         fprintf(stderr, "Well this shouldn't happen, extent "
8714                                 "record overlaps but is metadata? "
8715                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8716                         abort();
8717                 }
8718
8719                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8720                 if (ret) {
8721                         if (ret > 0)
8722                                 ret = -EINVAL;
8723                         break;
8724                 }
8725                 ret = btrfs_del_item(trans, root, &path);
8726                 if (ret)
8727                         break;
8728                 btrfs_release_path(&path);
8729                 nr_del++;
8730         }
8731         err = btrfs_commit_transaction(trans, root);
8732         if (err && !ret)
8733                 ret = err;
8734 out:
8735         while (!list_empty(&delete_list)) {
8736                 tmp = to_extent_record(delete_list.next);
8737                 list_del_init(&tmp->list);
8738                 if (tmp == rec)
8739                         continue;
8740                 free(tmp);
8741         }
8742
8743         while (!list_empty(&rec->dups)) {
8744                 tmp = to_extent_record(rec->dups.next);
8745                 list_del_init(&tmp->list);
8746                 free(tmp);
8747         }
8748
8749         btrfs_release_path(&path);
8750
8751         if (!ret && !nr_del)
8752                 rec->num_duplicates = 0;
8753
8754         return ret ? ret : nr_del;
8755 }
8756
8757 static int find_possible_backrefs(struct btrfs_fs_info *info,
8758                                   struct btrfs_path *path,
8759                                   struct cache_tree *extent_cache,
8760                                   struct extent_record *rec)
8761 {
8762         struct btrfs_root *root;
8763         struct extent_backref *back;
8764         struct data_backref *dback;
8765         struct cache_extent *cache;
8766         struct btrfs_file_extent_item *fi;
8767         struct btrfs_key key;
8768         u64 bytenr, bytes;
8769         int ret;
8770
8771         list_for_each_entry(back, &rec->backrefs, list) {
8772                 /* Don't care about full backrefs (poor unloved backrefs) */
8773                 if (back->full_backref || !back->is_data)
8774                         continue;
8775
8776                 dback = to_data_backref(back);
8777
8778                 /* We found this one, we don't need to do a lookup */
8779                 if (dback->found_ref)
8780                         continue;
8781
8782                 key.objectid = dback->root;
8783                 key.type = BTRFS_ROOT_ITEM_KEY;
8784                 key.offset = (u64)-1;
8785
8786                 root = btrfs_read_fs_root(info, &key);
8787
8788                 /* No root, definitely a bad ref, skip */
8789                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8790                         continue;
8791                 /* Other err, exit */
8792                 if (IS_ERR(root))
8793                         return PTR_ERR(root);
8794
8795                 key.objectid = dback->owner;
8796                 key.type = BTRFS_EXTENT_DATA_KEY;
8797                 key.offset = dback->offset;
8798                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8799                 if (ret) {
8800                         btrfs_release_path(path);
8801                         if (ret < 0)
8802                                 return ret;
8803                         /* Didn't find it, we can carry on */
8804                         ret = 0;
8805                         continue;
8806                 }
8807
8808                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8809                                     struct btrfs_file_extent_item);
8810                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8811                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8812                 btrfs_release_path(path);
8813                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8814                 if (cache) {
8815                         struct extent_record *tmp;
8816                         tmp = container_of(cache, struct extent_record, cache);
8817
8818                         /*
8819                          * If we found an extent record for the bytenr for this
8820                          * particular backref then we can't add it to our
8821                          * current extent record.  We only want to add backrefs
8822                          * that don't have a corresponding extent item in the
8823                          * extent tree since they likely belong to this record
8824                          * and we need to fix it if it doesn't match bytenrs.
8825                          */
8826                         if  (tmp->found_rec)
8827                                 continue;
8828                 }
8829
8830                 dback->found_ref += 1;
8831                 dback->disk_bytenr = bytenr;
8832                 dback->bytes = bytes;
8833
8834                 /*
8835                  * Set this so the verify backref code knows not to trust the
8836                  * values in this backref.
8837                  */
8838                 back->broken = 1;
8839         }
8840
8841         return 0;
8842 }
8843
8844 /*
8845  * Record orphan data ref into corresponding root.
8846  *
8847  * Return 0 if the extent item contains data ref and recorded.
8848  * Return 1 if the extent item contains no useful data ref
8849  *   On that case, it may contains only shared_dataref or metadata backref
8850  *   or the file extent exists(this should be handled by the extent bytenr
8851  *   recovery routine)
8852  * Return <0 if something goes wrong.
8853  */
8854 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8855                                       struct extent_record *rec)
8856 {
8857         struct btrfs_key key;
8858         struct btrfs_root *dest_root;
8859         struct extent_backref *back;
8860         struct data_backref *dback;
8861         struct orphan_data_extent *orphan;
8862         struct btrfs_path path;
8863         int recorded_data_ref = 0;
8864         int ret = 0;
8865
8866         if (rec->metadata)
8867                 return 1;
8868         btrfs_init_path(&path);
8869         list_for_each_entry(back, &rec->backrefs, list) {
8870                 if (back->full_backref || !back->is_data ||
8871                     !back->found_extent_tree)
8872                         continue;
8873                 dback = to_data_backref(back);
8874                 if (dback->found_ref)
8875                         continue;
8876                 key.objectid = dback->root;
8877                 key.type = BTRFS_ROOT_ITEM_KEY;
8878                 key.offset = (u64)-1;
8879
8880                 dest_root = btrfs_read_fs_root(fs_info, &key);
8881
8882                 /* For non-exist root we just skip it */
8883                 if (IS_ERR(dest_root) || !dest_root)
8884                         continue;
8885
8886                 key.objectid = dback->owner;
8887                 key.type = BTRFS_EXTENT_DATA_KEY;
8888                 key.offset = dback->offset;
8889
8890                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8891                 btrfs_release_path(&path);
8892                 /*
8893                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8894                  * we need to record it for inode/file extent rebuild.
8895                  * For ret > 0, we record it only for file extent rebuild.
8896                  * For ret == 0, the file extent exists but only bytenr
8897                  * mismatch, let the original bytenr fix routine to handle,
8898                  * don't record it.
8899                  */
8900                 if (ret == 0)
8901                         continue;
8902                 ret = 0;
8903                 orphan = malloc(sizeof(*orphan));
8904                 if (!orphan) {
8905                         ret = -ENOMEM;
8906                         goto out;
8907                 }
8908                 INIT_LIST_HEAD(&orphan->list);
8909                 orphan->root = dback->root;
8910                 orphan->objectid = dback->owner;
8911                 orphan->offset = dback->offset;
8912                 orphan->disk_bytenr = rec->cache.start;
8913                 orphan->disk_len = rec->cache.size;
8914                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8915                 recorded_data_ref = 1;
8916         }
8917 out:
8918         btrfs_release_path(&path);
8919         if (!ret)
8920                 return !recorded_data_ref;
8921         else
8922                 return ret;
8923 }
8924
8925 /*
8926  * when an incorrect extent item is found, this will delete
8927  * all of the existing entries for it and recreate them
8928  * based on what the tree scan found.
8929  */
8930 static int fixup_extent_refs(struct btrfs_fs_info *info,
8931                              struct cache_tree *extent_cache,
8932                              struct extent_record *rec)
8933 {
8934         struct btrfs_trans_handle *trans = NULL;
8935         int ret;
8936         struct btrfs_path path;
8937         struct list_head *cur = rec->backrefs.next;
8938         struct cache_extent *cache;
8939         struct extent_backref *back;
8940         int allocated = 0;
8941         u64 flags = 0;
8942
8943         if (rec->flag_block_full_backref)
8944                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8945
8946         btrfs_init_path(&path);
8947         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8948                 /*
8949                  * Sometimes the backrefs themselves are so broken they don't
8950                  * get attached to any meaningful rec, so first go back and
8951                  * check any of our backrefs that we couldn't find and throw
8952                  * them into the list if we find the backref so that
8953                  * verify_backrefs can figure out what to do.
8954                  */
8955                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8956                 if (ret < 0)
8957                         goto out;
8958         }
8959
8960         /* step one, make sure all of the backrefs agree */
8961         ret = verify_backrefs(info, &path, rec);
8962         if (ret < 0)
8963                 goto out;
8964
8965         trans = btrfs_start_transaction(info->extent_root, 1);
8966         if (IS_ERR(trans)) {
8967                 ret = PTR_ERR(trans);
8968                 goto out;
8969         }
8970
8971         /* step two, delete all the existing records */
8972         ret = delete_extent_records(trans, info->extent_root, &path,
8973                                     rec->start);
8974
8975         if (ret < 0)
8976                 goto out;
8977
8978         /* was this block corrupt?  If so, don't add references to it */
8979         cache = lookup_cache_extent(info->corrupt_blocks,
8980                                     rec->start, rec->max_size);
8981         if (cache) {
8982                 ret = 0;
8983                 goto out;
8984         }
8985
8986         /* step three, recreate all the refs we did find */
8987         while(cur != &rec->backrefs) {
8988                 back = to_extent_backref(cur);
8989                 cur = cur->next;
8990
8991                 /*
8992                  * if we didn't find any references, don't create a
8993                  * new extent record
8994                  */
8995                 if (!back->found_ref)
8996                         continue;
8997
8998                 rec->bad_full_backref = 0;
8999                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9000                 allocated = 1;
9001
9002                 if (ret)
9003                         goto out;
9004         }
9005 out:
9006         if (trans) {
9007                 int err = btrfs_commit_transaction(trans, info->extent_root);
9008                 if (!ret)
9009                         ret = err;
9010         }
9011
9012         if (!ret)
9013                 fprintf(stderr, "Repaired extent references for %llu\n",
9014                                 (unsigned long long)rec->start);
9015
9016         btrfs_release_path(&path);
9017         return ret;
9018 }
9019
9020 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9021                               struct extent_record *rec)
9022 {
9023         struct btrfs_trans_handle *trans;
9024         struct btrfs_root *root = fs_info->extent_root;
9025         struct btrfs_path path;
9026         struct btrfs_extent_item *ei;
9027         struct btrfs_key key;
9028         u64 flags;
9029         int ret = 0;
9030
9031         key.objectid = rec->start;
9032         if (rec->metadata) {
9033                 key.type = BTRFS_METADATA_ITEM_KEY;
9034                 key.offset = rec->info_level;
9035         } else {
9036                 key.type = BTRFS_EXTENT_ITEM_KEY;
9037                 key.offset = rec->max_size;
9038         }
9039
9040         trans = btrfs_start_transaction(root, 0);
9041         if (IS_ERR(trans))
9042                 return PTR_ERR(trans);
9043
9044         btrfs_init_path(&path);
9045         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9046         if (ret < 0) {
9047                 btrfs_release_path(&path);
9048                 btrfs_commit_transaction(trans, root);
9049                 return ret;
9050         } else if (ret) {
9051                 fprintf(stderr, "Didn't find extent for %llu\n",
9052                         (unsigned long long)rec->start);
9053                 btrfs_release_path(&path);
9054                 btrfs_commit_transaction(trans, root);
9055                 return -ENOENT;
9056         }
9057
9058         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9059                             struct btrfs_extent_item);
9060         flags = btrfs_extent_flags(path.nodes[0], ei);
9061         if (rec->flag_block_full_backref) {
9062                 fprintf(stderr, "setting full backref on %llu\n",
9063                         (unsigned long long)key.objectid);
9064                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9065         } else {
9066                 fprintf(stderr, "clearing full backref on %llu\n",
9067                         (unsigned long long)key.objectid);
9068                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9069         }
9070         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9071         btrfs_mark_buffer_dirty(path.nodes[0]);
9072         btrfs_release_path(&path);
9073         ret = btrfs_commit_transaction(trans, root);
9074         if (!ret)
9075                 fprintf(stderr, "Repaired extent flags for %llu\n",
9076                                 (unsigned long long)rec->start);
9077
9078         return ret;
9079 }
9080
9081 /* right now we only prune from the extent allocation tree */
9082 static int prune_one_block(struct btrfs_trans_handle *trans,
9083                            struct btrfs_fs_info *info,
9084                            struct btrfs_corrupt_block *corrupt)
9085 {
9086         int ret;
9087         struct btrfs_path path;
9088         struct extent_buffer *eb;
9089         u64 found;
9090         int slot;
9091         int nritems;
9092         int level = corrupt->level + 1;
9093
9094         btrfs_init_path(&path);
9095 again:
9096         /* we want to stop at the parent to our busted block */
9097         path.lowest_level = level;
9098
9099         ret = btrfs_search_slot(trans, info->extent_root,
9100                                 &corrupt->key, &path, -1, 1);
9101
9102         if (ret < 0)
9103                 goto out;
9104
9105         eb = path.nodes[level];
9106         if (!eb) {
9107                 ret = -ENOENT;
9108                 goto out;
9109         }
9110
9111         /*
9112          * hopefully the search gave us the block we want to prune,
9113          * lets try that first
9114          */
9115         slot = path.slots[level];
9116         found =  btrfs_node_blockptr(eb, slot);
9117         if (found == corrupt->cache.start)
9118                 goto del_ptr;
9119
9120         nritems = btrfs_header_nritems(eb);
9121
9122         /* the search failed, lets scan this node and hope we find it */
9123         for (slot = 0; slot < nritems; slot++) {
9124                 found =  btrfs_node_blockptr(eb, slot);
9125                 if (found == corrupt->cache.start)
9126                         goto del_ptr;
9127         }
9128         /*
9129          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9130          * to this block
9131          */
9132         if (eb == info->extent_root->node) {
9133                 ret = -ENOENT;
9134                 goto out;
9135         } else {
9136                 level++;
9137                 btrfs_release_path(&path);
9138                 goto again;
9139         }
9140
9141 del_ptr:
9142         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9143         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9144
9145 out:
9146         btrfs_release_path(&path);
9147         return ret;
9148 }
9149
9150 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9151 {
9152         struct btrfs_trans_handle *trans = NULL;
9153         struct cache_extent *cache;
9154         struct btrfs_corrupt_block *corrupt;
9155
9156         while (1) {
9157                 cache = search_cache_extent(info->corrupt_blocks, 0);
9158                 if (!cache)
9159                         break;
9160                 if (!trans) {
9161                         trans = btrfs_start_transaction(info->extent_root, 1);
9162                         if (IS_ERR(trans))
9163                                 return PTR_ERR(trans);
9164                 }
9165                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9166                 prune_one_block(trans, info, corrupt);
9167                 remove_cache_extent(info->corrupt_blocks, cache);
9168         }
9169         if (trans)
9170                 return btrfs_commit_transaction(trans, info->extent_root);
9171         return 0;
9172 }
9173
9174 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9175 {
9176         struct btrfs_block_group_cache *cache;
9177         u64 start, end;
9178         int ret;
9179
9180         while (1) {
9181                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9182                                             &start, &end, EXTENT_DIRTY);
9183                 if (ret)
9184                         break;
9185                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9186         }
9187
9188         start = 0;
9189         while (1) {
9190                 cache = btrfs_lookup_first_block_group(fs_info, start);
9191                 if (!cache)
9192                         break;
9193                 if (cache->cached)
9194                         cache->cached = 0;
9195                 start = cache->key.objectid + cache->key.offset;
9196         }
9197 }
9198
9199 static int check_extent_refs(struct btrfs_root *root,
9200                              struct cache_tree *extent_cache)
9201 {
9202         struct extent_record *rec;
9203         struct cache_extent *cache;
9204         int ret = 0;
9205         int had_dups = 0;
9206
9207         if (repair) {
9208                 /*
9209                  * if we're doing a repair, we have to make sure
9210                  * we don't allocate from the problem extents.
9211                  * In the worst case, this will be all the
9212                  * extents in the FS
9213                  */
9214                 cache = search_cache_extent(extent_cache, 0);
9215                 while(cache) {
9216                         rec = container_of(cache, struct extent_record, cache);
9217                         set_extent_dirty(root->fs_info->excluded_extents,
9218                                          rec->start,
9219                                          rec->start + rec->max_size - 1);
9220                         cache = next_cache_extent(cache);
9221                 }
9222
9223                 /* pin down all the corrupted blocks too */
9224                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9225                 while(cache) {
9226                         set_extent_dirty(root->fs_info->excluded_extents,
9227                                          cache->start,
9228                                          cache->start + cache->size - 1);
9229                         cache = next_cache_extent(cache);
9230                 }
9231                 prune_corrupt_blocks(root->fs_info);
9232                 reset_cached_block_groups(root->fs_info);
9233         }
9234
9235         reset_cached_block_groups(root->fs_info);
9236
9237         /*
9238          * We need to delete any duplicate entries we find first otherwise we
9239          * could mess up the extent tree when we have backrefs that actually
9240          * belong to a different extent item and not the weird duplicate one.
9241          */
9242         while (repair && !list_empty(&duplicate_extents)) {
9243                 rec = to_extent_record(duplicate_extents.next);
9244                 list_del_init(&rec->list);
9245
9246                 /* Sometimes we can find a backref before we find an actual
9247                  * extent, so we need to process it a little bit to see if there
9248                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9249                  * if this is a backref screwup.  If we need to delete stuff
9250                  * process_duplicates() will return 0, otherwise it will return
9251                  * 1 and we
9252                  */
9253                 if (process_duplicates(root, extent_cache, rec))
9254                         continue;
9255                 ret = delete_duplicate_records(root, rec);
9256                 if (ret < 0)
9257                         return ret;
9258                 /*
9259                  * delete_duplicate_records will return the number of entries
9260                  * deleted, so if it's greater than 0 then we know we actually
9261                  * did something and we need to remove.
9262                  */
9263                 if (ret)
9264                         had_dups = 1;
9265         }
9266
9267         if (had_dups)
9268                 return -EAGAIN;
9269
9270         while(1) {
9271                 int cur_err = 0;
9272                 int fix = 0;
9273
9274                 cache = search_cache_extent(extent_cache, 0);
9275                 if (!cache)
9276                         break;
9277                 rec = container_of(cache, struct extent_record, cache);
9278                 if (rec->num_duplicates) {
9279                         fprintf(stderr, "extent item %llu has multiple extent "
9280                                 "items\n", (unsigned long long)rec->start);
9281                         cur_err = 1;
9282                 }
9283
9284                 if (rec->refs != rec->extent_item_refs) {
9285                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9286                                 (unsigned long long)rec->start,
9287                                 (unsigned long long)rec->nr);
9288                         fprintf(stderr, "extent item %llu, found %llu\n",
9289                                 (unsigned long long)rec->extent_item_refs,
9290                                 (unsigned long long)rec->refs);
9291                         ret = record_orphan_data_extents(root->fs_info, rec);
9292                         if (ret < 0)
9293                                 goto repair_abort;
9294                         fix = ret;
9295                         cur_err = 1;
9296                 }
9297                 if (all_backpointers_checked(rec, 1)) {
9298                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9299                                 (unsigned long long)rec->start,
9300                                 (unsigned long long)rec->nr);
9301                         fix = 1;
9302                         cur_err = 1;
9303                 }
9304                 if (!rec->owner_ref_checked) {
9305                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9306                                 (unsigned long long)rec->start,
9307                                 (unsigned long long)rec->nr);
9308                         fix = 1;
9309                         cur_err = 1;
9310                 }
9311
9312                 if (repair && fix) {
9313                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9314                         if (ret)
9315                                 goto repair_abort;
9316                 }
9317
9318
9319                 if (rec->bad_full_backref) {
9320                         fprintf(stderr, "bad full backref, on [%llu]\n",
9321                                 (unsigned long long)rec->start);
9322                         if (repair) {
9323                                 ret = fixup_extent_flags(root->fs_info, rec);
9324                                 if (ret)
9325                                         goto repair_abort;
9326                                 fix = 1;
9327                         }
9328                         cur_err = 1;
9329                 }
9330                 /*
9331                  * Although it's not a extent ref's problem, we reuse this
9332                  * routine for error reporting.
9333                  * No repair function yet.
9334                  */
9335                 if (rec->crossing_stripes) {
9336                         fprintf(stderr,
9337                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9338                                 rec->start, rec->start + rec->max_size);
9339                         cur_err = 1;
9340                 }
9341
9342                 if (rec->wrong_chunk_type) {
9343                         fprintf(stderr,
9344                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9345                                 rec->start, rec->start + rec->max_size);
9346                         cur_err = 1;
9347                 }
9348
9349                 remove_cache_extent(extent_cache, cache);
9350                 free_all_extent_backrefs(rec);
9351                 if (!init_extent_tree && repair && (!cur_err || fix))
9352                         clear_extent_dirty(root->fs_info->excluded_extents,
9353                                            rec->start,
9354                                            rec->start + rec->max_size - 1);
9355                 free(rec);
9356         }
9357 repair_abort:
9358         if (repair) {
9359                 if (ret && ret != -EAGAIN) {
9360                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9361                         exit(1);
9362                 } else if (!ret) {
9363                         struct btrfs_trans_handle *trans;
9364
9365                         root = root->fs_info->extent_root;
9366                         trans = btrfs_start_transaction(root, 1);
9367                         if (IS_ERR(trans)) {
9368                                 ret = PTR_ERR(trans);
9369                                 goto repair_abort;
9370                         }
9371
9372                         btrfs_fix_block_accounting(trans, root);
9373                         ret = btrfs_commit_transaction(trans, root);
9374                         if (ret)
9375                                 goto repair_abort;
9376                 }
9377                 return ret;
9378         }
9379         return 0;
9380 }
9381
9382 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9383 {
9384         u64 stripe_size;
9385
9386         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9387                 stripe_size = length;
9388                 stripe_size /= num_stripes;
9389         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9390                 stripe_size = length * 2;
9391                 stripe_size /= num_stripes;
9392         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9393                 stripe_size = length;
9394                 stripe_size /= (num_stripes - 1);
9395         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9396                 stripe_size = length;
9397                 stripe_size /= (num_stripes - 2);
9398         } else {
9399                 stripe_size = length;
9400         }
9401         return stripe_size;
9402 }
9403
9404 /*
9405  * Check the chunk with its block group/dev list ref:
9406  * Return 0 if all refs seems valid.
9407  * Return 1 if part of refs seems valid, need later check for rebuild ref
9408  * like missing block group and needs to search extent tree to rebuild them.
9409  * Return -1 if essential refs are missing and unable to rebuild.
9410  */
9411 static int check_chunk_refs(struct chunk_record *chunk_rec,
9412                             struct block_group_tree *block_group_cache,
9413                             struct device_extent_tree *dev_extent_cache,
9414                             int silent)
9415 {
9416         struct cache_extent *block_group_item;
9417         struct block_group_record *block_group_rec;
9418         struct cache_extent *dev_extent_item;
9419         struct device_extent_record *dev_extent_rec;
9420         u64 devid;
9421         u64 offset;
9422         u64 length;
9423         int metadump_v2 = 0;
9424         int i;
9425         int ret = 0;
9426
9427         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9428                                                chunk_rec->offset,
9429                                                chunk_rec->length);
9430         if (block_group_item) {
9431                 block_group_rec = container_of(block_group_item,
9432                                                struct block_group_record,
9433                                                cache);
9434                 if (chunk_rec->length != block_group_rec->offset ||
9435                     chunk_rec->offset != block_group_rec->objectid ||
9436                     (!metadump_v2 &&
9437                      chunk_rec->type_flags != block_group_rec->flags)) {
9438                         if (!silent)
9439                                 fprintf(stderr,
9440                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9441                                         chunk_rec->objectid,
9442                                         chunk_rec->type,
9443                                         chunk_rec->offset,
9444                                         chunk_rec->length,
9445                                         chunk_rec->offset,
9446                                         chunk_rec->type_flags,
9447                                         block_group_rec->objectid,
9448                                         block_group_rec->type,
9449                                         block_group_rec->offset,
9450                                         block_group_rec->offset,
9451                                         block_group_rec->objectid,
9452                                         block_group_rec->flags);
9453                         ret = -1;
9454                 } else {
9455                         list_del_init(&block_group_rec->list);
9456                         chunk_rec->bg_rec = block_group_rec;
9457                 }
9458         } else {
9459                 if (!silent)
9460                         fprintf(stderr,
9461                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9462                                 chunk_rec->objectid,
9463                                 chunk_rec->type,
9464                                 chunk_rec->offset,
9465                                 chunk_rec->length,
9466                                 chunk_rec->offset,
9467                                 chunk_rec->type_flags);
9468                 ret = 1;
9469         }
9470
9471         if (metadump_v2)
9472                 return ret;
9473
9474         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9475                                     chunk_rec->num_stripes);
9476         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9477                 devid = chunk_rec->stripes[i].devid;
9478                 offset = chunk_rec->stripes[i].offset;
9479                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9480                                                        devid, offset, length);
9481                 if (dev_extent_item) {
9482                         dev_extent_rec = container_of(dev_extent_item,
9483                                                 struct device_extent_record,
9484                                                 cache);
9485                         if (dev_extent_rec->objectid != devid ||
9486                             dev_extent_rec->offset != offset ||
9487                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9488                             dev_extent_rec->length != length) {
9489                                 if (!silent)
9490                                         fprintf(stderr,
9491                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9492                                                 chunk_rec->objectid,
9493                                                 chunk_rec->type,
9494                                                 chunk_rec->offset,
9495                                                 chunk_rec->stripes[i].devid,
9496                                                 chunk_rec->stripes[i].offset,
9497                                                 dev_extent_rec->objectid,
9498                                                 dev_extent_rec->offset,
9499                                                 dev_extent_rec->length);
9500                                 ret = -1;
9501                         } else {
9502                                 list_move(&dev_extent_rec->chunk_list,
9503                                           &chunk_rec->dextents);
9504                         }
9505                 } else {
9506                         if (!silent)
9507                                 fprintf(stderr,
9508                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9509                                         chunk_rec->objectid,
9510                                         chunk_rec->type,
9511                                         chunk_rec->offset,
9512                                         chunk_rec->stripes[i].devid,
9513                                         chunk_rec->stripes[i].offset);
9514                         ret = -1;
9515                 }
9516         }
9517         return ret;
9518 }
9519
9520 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9521 int check_chunks(struct cache_tree *chunk_cache,
9522                  struct block_group_tree *block_group_cache,
9523                  struct device_extent_tree *dev_extent_cache,
9524                  struct list_head *good, struct list_head *bad,
9525                  struct list_head *rebuild, int silent)
9526 {
9527         struct cache_extent *chunk_item;
9528         struct chunk_record *chunk_rec;
9529         struct block_group_record *bg_rec;
9530         struct device_extent_record *dext_rec;
9531         int err;
9532         int ret = 0;
9533
9534         chunk_item = first_cache_extent(chunk_cache);
9535         while (chunk_item) {
9536                 chunk_rec = container_of(chunk_item, struct chunk_record,
9537                                          cache);
9538                 err = check_chunk_refs(chunk_rec, block_group_cache,
9539                                        dev_extent_cache, silent);
9540                 if (err < 0)
9541                         ret = err;
9542                 if (err == 0 && good)
9543                         list_add_tail(&chunk_rec->list, good);
9544                 if (err > 0 && rebuild)
9545                         list_add_tail(&chunk_rec->list, rebuild);
9546                 if (err < 0 && bad)
9547                         list_add_tail(&chunk_rec->list, bad);
9548                 chunk_item = next_cache_extent(chunk_item);
9549         }
9550
9551         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9552                 if (!silent)
9553                         fprintf(stderr,
9554                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9555                                 bg_rec->objectid,
9556                                 bg_rec->offset,
9557                                 bg_rec->flags);
9558                 if (!ret)
9559                         ret = 1;
9560         }
9561
9562         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9563                             chunk_list) {
9564                 if (!silent)
9565                         fprintf(stderr,
9566                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9567                                 dext_rec->objectid,
9568                                 dext_rec->offset,
9569                                 dext_rec->length);
9570                 if (!ret)
9571                         ret = 1;
9572         }
9573         return ret;
9574 }
9575
9576
9577 static int check_device_used(struct device_record *dev_rec,
9578                              struct device_extent_tree *dext_cache)
9579 {
9580         struct cache_extent *cache;
9581         struct device_extent_record *dev_extent_rec;
9582         u64 total_byte = 0;
9583
9584         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9585         while (cache) {
9586                 dev_extent_rec = container_of(cache,
9587                                               struct device_extent_record,
9588                                               cache);
9589                 if (dev_extent_rec->objectid != dev_rec->devid)
9590                         break;
9591
9592                 list_del_init(&dev_extent_rec->device_list);
9593                 total_byte += dev_extent_rec->length;
9594                 cache = next_cache_extent(cache);
9595         }
9596
9597         if (total_byte != dev_rec->byte_used) {
9598                 fprintf(stderr,
9599                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9600                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9601                         dev_rec->type, dev_rec->offset);
9602                 return -1;
9603         } else {
9604                 return 0;
9605         }
9606 }
9607
9608 /* check btrfs_dev_item -> btrfs_dev_extent */
9609 static int check_devices(struct rb_root *dev_cache,
9610                          struct device_extent_tree *dev_extent_cache)
9611 {
9612         struct rb_node *dev_node;
9613         struct device_record *dev_rec;
9614         struct device_extent_record *dext_rec;
9615         int err;
9616         int ret = 0;
9617
9618         dev_node = rb_first(dev_cache);
9619         while (dev_node) {
9620                 dev_rec = container_of(dev_node, struct device_record, node);
9621                 err = check_device_used(dev_rec, dev_extent_cache);
9622                 if (err)
9623                         ret = err;
9624
9625                 dev_node = rb_next(dev_node);
9626         }
9627         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9628                             device_list) {
9629                 fprintf(stderr,
9630                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9631                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9632                 if (!ret)
9633                         ret = 1;
9634         }
9635         return ret;
9636 }
9637
9638 static int add_root_item_to_list(struct list_head *head,
9639                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9640                                   u8 level, u8 drop_level,
9641                                   int level_size, struct btrfs_key *drop_key)
9642 {
9643
9644         struct root_item_record *ri_rec;
9645         ri_rec = malloc(sizeof(*ri_rec));
9646         if (!ri_rec)
9647                 return -ENOMEM;
9648         ri_rec->bytenr = bytenr;
9649         ri_rec->objectid = objectid;
9650         ri_rec->level = level;
9651         ri_rec->level_size = level_size;
9652         ri_rec->drop_level = drop_level;
9653         ri_rec->last_snapshot = last_snapshot;
9654         if (drop_key)
9655                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9656         list_add_tail(&ri_rec->list, head);
9657
9658         return 0;
9659 }
9660
9661 static void free_root_item_list(struct list_head *list)
9662 {
9663         struct root_item_record *ri_rec;
9664
9665         while (!list_empty(list)) {
9666                 ri_rec = list_first_entry(list, struct root_item_record,
9667                                           list);
9668                 list_del_init(&ri_rec->list);
9669                 free(ri_rec);
9670         }
9671 }
9672
9673 static int deal_root_from_list(struct list_head *list,
9674                                struct btrfs_root *root,
9675                                struct block_info *bits,
9676                                int bits_nr,
9677                                struct cache_tree *pending,
9678                                struct cache_tree *seen,
9679                                struct cache_tree *reada,
9680                                struct cache_tree *nodes,
9681                                struct cache_tree *extent_cache,
9682                                struct cache_tree *chunk_cache,
9683                                struct rb_root *dev_cache,
9684                                struct block_group_tree *block_group_cache,
9685                                struct device_extent_tree *dev_extent_cache)
9686 {
9687         int ret = 0;
9688         u64 last;
9689
9690         while (!list_empty(list)) {
9691                 struct root_item_record *rec;
9692                 struct extent_buffer *buf;
9693                 rec = list_entry(list->next,
9694                                  struct root_item_record, list);
9695                 last = 0;
9696                 buf = read_tree_block(root->fs_info->tree_root,
9697                                       rec->bytenr, rec->level_size, 0);
9698                 if (!extent_buffer_uptodate(buf)) {
9699                         free_extent_buffer(buf);
9700                         ret = -EIO;
9701                         break;
9702                 }
9703                 ret = add_root_to_pending(buf, extent_cache, pending,
9704                                     seen, nodes, rec->objectid);
9705                 if (ret < 0)
9706                         break;
9707                 /*
9708                  * To rebuild extent tree, we need deal with snapshot
9709                  * one by one, otherwise we deal with node firstly which
9710                  * can maximize readahead.
9711                  */
9712                 while (1) {
9713                         ret = run_next_block(root, bits, bits_nr, &last,
9714                                              pending, seen, reada, nodes,
9715                                              extent_cache, chunk_cache,
9716                                              dev_cache, block_group_cache,
9717                                              dev_extent_cache, rec);
9718                         if (ret != 0)
9719                                 break;
9720                 }
9721                 free_extent_buffer(buf);
9722                 list_del(&rec->list);
9723                 free(rec);
9724                 if (ret < 0)
9725                         break;
9726         }
9727         while (ret >= 0) {
9728                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9729                                      reada, nodes, extent_cache, chunk_cache,
9730                                      dev_cache, block_group_cache,
9731                                      dev_extent_cache, NULL);
9732                 if (ret != 0) {
9733                         if (ret > 0)
9734                                 ret = 0;
9735                         break;
9736                 }
9737         }
9738         return ret;
9739 }
9740
9741 static int check_chunks_and_extents(struct btrfs_root *root)
9742 {
9743         struct rb_root dev_cache;
9744         struct cache_tree chunk_cache;
9745         struct block_group_tree block_group_cache;
9746         struct device_extent_tree dev_extent_cache;
9747         struct cache_tree extent_cache;
9748         struct cache_tree seen;
9749         struct cache_tree pending;
9750         struct cache_tree reada;
9751         struct cache_tree nodes;
9752         struct extent_io_tree excluded_extents;
9753         struct cache_tree corrupt_blocks;
9754         struct btrfs_path path;
9755         struct btrfs_key key;
9756         struct btrfs_key found_key;
9757         int ret, err = 0;
9758         struct block_info *bits;
9759         int bits_nr;
9760         struct extent_buffer *leaf;
9761         int slot;
9762         struct btrfs_root_item ri;
9763         struct list_head dropping_trees;
9764         struct list_head normal_trees;
9765         struct btrfs_root *root1;
9766         u64 objectid;
9767         u32 level_size;
9768         u8 level;
9769
9770         dev_cache = RB_ROOT;
9771         cache_tree_init(&chunk_cache);
9772         block_group_tree_init(&block_group_cache);
9773         device_extent_tree_init(&dev_extent_cache);
9774
9775         cache_tree_init(&extent_cache);
9776         cache_tree_init(&seen);
9777         cache_tree_init(&pending);
9778         cache_tree_init(&nodes);
9779         cache_tree_init(&reada);
9780         cache_tree_init(&corrupt_blocks);
9781         extent_io_tree_init(&excluded_extents);
9782         INIT_LIST_HEAD(&dropping_trees);
9783         INIT_LIST_HEAD(&normal_trees);
9784
9785         if (repair) {
9786                 root->fs_info->excluded_extents = &excluded_extents;
9787                 root->fs_info->fsck_extent_cache = &extent_cache;
9788                 root->fs_info->free_extent_hook = free_extent_hook;
9789                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9790         }
9791
9792         bits_nr = 1024;
9793         bits = malloc(bits_nr * sizeof(struct block_info));
9794         if (!bits) {
9795                 perror("malloc");
9796                 exit(1);
9797         }
9798
9799         if (ctx.progress_enabled) {
9800                 ctx.tp = TASK_EXTENTS;
9801                 task_start(ctx.info);
9802         }
9803
9804 again:
9805         root1 = root->fs_info->tree_root;
9806         level = btrfs_header_level(root1->node);
9807         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9808                                     root1->node->start, 0, level, 0,
9809                                     root1->nodesize, NULL);
9810         if (ret < 0)
9811                 goto out;
9812         root1 = root->fs_info->chunk_root;
9813         level = btrfs_header_level(root1->node);
9814         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9815                                     root1->node->start, 0, level, 0,
9816                                     root1->nodesize, NULL);
9817         if (ret < 0)
9818                 goto out;
9819         btrfs_init_path(&path);
9820         key.offset = 0;
9821         key.objectid = 0;
9822         key.type = BTRFS_ROOT_ITEM_KEY;
9823         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9824                                         &key, &path, 0, 0);
9825         if (ret < 0)
9826                 goto out;
9827         while(1) {
9828                 leaf = path.nodes[0];
9829                 slot = path.slots[0];
9830                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9831                         ret = btrfs_next_leaf(root, &path);
9832                         if (ret != 0)
9833                                 break;
9834                         leaf = path.nodes[0];
9835                         slot = path.slots[0];
9836                 }
9837                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9838                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9839                         unsigned long offset;
9840                         u64 last_snapshot;
9841
9842                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9843                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9844                         last_snapshot = btrfs_root_last_snapshot(&ri);
9845                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9846                                 level = btrfs_root_level(&ri);
9847                                 level_size = root->nodesize;
9848                                 ret = add_root_item_to_list(&normal_trees,
9849                                                 found_key.objectid,
9850                                                 btrfs_root_bytenr(&ri),
9851                                                 last_snapshot, level,
9852                                                 0, level_size, NULL);
9853                                 if (ret < 0)
9854                                         goto out;
9855                         } else {
9856                                 level = btrfs_root_level(&ri);
9857                                 level_size = root->nodesize;
9858                                 objectid = found_key.objectid;
9859                                 btrfs_disk_key_to_cpu(&found_key,
9860                                                       &ri.drop_progress);
9861                                 ret = add_root_item_to_list(&dropping_trees,
9862                                                 objectid,
9863                                                 btrfs_root_bytenr(&ri),
9864                                                 last_snapshot, level,
9865                                                 ri.drop_level,
9866                                                 level_size, &found_key);
9867                                 if (ret < 0)
9868                                         goto out;
9869                         }
9870                 }
9871                 path.slots[0]++;
9872         }
9873         btrfs_release_path(&path);
9874
9875         /*
9876          * check_block can return -EAGAIN if it fixes something, please keep
9877          * this in mind when dealing with return values from these functions, if
9878          * we get -EAGAIN we want to fall through and restart the loop.
9879          */
9880         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9881                                   &seen, &reada, &nodes, &extent_cache,
9882                                   &chunk_cache, &dev_cache, &block_group_cache,
9883                                   &dev_extent_cache);
9884         if (ret < 0) {
9885                 if (ret == -EAGAIN)
9886                         goto loop;
9887                 goto out;
9888         }
9889         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9890                                   &pending, &seen, &reada, &nodes,
9891                                   &extent_cache, &chunk_cache, &dev_cache,
9892                                   &block_group_cache, &dev_extent_cache);
9893         if (ret < 0) {
9894                 if (ret == -EAGAIN)
9895                         goto loop;
9896                 goto out;
9897         }
9898
9899         ret = check_chunks(&chunk_cache, &block_group_cache,
9900                            &dev_extent_cache, NULL, NULL, NULL, 0);
9901         if (ret) {
9902                 if (ret == -EAGAIN)
9903                         goto loop;
9904                 err = ret;
9905         }
9906
9907         ret = check_extent_refs(root, &extent_cache);
9908         if (ret < 0) {
9909                 if (ret == -EAGAIN)
9910                         goto loop;
9911                 goto out;
9912         }
9913
9914         ret = check_devices(&dev_cache, &dev_extent_cache);
9915         if (ret && err)
9916                 ret = err;
9917
9918 out:
9919         task_stop(ctx.info);
9920         if (repair) {
9921                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9922                 extent_io_tree_cleanup(&excluded_extents);
9923                 root->fs_info->fsck_extent_cache = NULL;
9924                 root->fs_info->free_extent_hook = NULL;
9925                 root->fs_info->corrupt_blocks = NULL;
9926                 root->fs_info->excluded_extents = NULL;
9927         }
9928         free(bits);
9929         free_chunk_cache_tree(&chunk_cache);
9930         free_device_cache_tree(&dev_cache);
9931         free_block_group_tree(&block_group_cache);
9932         free_device_extent_tree(&dev_extent_cache);
9933         free_extent_cache_tree(&seen);
9934         free_extent_cache_tree(&pending);
9935         free_extent_cache_tree(&reada);
9936         free_extent_cache_tree(&nodes);
9937         return ret;
9938 loop:
9939         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9940         free_extent_cache_tree(&seen);
9941         free_extent_cache_tree(&pending);
9942         free_extent_cache_tree(&reada);
9943         free_extent_cache_tree(&nodes);
9944         free_chunk_cache_tree(&chunk_cache);
9945         free_block_group_tree(&block_group_cache);
9946         free_device_cache_tree(&dev_cache);
9947         free_device_extent_tree(&dev_extent_cache);
9948         free_extent_record_cache(root->fs_info, &extent_cache);
9949         free_root_item_list(&normal_trees);
9950         free_root_item_list(&dropping_trees);
9951         extent_io_tree_cleanup(&excluded_extents);
9952         goto again;
9953 }
9954
9955 /*
9956  * Check backrefs of a tree block given by @bytenr or @eb.
9957  *
9958  * @root:       the root containing the @bytenr or @eb
9959  * @eb:         tree block extent buffer, can be NULL
9960  * @bytenr:     bytenr of the tree block to search
9961  * @level:      tree level of the tree block
9962  * @owner:      owner of the tree block
9963  *
9964  * Return >0 for any error found and output error message
9965  * Return 0 for no error found
9966  */
9967 static int check_tree_block_ref(struct btrfs_root *root,
9968                                 struct extent_buffer *eb, u64 bytenr,
9969                                 int level, u64 owner)
9970 {
9971         struct btrfs_key key;
9972         struct btrfs_root *extent_root = root->fs_info->extent_root;
9973         struct btrfs_path path;
9974         struct btrfs_extent_item *ei;
9975         struct btrfs_extent_inline_ref *iref;
9976         struct extent_buffer *leaf;
9977         unsigned long end;
9978         unsigned long ptr;
9979         int slot;
9980         int skinny_level;
9981         int type;
9982         u32 nodesize = root->nodesize;
9983         u32 item_size;
9984         u64 offset;
9985         int tree_reloc_root = 0;
9986         int found_ref = 0;
9987         int err = 0;
9988         int ret;
9989
9990         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9991             btrfs_header_bytenr(root->node) == bytenr)
9992                 tree_reloc_root = 1;
9993
9994         btrfs_init_path(&path);
9995         key.objectid = bytenr;
9996         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9997                 key.type = BTRFS_METADATA_ITEM_KEY;
9998         else
9999                 key.type = BTRFS_EXTENT_ITEM_KEY;
10000         key.offset = (u64)-1;
10001
10002         /* Search for the backref in extent tree */
10003         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10004         if (ret < 0) {
10005                 err |= BACKREF_MISSING;
10006                 goto out;
10007         }
10008         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10009         if (ret) {
10010                 err |= BACKREF_MISSING;
10011                 goto out;
10012         }
10013
10014         leaf = path.nodes[0];
10015         slot = path.slots[0];
10016         btrfs_item_key_to_cpu(leaf, &key, slot);
10017
10018         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10019
10020         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10021                 skinny_level = (int)key.offset;
10022                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10023         } else {
10024                 struct btrfs_tree_block_info *info;
10025
10026                 info = (struct btrfs_tree_block_info *)(ei + 1);
10027                 skinny_level = btrfs_tree_block_level(leaf, info);
10028                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10029         }
10030
10031         if (eb) {
10032                 u64 header_gen;
10033                 u64 extent_gen;
10034
10035                 if (!(btrfs_extent_flags(leaf, ei) &
10036                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10037                         error(
10038                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10039                                 key.objectid, nodesize,
10040                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10041                         err = BACKREF_MISMATCH;
10042                 }
10043                 header_gen = btrfs_header_generation(eb);
10044                 extent_gen = btrfs_extent_generation(leaf, ei);
10045                 if (header_gen != extent_gen) {
10046                         error(
10047         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10048                                 key.objectid, nodesize, header_gen,
10049                                 extent_gen);
10050                         err = BACKREF_MISMATCH;
10051                 }
10052                 if (level != skinny_level) {
10053                         error(
10054                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10055                                 key.objectid, nodesize, level, skinny_level);
10056                         err = BACKREF_MISMATCH;
10057                 }
10058                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10059                         error(
10060                         "extent[%llu %u] is referred by other roots than %llu",
10061                                 key.objectid, nodesize, root->objectid);
10062                         err = BACKREF_MISMATCH;
10063                 }
10064         }
10065
10066         /*
10067          * Iterate the extent/metadata item to find the exact backref
10068          */
10069         item_size = btrfs_item_size_nr(leaf, slot);
10070         ptr = (unsigned long)iref;
10071         end = (unsigned long)ei + item_size;
10072         while (ptr < end) {
10073                 iref = (struct btrfs_extent_inline_ref *)ptr;
10074                 type = btrfs_extent_inline_ref_type(leaf, iref);
10075                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10076
10077                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10078                         (offset == root->objectid || offset == owner)) {
10079                         found_ref = 1;
10080                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10081                         /*
10082                          * Backref of tree reloc root points to itself, no need
10083                          * to check backref any more.
10084                          */
10085                         if (tree_reloc_root)
10086                                 found_ref = 1;
10087                         else
10088                         /* Check if the backref points to valid referencer */
10089                                 found_ref = !check_tree_block_ref(root, NULL,
10090                                                 offset, level + 1, owner);
10091                 }
10092
10093                 if (found_ref)
10094                         break;
10095                 ptr += btrfs_extent_inline_ref_size(type);
10096         }
10097
10098         /*
10099          * Inlined extent item doesn't have what we need, check
10100          * TREE_BLOCK_REF_KEY
10101          */
10102         if (!found_ref) {
10103                 btrfs_release_path(&path);
10104                 key.objectid = bytenr;
10105                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10106                 key.offset = root->objectid;
10107
10108                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10109                 if (!ret)
10110                         found_ref = 1;
10111         }
10112         if (!found_ref)
10113                 err |= BACKREF_MISSING;
10114 out:
10115         btrfs_release_path(&path);
10116         if (eb && (err & BACKREF_MISSING))
10117                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10118                         bytenr, nodesize, owner, level);
10119         return err;
10120 }
10121
10122 /*
10123  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10124  *
10125  * Return >0 any error found and output error message
10126  * Return 0 for no error found
10127  */
10128 static int check_extent_data_item(struct btrfs_root *root,
10129                                   struct extent_buffer *eb, int slot)
10130 {
10131         struct btrfs_file_extent_item *fi;
10132         struct btrfs_path path;
10133         struct btrfs_root *extent_root = root->fs_info->extent_root;
10134         struct btrfs_key fi_key;
10135         struct btrfs_key dbref_key;
10136         struct extent_buffer *leaf;
10137         struct btrfs_extent_item *ei;
10138         struct btrfs_extent_inline_ref *iref;
10139         struct btrfs_extent_data_ref *dref;
10140         u64 owner;
10141         u64 disk_bytenr;
10142         u64 disk_num_bytes;
10143         u64 extent_num_bytes;
10144         u64 extent_flags;
10145         u32 item_size;
10146         unsigned long end;
10147         unsigned long ptr;
10148         int type;
10149         u64 ref_root;
10150         int found_dbackref = 0;
10151         int err = 0;
10152         int ret;
10153
10154         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10155         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10156
10157         /* Nothing to check for hole and inline data extents */
10158         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10159             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10160                 return 0;
10161
10162         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10163         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10164         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10165
10166         /* Check unaligned disk_num_bytes and num_bytes */
10167         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10168                 error(
10169 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10170                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10171                         root->sectorsize);
10172                 err |= BYTES_UNALIGNED;
10173         } else {
10174                 data_bytes_allocated += disk_num_bytes;
10175         }
10176         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10177                 error(
10178 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10179                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10180                         root->sectorsize);
10181                 err |= BYTES_UNALIGNED;
10182         } else {
10183                 data_bytes_referenced += extent_num_bytes;
10184         }
10185         owner = btrfs_header_owner(eb);
10186
10187         /* Check the extent item of the file extent in extent tree */
10188         btrfs_init_path(&path);
10189         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10190         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10191         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10192
10193         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10194         if (ret) {
10195                 err |= BACKREF_MISSING;
10196                 goto error;
10197         }
10198
10199         leaf = path.nodes[0];
10200         slot = path.slots[0];
10201         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10202
10203         extent_flags = btrfs_extent_flags(leaf, ei);
10204
10205         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10206                 error(
10207                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10208                     disk_bytenr, disk_num_bytes,
10209                     BTRFS_EXTENT_FLAG_DATA);
10210                 err |= BACKREF_MISMATCH;
10211         }
10212
10213         /* Check data backref inside that extent item */
10214         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10215         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10216         ptr = (unsigned long)iref;
10217         end = (unsigned long)ei + item_size;
10218         while (ptr < end) {
10219                 iref = (struct btrfs_extent_inline_ref *)ptr;
10220                 type = btrfs_extent_inline_ref_type(leaf, iref);
10221                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10222
10223                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10224                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10225                         if (ref_root == owner || ref_root == root->objectid)
10226                                 found_dbackref = 1;
10227                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10228                         found_dbackref = !check_tree_block_ref(root, NULL,
10229                                 btrfs_extent_inline_ref_offset(leaf, iref),
10230                                 0, owner);
10231                 }
10232
10233                 if (found_dbackref)
10234                         break;
10235                 ptr += btrfs_extent_inline_ref_size(type);
10236         }
10237
10238         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10239         if (!found_dbackref) {
10240                 btrfs_release_path(&path);
10241
10242                 btrfs_init_path(&path);
10243                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10244                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10245                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10246                                 fi_key.objectid, fi_key.offset);
10247
10248                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10249                                         &dbref_key, &path, 0, 0);
10250                 if (!ret)
10251                         found_dbackref = 1;
10252         }
10253
10254         if (!found_dbackref)
10255                 err |= BACKREF_MISSING;
10256 error:
10257         btrfs_release_path(&path);
10258         if (err & BACKREF_MISSING) {
10259                 error("data extent[%llu %llu] backref lost",
10260                       disk_bytenr, disk_num_bytes);
10261         }
10262         return err;
10263 }
10264
10265 /*
10266  * Get real tree block level for the case like shared block
10267  * Return >= 0 as tree level
10268  * Return <0 for error
10269  */
10270 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10271 {
10272         struct extent_buffer *eb;
10273         struct btrfs_path path;
10274         struct btrfs_key key;
10275         struct btrfs_extent_item *ei;
10276         u64 flags;
10277         u64 transid;
10278         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10279         u8 backref_level;
10280         u8 header_level;
10281         int ret;
10282
10283         /* Search extent tree for extent generation and level */
10284         key.objectid = bytenr;
10285         key.type = BTRFS_METADATA_ITEM_KEY;
10286         key.offset = (u64)-1;
10287
10288         btrfs_init_path(&path);
10289         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10290         if (ret < 0)
10291                 goto release_out;
10292         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10293         if (ret < 0)
10294                 goto release_out;
10295         if (ret > 0) {
10296                 ret = -ENOENT;
10297                 goto release_out;
10298         }
10299
10300         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10301         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10302                             struct btrfs_extent_item);
10303         flags = btrfs_extent_flags(path.nodes[0], ei);
10304         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10305                 ret = -ENOENT;
10306                 goto release_out;
10307         }
10308
10309         /* Get transid for later read_tree_block() check */
10310         transid = btrfs_extent_generation(path.nodes[0], ei);
10311
10312         /* Get backref level as one source */
10313         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10314                 backref_level = key.offset;
10315         } else {
10316                 struct btrfs_tree_block_info *info;
10317
10318                 info = (struct btrfs_tree_block_info *)(ei + 1);
10319                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10320         }
10321         btrfs_release_path(&path);
10322
10323         /* Get level from tree block as an alternative source */
10324         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10325         if (!extent_buffer_uptodate(eb)) {
10326                 free_extent_buffer(eb);
10327                 return -EIO;
10328         }
10329         header_level = btrfs_header_level(eb);
10330         free_extent_buffer(eb);
10331
10332         if (header_level != backref_level)
10333                 return -EIO;
10334         return header_level;
10335
10336 release_out:
10337         btrfs_release_path(&path);
10338         return ret;
10339 }
10340
10341 /*
10342  * Check if a tree block backref is valid (points to a valid tree block)
10343  * if level == -1, level will be resolved
10344  * Return >0 for any error found and print error message
10345  */
10346 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10347                                     u64 bytenr, int level)
10348 {
10349         struct btrfs_root *root;
10350         struct btrfs_key key;
10351         struct btrfs_path path;
10352         struct extent_buffer *eb;
10353         struct extent_buffer *node;
10354         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10355         int err = 0;
10356         int ret;
10357
10358         /* Query level for level == -1 special case */
10359         if (level == -1)
10360                 level = query_tree_block_level(fs_info, bytenr);
10361         if (level < 0) {
10362                 err |= REFERENCER_MISSING;
10363                 goto out;
10364         }
10365
10366         key.objectid = root_id;
10367         key.type = BTRFS_ROOT_ITEM_KEY;
10368         key.offset = (u64)-1;
10369
10370         root = btrfs_read_fs_root(fs_info, &key);
10371         if (IS_ERR(root)) {
10372                 err |= REFERENCER_MISSING;
10373                 goto out;
10374         }
10375
10376         /* Read out the tree block to get item/node key */
10377         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10378         if (!extent_buffer_uptodate(eb)) {
10379                 err |= REFERENCER_MISSING;
10380                 free_extent_buffer(eb);
10381                 goto out;
10382         }
10383
10384         /* Empty tree, no need to check key */
10385         if (!btrfs_header_nritems(eb) && !level) {
10386                 free_extent_buffer(eb);
10387                 goto out;
10388         }
10389
10390         if (level)
10391                 btrfs_node_key_to_cpu(eb, &key, 0);
10392         else
10393                 btrfs_item_key_to_cpu(eb, &key, 0);
10394
10395         free_extent_buffer(eb);
10396
10397         btrfs_init_path(&path);
10398         path.lowest_level = level;
10399         /* Search with the first key, to ensure we can reach it */
10400         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10401         if (ret < 0) {
10402                 err |= REFERENCER_MISSING;
10403                 goto release_out;
10404         }
10405
10406         node = path.nodes[level];
10407         if (btrfs_header_bytenr(node) != bytenr) {
10408                 error(
10409         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10410                         bytenr, nodesize, bytenr,
10411                         btrfs_header_bytenr(node));
10412                 err |= REFERENCER_MISMATCH;
10413         }
10414         if (btrfs_header_level(node) != level) {
10415                 error(
10416         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10417                         bytenr, nodesize, level,
10418                         btrfs_header_level(node));
10419                 err |= REFERENCER_MISMATCH;
10420         }
10421
10422 release_out:
10423         btrfs_release_path(&path);
10424 out:
10425         if (err & REFERENCER_MISSING) {
10426                 if (level < 0)
10427                         error("extent [%llu %d] lost referencer (owner: %llu)",
10428                                 bytenr, nodesize, root_id);
10429                 else
10430                         error(
10431                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10432                                 bytenr, nodesize, root_id, level);
10433         }
10434
10435         return err;
10436 }
10437
10438 /*
10439  * Check if tree block @eb is tree reloc root.
10440  * Return 0 if it's not or any problem happens
10441  * Return 1 if it's a tree reloc root
10442  */
10443 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10444                                  struct extent_buffer *eb)
10445 {
10446         struct btrfs_root *tree_reloc_root;
10447         struct btrfs_key key;
10448         u64 bytenr = btrfs_header_bytenr(eb);
10449         u64 owner = btrfs_header_owner(eb);
10450         int ret = 0;
10451
10452         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10453         key.offset = owner;
10454         key.type = BTRFS_ROOT_ITEM_KEY;
10455
10456         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10457         if (IS_ERR(tree_reloc_root))
10458                 return 0;
10459
10460         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10461                 ret = 1;
10462         btrfs_free_fs_root(tree_reloc_root);
10463         return ret;
10464 }
10465
10466 /*
10467  * Check referencer for shared block backref
10468  * If level == -1, this function will resolve the level.
10469  */
10470 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10471                                      u64 parent, u64 bytenr, int level)
10472 {
10473         struct extent_buffer *eb;
10474         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10475         u32 nr;
10476         int found_parent = 0;
10477         int i;
10478
10479         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10480         if (!extent_buffer_uptodate(eb))
10481                 goto out;
10482
10483         if (level == -1)
10484                 level = query_tree_block_level(fs_info, bytenr);
10485         if (level < 0)
10486                 goto out;
10487
10488         /* It's possible it's a tree reloc root */
10489         if (parent == bytenr) {
10490                 if (is_tree_reloc_root(fs_info, eb))
10491                         found_parent = 1;
10492                 goto out;
10493         }
10494
10495         if (level + 1 != btrfs_header_level(eb))
10496                 goto out;
10497
10498         nr = btrfs_header_nritems(eb);
10499         for (i = 0; i < nr; i++) {
10500                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10501                         found_parent = 1;
10502                         break;
10503                 }
10504         }
10505 out:
10506         free_extent_buffer(eb);
10507         if (!found_parent) {
10508                 error(
10509         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10510                         bytenr, nodesize, parent, level);
10511                 return REFERENCER_MISSING;
10512         }
10513         return 0;
10514 }
10515
10516 /*
10517  * Check referencer for normal (inlined) data ref
10518  * If len == 0, it will be resolved by searching in extent tree
10519  */
10520 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10521                                      u64 root_id, u64 objectid, u64 offset,
10522                                      u64 bytenr, u64 len, u32 count)
10523 {
10524         struct btrfs_root *root;
10525         struct btrfs_root *extent_root = fs_info->extent_root;
10526         struct btrfs_key key;
10527         struct btrfs_path path;
10528         struct extent_buffer *leaf;
10529         struct btrfs_file_extent_item *fi;
10530         u32 found_count = 0;
10531         int slot;
10532         int ret = 0;
10533
10534         if (!len) {
10535                 key.objectid = bytenr;
10536                 key.type = BTRFS_EXTENT_ITEM_KEY;
10537                 key.offset = (u64)-1;
10538
10539                 btrfs_init_path(&path);
10540                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10541                 if (ret < 0)
10542                         goto out;
10543                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10544                 if (ret)
10545                         goto out;
10546                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10547                 if (key.objectid != bytenr ||
10548                     key.type != BTRFS_EXTENT_ITEM_KEY)
10549                         goto out;
10550                 len = key.offset;
10551                 btrfs_release_path(&path);
10552         }
10553         key.objectid = root_id;
10554         key.type = BTRFS_ROOT_ITEM_KEY;
10555         key.offset = (u64)-1;
10556         btrfs_init_path(&path);
10557
10558         root = btrfs_read_fs_root(fs_info, &key);
10559         if (IS_ERR(root))
10560                 goto out;
10561
10562         key.objectid = objectid;
10563         key.type = BTRFS_EXTENT_DATA_KEY;
10564         /*
10565          * It can be nasty as data backref offset is
10566          * file offset - file extent offset, which is smaller or
10567          * equal to original backref offset.  The only special case is
10568          * overflow.  So we need to special check and do further search.
10569          */
10570         key.offset = offset & (1ULL << 63) ? 0 : offset;
10571
10572         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10573         if (ret < 0)
10574                 goto out;
10575
10576         /*
10577          * Search afterwards to get correct one
10578          * NOTE: As we must do a comprehensive check on the data backref to
10579          * make sure the dref count also matches, we must iterate all file
10580          * extents for that inode.
10581          */
10582         while (1) {
10583                 leaf = path.nodes[0];
10584                 slot = path.slots[0];
10585
10586                 btrfs_item_key_to_cpu(leaf, &key, slot);
10587                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10588                         break;
10589                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10590                 /*
10591                  * Except normal disk bytenr and disk num bytes, we still
10592                  * need to do extra check on dbackref offset as
10593                  * dbackref offset = file_offset - file_extent_offset
10594                  */
10595                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10596                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10597                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10598                     offset)
10599                         found_count++;
10600
10601                 ret = btrfs_next_item(root, &path);
10602                 if (ret)
10603                         break;
10604         }
10605 out:
10606         btrfs_release_path(&path);
10607         if (found_count != count) {
10608                 error(
10609 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10610                         bytenr, len, root_id, objectid, offset, count, found_count);
10611                 return REFERENCER_MISSING;
10612         }
10613         return 0;
10614 }
10615
10616 /*
10617  * Check if the referencer of a shared data backref exists
10618  */
10619 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10620                                      u64 parent, u64 bytenr)
10621 {
10622         struct extent_buffer *eb;
10623         struct btrfs_key key;
10624         struct btrfs_file_extent_item *fi;
10625         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10626         u32 nr;
10627         int found_parent = 0;
10628         int i;
10629
10630         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10631         if (!extent_buffer_uptodate(eb))
10632                 goto out;
10633
10634         nr = btrfs_header_nritems(eb);
10635         for (i = 0; i < nr; i++) {
10636                 btrfs_item_key_to_cpu(eb, &key, i);
10637                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10638                         continue;
10639
10640                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10641                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10642                         continue;
10643
10644                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10645                         found_parent = 1;
10646                         break;
10647                 }
10648         }
10649
10650 out:
10651         free_extent_buffer(eb);
10652         if (!found_parent) {
10653                 error("shared extent %llu referencer lost (parent: %llu)",
10654                         bytenr, parent);
10655                 return REFERENCER_MISSING;
10656         }
10657         return 0;
10658 }
10659
10660 /*
10661  * This function will check a given extent item, including its backref and
10662  * itself (like crossing stripe boundary and type)
10663  *
10664  * Since we don't use extent_record anymore, introduce new error bit
10665  */
10666 static int check_extent_item(struct btrfs_fs_info *fs_info,
10667                              struct extent_buffer *eb, int slot)
10668 {
10669         struct btrfs_extent_item *ei;
10670         struct btrfs_extent_inline_ref *iref;
10671         struct btrfs_extent_data_ref *dref;
10672         unsigned long end;
10673         unsigned long ptr;
10674         int type;
10675         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10676         u32 item_size = btrfs_item_size_nr(eb, slot);
10677         u64 flags;
10678         u64 offset;
10679         int metadata = 0;
10680         int level;
10681         struct btrfs_key key;
10682         int ret;
10683         int err = 0;
10684
10685         btrfs_item_key_to_cpu(eb, &key, slot);
10686         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10687                 bytes_used += key.offset;
10688         else
10689                 bytes_used += nodesize;
10690
10691         if (item_size < sizeof(*ei)) {
10692                 /*
10693                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10694                  * old thing when on disk format is still un-determined.
10695                  * No need to care about it anymore
10696                  */
10697                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10698                 return -ENOTTY;
10699         }
10700
10701         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10702         flags = btrfs_extent_flags(eb, ei);
10703
10704         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10705                 metadata = 1;
10706         if (metadata && check_crossing_stripes(global_info, key.objectid,
10707                                                eb->len)) {
10708                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10709                       key.objectid, key.objectid + nodesize);
10710                 err |= CROSSING_STRIPE_BOUNDARY;
10711         }
10712
10713         ptr = (unsigned long)(ei + 1);
10714
10715         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10716                 /* Old EXTENT_ITEM metadata */
10717                 struct btrfs_tree_block_info *info;
10718
10719                 info = (struct btrfs_tree_block_info *)ptr;
10720                 level = btrfs_tree_block_level(eb, info);
10721                 ptr += sizeof(struct btrfs_tree_block_info);
10722         } else {
10723                 /* New METADATA_ITEM */
10724                 level = key.offset;
10725         }
10726         end = (unsigned long)ei + item_size;
10727
10728         if (ptr >= end) {
10729                 err |= ITEM_SIZE_MISMATCH;
10730                 goto out;
10731         }
10732
10733         /* Now check every backref in this extent item */
10734 next:
10735         iref = (struct btrfs_extent_inline_ref *)ptr;
10736         type = btrfs_extent_inline_ref_type(eb, iref);
10737         offset = btrfs_extent_inline_ref_offset(eb, iref);
10738         switch (type) {
10739         case BTRFS_TREE_BLOCK_REF_KEY:
10740                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10741                                                level);
10742                 err |= ret;
10743                 break;
10744         case BTRFS_SHARED_BLOCK_REF_KEY:
10745                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10746                                                  level);
10747                 err |= ret;
10748                 break;
10749         case BTRFS_EXTENT_DATA_REF_KEY:
10750                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10751                 ret = check_extent_data_backref(fs_info,
10752                                 btrfs_extent_data_ref_root(eb, dref),
10753                                 btrfs_extent_data_ref_objectid(eb, dref),
10754                                 btrfs_extent_data_ref_offset(eb, dref),
10755                                 key.objectid, key.offset,
10756                                 btrfs_extent_data_ref_count(eb, dref));
10757                 err |= ret;
10758                 break;
10759         case BTRFS_SHARED_DATA_REF_KEY:
10760                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10761                 err |= ret;
10762                 break;
10763         default:
10764                 error("extent[%llu %d %llu] has unknown ref type: %d",
10765                         key.objectid, key.type, key.offset, type);
10766                 err |= UNKNOWN_TYPE;
10767                 goto out;
10768         }
10769
10770         ptr += btrfs_extent_inline_ref_size(type);
10771         if (ptr < end)
10772                 goto next;
10773
10774 out:
10775         return err;
10776 }
10777
10778 /*
10779  * Check if a dev extent item is referred correctly by its chunk
10780  */
10781 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10782                                  struct extent_buffer *eb, int slot)
10783 {
10784         struct btrfs_root *chunk_root = fs_info->chunk_root;
10785         struct btrfs_dev_extent *ptr;
10786         struct btrfs_path path;
10787         struct btrfs_key chunk_key;
10788         struct btrfs_key devext_key;
10789         struct btrfs_chunk *chunk;
10790         struct extent_buffer *l;
10791         int num_stripes;
10792         u64 length;
10793         int i;
10794         int found_chunk = 0;
10795         int ret;
10796
10797         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10798         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10799         length = btrfs_dev_extent_length(eb, ptr);
10800
10801         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10802         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10803         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10804
10805         btrfs_init_path(&path);
10806         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10807         if (ret)
10808                 goto out;
10809
10810         l = path.nodes[0];
10811         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10812         if (btrfs_chunk_length(l, chunk) != length)
10813                 goto out;
10814
10815         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10816         for (i = 0; i < num_stripes; i++) {
10817                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10818                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10819
10820                 if (devid == devext_key.objectid &&
10821                     offset == devext_key.offset) {
10822                         found_chunk = 1;
10823                         break;
10824                 }
10825         }
10826 out:
10827         btrfs_release_path(&path);
10828         if (!found_chunk) {
10829                 error(
10830                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10831                         devext_key.objectid, devext_key.offset, length);
10832                 return REFERENCER_MISSING;
10833         }
10834         return 0;
10835 }
10836
10837 /*
10838  * Check if the used space is correct with the dev item
10839  */
10840 static int check_dev_item(struct btrfs_fs_info *fs_info,
10841                           struct extent_buffer *eb, int slot)
10842 {
10843         struct btrfs_root *dev_root = fs_info->dev_root;
10844         struct btrfs_dev_item *dev_item;
10845         struct btrfs_path path;
10846         struct btrfs_key key;
10847         struct btrfs_dev_extent *ptr;
10848         u64 dev_id;
10849         u64 used;
10850         u64 total = 0;
10851         int ret;
10852
10853         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10854         dev_id = btrfs_device_id(eb, dev_item);
10855         used = btrfs_device_bytes_used(eb, dev_item);
10856
10857         key.objectid = dev_id;
10858         key.type = BTRFS_DEV_EXTENT_KEY;
10859         key.offset = 0;
10860
10861         btrfs_init_path(&path);
10862         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10863         if (ret < 0) {
10864                 btrfs_item_key_to_cpu(eb, &key, slot);
10865                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10866                         key.objectid, key.type, key.offset);
10867                 btrfs_release_path(&path);
10868                 return REFERENCER_MISSING;
10869         }
10870
10871         /* Iterate dev_extents to calculate the used space of a device */
10872         while (1) {
10873                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10874
10875                 if (key.objectid > dev_id)
10876                         break;
10877                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10878                         goto next;
10879
10880                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10881                                      struct btrfs_dev_extent);
10882                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10883 next:
10884                 ret = btrfs_next_item(dev_root, &path);
10885                 if (ret)
10886                         break;
10887         }
10888         btrfs_release_path(&path);
10889
10890         if (used != total) {
10891                 btrfs_item_key_to_cpu(eb, &key, slot);
10892                 error(
10893 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10894                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10895                         BTRFS_DEV_EXTENT_KEY, dev_id);
10896                 return ACCOUNTING_MISMATCH;
10897         }
10898         return 0;
10899 }
10900
10901 /*
10902  * Check a block group item with its referener (chunk) and its used space
10903  * with extent/metadata item
10904  */
10905 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10906                                   struct extent_buffer *eb, int slot)
10907 {
10908         struct btrfs_root *extent_root = fs_info->extent_root;
10909         struct btrfs_root *chunk_root = fs_info->chunk_root;
10910         struct btrfs_block_group_item *bi;
10911         struct btrfs_block_group_item bg_item;
10912         struct btrfs_path path;
10913         struct btrfs_key bg_key;
10914         struct btrfs_key chunk_key;
10915         struct btrfs_key extent_key;
10916         struct btrfs_chunk *chunk;
10917         struct extent_buffer *leaf;
10918         struct btrfs_extent_item *ei;
10919         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10920         u64 flags;
10921         u64 bg_flags;
10922         u64 used;
10923         u64 total = 0;
10924         int ret;
10925         int err = 0;
10926
10927         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10928         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10929         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10930         used = btrfs_block_group_used(&bg_item);
10931         bg_flags = btrfs_block_group_flags(&bg_item);
10932
10933         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10934         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10935         chunk_key.offset = bg_key.objectid;
10936
10937         btrfs_init_path(&path);
10938         /* Search for the referencer chunk */
10939         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10940         if (ret) {
10941                 error(
10942                 "block group[%llu %llu] did not find the related chunk item",
10943                         bg_key.objectid, bg_key.offset);
10944                 err |= REFERENCER_MISSING;
10945         } else {
10946                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10947                                         struct btrfs_chunk);
10948                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10949                                                 bg_key.offset) {
10950                         error(
10951         "block group[%llu %llu] related chunk item length does not match",
10952                                 bg_key.objectid, bg_key.offset);
10953                         err |= REFERENCER_MISMATCH;
10954                 }
10955         }
10956         btrfs_release_path(&path);
10957
10958         /* Search from the block group bytenr */
10959         extent_key.objectid = bg_key.objectid;
10960         extent_key.type = 0;
10961         extent_key.offset = 0;
10962
10963         btrfs_init_path(&path);
10964         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10965         if (ret < 0)
10966                 goto out;
10967
10968         /* Iterate extent tree to account used space */
10969         while (1) {
10970                 leaf = path.nodes[0];
10971                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10972                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10973                         break;
10974
10975                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10976                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10977                         goto next;
10978                 if (extent_key.objectid < bg_key.objectid)
10979                         goto next;
10980
10981                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10982                         total += nodesize;
10983                 else
10984                         total += extent_key.offset;
10985
10986                 ei = btrfs_item_ptr(leaf, path.slots[0],
10987                                     struct btrfs_extent_item);
10988                 flags = btrfs_extent_flags(leaf, ei);
10989                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10990                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10991                                 error(
10992                         "bad extent[%llu, %llu) type mismatch with chunk",
10993                                         extent_key.objectid,
10994                                         extent_key.objectid + extent_key.offset);
10995                                 err |= CHUNK_TYPE_MISMATCH;
10996                         }
10997                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10998                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10999                                     BTRFS_BLOCK_GROUP_METADATA))) {
11000                                 error(
11001                         "bad extent[%llu, %llu) type mismatch with chunk",
11002                                         extent_key.objectid,
11003                                         extent_key.objectid + nodesize);
11004                                 err |= CHUNK_TYPE_MISMATCH;
11005                         }
11006                 }
11007 next:
11008                 ret = btrfs_next_item(extent_root, &path);
11009                 if (ret)
11010                         break;
11011         }
11012
11013 out:
11014         btrfs_release_path(&path);
11015
11016         if (total != used) {
11017                 error(
11018                 "block group[%llu %llu] used %llu but extent items used %llu",
11019                         bg_key.objectid, bg_key.offset, used, total);
11020                 err |= ACCOUNTING_MISMATCH;
11021         }
11022         return err;
11023 }
11024
11025 /*
11026  * Check a chunk item.
11027  * Including checking all referred dev_extents and block group
11028  */
11029 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11030                             struct extent_buffer *eb, int slot)
11031 {
11032         struct btrfs_root *extent_root = fs_info->extent_root;
11033         struct btrfs_root *dev_root = fs_info->dev_root;
11034         struct btrfs_path path;
11035         struct btrfs_key chunk_key;
11036         struct btrfs_key bg_key;
11037         struct btrfs_key devext_key;
11038         struct btrfs_chunk *chunk;
11039         struct extent_buffer *leaf;
11040         struct btrfs_block_group_item *bi;
11041         struct btrfs_block_group_item bg_item;
11042         struct btrfs_dev_extent *ptr;
11043         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11044         u64 length;
11045         u64 chunk_end;
11046         u64 type;
11047         u64 profile;
11048         int num_stripes;
11049         u64 offset;
11050         u64 objectid;
11051         int i;
11052         int ret;
11053         int err = 0;
11054
11055         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11056         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11057         length = btrfs_chunk_length(eb, chunk);
11058         chunk_end = chunk_key.offset + length;
11059         if (!IS_ALIGNED(length, sectorsize)) {
11060                 error("chunk[%llu %llu) not aligned to %u",
11061                         chunk_key.offset, chunk_end, sectorsize);
11062                 err |= BYTES_UNALIGNED;
11063                 goto out;
11064         }
11065
11066         type = btrfs_chunk_type(eb, chunk);
11067         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11068         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11069                 error("chunk[%llu %llu) has no chunk type",
11070                         chunk_key.offset, chunk_end);
11071                 err |= UNKNOWN_TYPE;
11072         }
11073         if (profile && (profile & (profile - 1))) {
11074                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11075                         chunk_key.offset, chunk_end, profile);
11076                 err |= UNKNOWN_TYPE;
11077         }
11078
11079         bg_key.objectid = chunk_key.offset;
11080         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11081         bg_key.offset = length;
11082
11083         btrfs_init_path(&path);
11084         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11085         if (ret) {
11086                 error(
11087                 "chunk[%llu %llu) did not find the related block group item",
11088                         chunk_key.offset, chunk_end);
11089                 err |= REFERENCER_MISSING;
11090         } else{
11091                 leaf = path.nodes[0];
11092                 bi = btrfs_item_ptr(leaf, path.slots[0],
11093                                     struct btrfs_block_group_item);
11094                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11095                                    sizeof(bg_item));
11096                 if (btrfs_block_group_flags(&bg_item) != type) {
11097                         error(
11098 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11099                                 chunk_key.offset, chunk_end, type,
11100                                 btrfs_block_group_flags(&bg_item));
11101                         err |= REFERENCER_MISSING;
11102                 }
11103         }
11104
11105         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11106         for (i = 0; i < num_stripes; i++) {
11107                 btrfs_release_path(&path);
11108                 btrfs_init_path(&path);
11109                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11110                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11111                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11112
11113                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11114                                         0, 0);
11115                 if (ret)
11116                         goto not_match_dev;
11117
11118                 leaf = path.nodes[0];
11119                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11120                                      struct btrfs_dev_extent);
11121                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11122                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11123                 if (objectid != chunk_key.objectid ||
11124                     offset != chunk_key.offset ||
11125                     btrfs_dev_extent_length(leaf, ptr) != length)
11126                         goto not_match_dev;
11127                 continue;
11128 not_match_dev:
11129                 err |= BACKREF_MISSING;
11130                 error(
11131                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11132                         chunk_key.objectid, chunk_end, i);
11133                 continue;
11134         }
11135         btrfs_release_path(&path);
11136 out:
11137         return err;
11138 }
11139
11140 /*
11141  * Main entry function to check known items and update related accounting info
11142  */
11143 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11144 {
11145         struct btrfs_fs_info *fs_info = root->fs_info;
11146         struct btrfs_key key;
11147         int slot = 0;
11148         int type;
11149         struct btrfs_extent_data_ref *dref;
11150         int ret;
11151         int err = 0;
11152
11153 next:
11154         btrfs_item_key_to_cpu(eb, &key, slot);
11155         type = key.type;
11156
11157         switch (type) {
11158         case BTRFS_EXTENT_DATA_KEY:
11159                 ret = check_extent_data_item(root, eb, slot);
11160                 err |= ret;
11161                 break;
11162         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11163                 ret = check_block_group_item(fs_info, eb, slot);
11164                 err |= ret;
11165                 break;
11166         case BTRFS_DEV_ITEM_KEY:
11167                 ret = check_dev_item(fs_info, eb, slot);
11168                 err |= ret;
11169                 break;
11170         case BTRFS_CHUNK_ITEM_KEY:
11171                 ret = check_chunk_item(fs_info, eb, slot);
11172                 err |= ret;
11173                 break;
11174         case BTRFS_DEV_EXTENT_KEY:
11175                 ret = check_dev_extent_item(fs_info, eb, slot);
11176                 err |= ret;
11177                 break;
11178         case BTRFS_EXTENT_ITEM_KEY:
11179         case BTRFS_METADATA_ITEM_KEY:
11180                 ret = check_extent_item(fs_info, eb, slot);
11181                 err |= ret;
11182                 break;
11183         case BTRFS_EXTENT_CSUM_KEY:
11184                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11185                 break;
11186         case BTRFS_TREE_BLOCK_REF_KEY:
11187                 ret = check_tree_block_backref(fs_info, key.offset,
11188                                                key.objectid, -1);
11189                 err |= ret;
11190                 break;
11191         case BTRFS_EXTENT_DATA_REF_KEY:
11192                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11193                 ret = check_extent_data_backref(fs_info,
11194                                 btrfs_extent_data_ref_root(eb, dref),
11195                                 btrfs_extent_data_ref_objectid(eb, dref),
11196                                 btrfs_extent_data_ref_offset(eb, dref),
11197                                 key.objectid, 0,
11198                                 btrfs_extent_data_ref_count(eb, dref));
11199                 err |= ret;
11200                 break;
11201         case BTRFS_SHARED_BLOCK_REF_KEY:
11202                 ret = check_shared_block_backref(fs_info, key.offset,
11203                                                  key.objectid, -1);
11204                 err |= ret;
11205                 break;
11206         case BTRFS_SHARED_DATA_REF_KEY:
11207                 ret = check_shared_data_backref(fs_info, key.offset,
11208                                                 key.objectid);
11209                 err |= ret;
11210                 break;
11211         default:
11212                 break;
11213         }
11214
11215         if (++slot < btrfs_header_nritems(eb))
11216                 goto next;
11217
11218         return err;
11219 }
11220
11221 /*
11222  * Helper function for later fs/subvol tree check.  To determine if a tree
11223  * block should be checked.
11224  * This function will ensure only the direct referencer with lowest rootid to
11225  * check a fs/subvolume tree block.
11226  *
11227  * Backref check at extent tree would detect errors like missing subvolume
11228  * tree, so we can do aggressive check to reduce duplicated checks.
11229  */
11230 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11231 {
11232         struct btrfs_root *extent_root = root->fs_info->extent_root;
11233         struct btrfs_key key;
11234         struct btrfs_path path;
11235         struct extent_buffer *leaf;
11236         int slot;
11237         struct btrfs_extent_item *ei;
11238         unsigned long ptr;
11239         unsigned long end;
11240         int type;
11241         u32 item_size;
11242         u64 offset;
11243         struct btrfs_extent_inline_ref *iref;
11244         int ret;
11245
11246         btrfs_init_path(&path);
11247         key.objectid = btrfs_header_bytenr(eb);
11248         key.type = BTRFS_METADATA_ITEM_KEY;
11249         key.offset = (u64)-1;
11250
11251         /*
11252          * Any failure in backref resolving means we can't determine
11253          * whom the tree block belongs to.
11254          * So in that case, we need to check that tree block
11255          */
11256         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11257         if (ret < 0)
11258                 goto need_check;
11259
11260         ret = btrfs_previous_extent_item(extent_root, &path,
11261                                          btrfs_header_bytenr(eb));
11262         if (ret)
11263                 goto need_check;
11264
11265         leaf = path.nodes[0];
11266         slot = path.slots[0];
11267         btrfs_item_key_to_cpu(leaf, &key, slot);
11268         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11269
11270         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11271                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11272         } else {
11273                 struct btrfs_tree_block_info *info;
11274
11275                 info = (struct btrfs_tree_block_info *)(ei + 1);
11276                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11277         }
11278
11279         item_size = btrfs_item_size_nr(leaf, slot);
11280         ptr = (unsigned long)iref;
11281         end = (unsigned long)ei + item_size;
11282         while (ptr < end) {
11283                 iref = (struct btrfs_extent_inline_ref *)ptr;
11284                 type = btrfs_extent_inline_ref_type(leaf, iref);
11285                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11286
11287                 /*
11288                  * We only check the tree block if current root is
11289                  * the lowest referencer of it.
11290                  */
11291                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11292                     offset < root->objectid) {
11293                         btrfs_release_path(&path);
11294                         return 0;
11295                 }
11296
11297                 ptr += btrfs_extent_inline_ref_size(type);
11298         }
11299         /*
11300          * Normally we should also check keyed tree block ref, but that may be
11301          * very time consuming.  Inlined ref should already make us skip a lot
11302          * of refs now.  So skip search keyed tree block ref.
11303          */
11304
11305 need_check:
11306         btrfs_release_path(&path);
11307         return 1;
11308 }
11309
11310 /*
11311  * Traversal function for tree block. We will do:
11312  * 1) Skip shared fs/subvolume tree blocks
11313  * 2) Update related bytes accounting
11314  * 3) Pre-order traversal
11315  */
11316 static int traverse_tree_block(struct btrfs_root *root,
11317                                 struct extent_buffer *node)
11318 {
11319         struct extent_buffer *eb;
11320         struct btrfs_key key;
11321         struct btrfs_key drop_key;
11322         int level;
11323         u64 nr;
11324         int i;
11325         int err = 0;
11326         int ret;
11327
11328         /*
11329          * Skip shared fs/subvolume tree block, in that case they will
11330          * be checked by referencer with lowest rootid
11331          */
11332         if (is_fstree(root->objectid) && !should_check(root, node))
11333                 return 0;
11334
11335         /* Update bytes accounting */
11336         total_btree_bytes += node->len;
11337         if (fs_root_objectid(btrfs_header_owner(node)))
11338                 total_fs_tree_bytes += node->len;
11339         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11340                 total_extent_tree_bytes += node->len;
11341         if (!found_old_backref &&
11342             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11343             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11344             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11345                 found_old_backref = 1;
11346
11347         /* pre-order tranversal, check itself first */
11348         level = btrfs_header_level(node);
11349         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11350                                    btrfs_header_level(node),
11351                                    btrfs_header_owner(node));
11352         err |= ret;
11353         if (err)
11354                 error(
11355         "check %s failed root %llu bytenr %llu level %d, force continue check",
11356                         level ? "node":"leaf", root->objectid,
11357                         btrfs_header_bytenr(node), btrfs_header_level(node));
11358
11359         if (!level) {
11360                 btree_space_waste += btrfs_leaf_free_space(root, node);
11361                 ret = check_leaf_items(root, node);
11362                 err |= ret;
11363                 return err;
11364         }
11365
11366         nr = btrfs_header_nritems(node);
11367         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11368         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11369                 sizeof(struct btrfs_key_ptr);
11370
11371         /* Then check all its children */
11372         for (i = 0; i < nr; i++) {
11373                 u64 blocknr = btrfs_node_blockptr(node, i);
11374
11375                 btrfs_node_key_to_cpu(node, &key, i);
11376                 if (level == root->root_item.drop_level &&
11377                     is_dropped_key(&key, &drop_key))
11378                         continue;
11379
11380                 /*
11381                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11382                  * to call the function itself.
11383                  */
11384                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11385                 if (extent_buffer_uptodate(eb)) {
11386                         ret = traverse_tree_block(root, eb);
11387                         err |= ret;
11388                 }
11389                 free_extent_buffer(eb);
11390         }
11391
11392         return err;
11393 }
11394
11395 /*
11396  * Low memory usage version check_chunks_and_extents.
11397  */
11398 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11399 {
11400         struct btrfs_path path;
11401         struct btrfs_key key;
11402         struct btrfs_root *root1;
11403         struct btrfs_root *cur_root;
11404         int err = 0;
11405         int ret;
11406
11407         root1 = root->fs_info->chunk_root;
11408         ret = traverse_tree_block(root1, root1->node);
11409         err |= ret;
11410
11411         root1 = root->fs_info->tree_root;
11412         ret = traverse_tree_block(root1, root1->node);
11413         err |= ret;
11414
11415         btrfs_init_path(&path);
11416         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11417         key.offset = 0;
11418         key.type = BTRFS_ROOT_ITEM_KEY;
11419
11420         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11421         if (ret) {
11422                 error("cannot find extent treet in tree_root");
11423                 goto out;
11424         }
11425
11426         while (1) {
11427                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11428                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11429                         goto next;
11430                 key.offset = (u64)-1;
11431
11432                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11433                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11434                                         &key);
11435                 else
11436                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11437                 if (IS_ERR(cur_root) || !cur_root) {
11438                         error("failed to read tree: %lld", key.objectid);
11439                         goto next;
11440                 }
11441
11442                 ret = traverse_tree_block(cur_root, cur_root->node);
11443                 err |= ret;
11444
11445                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11446                         btrfs_free_fs_root(cur_root);
11447 next:
11448                 ret = btrfs_next_item(root1, &path);
11449                 if (ret)
11450                         goto out;
11451         }
11452
11453 out:
11454         btrfs_release_path(&path);
11455         return err;
11456 }
11457
11458 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11459                            struct btrfs_root *root, int overwrite)
11460 {
11461         struct extent_buffer *c;
11462         struct extent_buffer *old = root->node;
11463         int level;
11464         int ret;
11465         struct btrfs_disk_key disk_key = {0,0,0};
11466
11467         level = 0;
11468
11469         if (overwrite) {
11470                 c = old;
11471                 extent_buffer_get(c);
11472                 goto init;
11473         }
11474         c = btrfs_alloc_free_block(trans, root,
11475                                    root->nodesize,
11476                                    root->root_key.objectid,
11477                                    &disk_key, level, 0, 0);
11478         if (IS_ERR(c)) {
11479                 c = old;
11480                 extent_buffer_get(c);
11481                 overwrite = 1;
11482         }
11483 init:
11484         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11485         btrfs_set_header_level(c, level);
11486         btrfs_set_header_bytenr(c, c->start);
11487         btrfs_set_header_generation(c, trans->transid);
11488         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11489         btrfs_set_header_owner(c, root->root_key.objectid);
11490
11491         write_extent_buffer(c, root->fs_info->fsid,
11492                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11493
11494         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11495                             btrfs_header_chunk_tree_uuid(c),
11496                             BTRFS_UUID_SIZE);
11497
11498         btrfs_mark_buffer_dirty(c);
11499         /*
11500          * this case can happen in the following case:
11501          *
11502          * 1.overwrite previous root.
11503          *
11504          * 2.reinit reloc data root, this is because we skip pin
11505          * down reloc data tree before which means we can allocate
11506          * same block bytenr here.
11507          */
11508         if (old->start == c->start) {
11509                 btrfs_set_root_generation(&root->root_item,
11510                                           trans->transid);
11511                 root->root_item.level = btrfs_header_level(root->node);
11512                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11513                                         &root->root_key, &root->root_item);
11514                 if (ret) {
11515                         free_extent_buffer(c);
11516                         return ret;
11517                 }
11518         }
11519         free_extent_buffer(old);
11520         root->node = c;
11521         add_root_to_dirty_list(root);
11522         return 0;
11523 }
11524
11525 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11526                                 struct extent_buffer *eb, int tree_root)
11527 {
11528         struct extent_buffer *tmp;
11529         struct btrfs_root_item *ri;
11530         struct btrfs_key key;
11531         u64 bytenr;
11532         u32 nodesize;
11533         int level = btrfs_header_level(eb);
11534         int nritems;
11535         int ret;
11536         int i;
11537
11538         /*
11539          * If we have pinned this block before, don't pin it again.
11540          * This can not only avoid forever loop with broken filesystem
11541          * but also give us some speedups.
11542          */
11543         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11544                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11545                 return 0;
11546
11547         btrfs_pin_extent(fs_info, eb->start, eb->len);
11548
11549         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11550         nritems = btrfs_header_nritems(eb);
11551         for (i = 0; i < nritems; i++) {
11552                 if (level == 0) {
11553                         btrfs_item_key_to_cpu(eb, &key, i);
11554                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11555                                 continue;
11556                         /* Skip the extent root and reloc roots */
11557                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11558                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11559                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11560                                 continue;
11561                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11562                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11563
11564                         /*
11565                          * If at any point we start needing the real root we
11566                          * will have to build a stump root for the root we are
11567                          * in, but for now this doesn't actually use the root so
11568                          * just pass in extent_root.
11569                          */
11570                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11571                                               nodesize, 0);
11572                         if (!extent_buffer_uptodate(tmp)) {
11573                                 fprintf(stderr, "Error reading root block\n");
11574                                 return -EIO;
11575                         }
11576                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11577                         free_extent_buffer(tmp);
11578                         if (ret)
11579                                 return ret;
11580                 } else {
11581                         bytenr = btrfs_node_blockptr(eb, i);
11582
11583                         /* If we aren't the tree root don't read the block */
11584                         if (level == 1 && !tree_root) {
11585                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11586                                 continue;
11587                         }
11588
11589                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11590                                               nodesize, 0);
11591                         if (!extent_buffer_uptodate(tmp)) {
11592                                 fprintf(stderr, "Error reading tree block\n");
11593                                 return -EIO;
11594                         }
11595                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11596                         free_extent_buffer(tmp);
11597                         if (ret)
11598                                 return ret;
11599                 }
11600         }
11601
11602         return 0;
11603 }
11604
11605 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11606 {
11607         int ret;
11608
11609         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11610         if (ret)
11611                 return ret;
11612
11613         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11614 }
11615
11616 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11617 {
11618         struct btrfs_block_group_cache *cache;
11619         struct btrfs_path path;
11620         struct extent_buffer *leaf;
11621         struct btrfs_chunk *chunk;
11622         struct btrfs_key key;
11623         int ret;
11624         u64 start;
11625
11626         btrfs_init_path(&path);
11627         key.objectid = 0;
11628         key.type = BTRFS_CHUNK_ITEM_KEY;
11629         key.offset = 0;
11630         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11631         if (ret < 0) {
11632                 btrfs_release_path(&path);
11633                 return ret;
11634         }
11635
11636         /*
11637          * We do this in case the block groups were screwed up and had alloc
11638          * bits that aren't actually set on the chunks.  This happens with
11639          * restored images every time and could happen in real life I guess.
11640          */
11641         fs_info->avail_data_alloc_bits = 0;
11642         fs_info->avail_metadata_alloc_bits = 0;
11643         fs_info->avail_system_alloc_bits = 0;
11644
11645         /* First we need to create the in-memory block groups */
11646         while (1) {
11647                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11648                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11649                         if (ret < 0) {
11650                                 btrfs_release_path(&path);
11651                                 return ret;
11652                         }
11653                         if (ret) {
11654                                 ret = 0;
11655                                 break;
11656                         }
11657                 }
11658                 leaf = path.nodes[0];
11659                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11660                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11661                         path.slots[0]++;
11662                         continue;
11663                 }
11664
11665                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11666                 btrfs_add_block_group(fs_info, 0,
11667                                       btrfs_chunk_type(leaf, chunk),
11668                                       key.objectid, key.offset,
11669                                       btrfs_chunk_length(leaf, chunk));
11670                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11671                                  key.offset + btrfs_chunk_length(leaf, chunk));
11672                 path.slots[0]++;
11673         }
11674         start = 0;
11675         while (1) {
11676                 cache = btrfs_lookup_first_block_group(fs_info, start);
11677                 if (!cache)
11678                         break;
11679                 cache->cached = 1;
11680                 start = cache->key.objectid + cache->key.offset;
11681         }
11682
11683         btrfs_release_path(&path);
11684         return 0;
11685 }
11686
11687 static int reset_balance(struct btrfs_trans_handle *trans,
11688                          struct btrfs_fs_info *fs_info)
11689 {
11690         struct btrfs_root *root = fs_info->tree_root;
11691         struct btrfs_path path;
11692         struct extent_buffer *leaf;
11693         struct btrfs_key key;
11694         int del_slot, del_nr = 0;
11695         int ret;
11696         int found = 0;
11697
11698         btrfs_init_path(&path);
11699         key.objectid = BTRFS_BALANCE_OBJECTID;
11700         key.type = BTRFS_BALANCE_ITEM_KEY;
11701         key.offset = 0;
11702         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11703         if (ret) {
11704                 if (ret > 0)
11705                         ret = 0;
11706                 if (!ret)
11707                         goto reinit_data_reloc;
11708                 else
11709                         goto out;
11710         }
11711
11712         ret = btrfs_del_item(trans, root, &path);
11713         if (ret)
11714                 goto out;
11715         btrfs_release_path(&path);
11716
11717         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11718         key.type = BTRFS_ROOT_ITEM_KEY;
11719         key.offset = 0;
11720         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11721         if (ret < 0)
11722                 goto out;
11723         while (1) {
11724                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11725                         if (!found)
11726                                 break;
11727
11728                         if (del_nr) {
11729                                 ret = btrfs_del_items(trans, root, &path,
11730                                                       del_slot, del_nr);
11731                                 del_nr = 0;
11732                                 if (ret)
11733                                         goto out;
11734                         }
11735                         key.offset++;
11736                         btrfs_release_path(&path);
11737
11738                         found = 0;
11739                         ret = btrfs_search_slot(trans, root, &key, &path,
11740                                                 -1, 1);
11741                         if (ret < 0)
11742                                 goto out;
11743                         continue;
11744                 }
11745                 found = 1;
11746                 leaf = path.nodes[0];
11747                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11748                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11749                         break;
11750                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11751                         path.slots[0]++;
11752                         continue;
11753                 }
11754                 if (!del_nr) {
11755                         del_slot = path.slots[0];
11756                         del_nr = 1;
11757                 } else {
11758                         del_nr++;
11759                 }
11760                 path.slots[0]++;
11761         }
11762
11763         if (del_nr) {
11764                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11765                 if (ret)
11766                         goto out;
11767         }
11768         btrfs_release_path(&path);
11769
11770 reinit_data_reloc:
11771         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11772         key.type = BTRFS_ROOT_ITEM_KEY;
11773         key.offset = (u64)-1;
11774         root = btrfs_read_fs_root(fs_info, &key);
11775         if (IS_ERR(root)) {
11776                 fprintf(stderr, "Error reading data reloc tree\n");
11777                 ret = PTR_ERR(root);
11778                 goto out;
11779         }
11780         record_root_in_trans(trans, root);
11781         ret = btrfs_fsck_reinit_root(trans, root, 0);
11782         if (ret)
11783                 goto out;
11784         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11785 out:
11786         btrfs_release_path(&path);
11787         return ret;
11788 }
11789
11790 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11791                               struct btrfs_fs_info *fs_info)
11792 {
11793         u64 start = 0;
11794         int ret;
11795
11796         /*
11797          * The only reason we don't do this is because right now we're just
11798          * walking the trees we find and pinning down their bytes, we don't look
11799          * at any of the leaves.  In order to do mixed groups we'd have to check
11800          * the leaves of any fs roots and pin down the bytes for any file
11801          * extents we find.  Not hard but why do it if we don't have to?
11802          */
11803         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11804                 fprintf(stderr, "We don't support re-initing the extent tree "
11805                         "for mixed block groups yet, please notify a btrfs "
11806                         "developer you want to do this so they can add this "
11807                         "functionality.\n");
11808                 return -EINVAL;
11809         }
11810
11811         /*
11812          * first we need to walk all of the trees except the extent tree and pin
11813          * down the bytes that are in use so we don't overwrite any existing
11814          * metadata.
11815          */
11816         ret = pin_metadata_blocks(fs_info);
11817         if (ret) {
11818                 fprintf(stderr, "error pinning down used bytes\n");
11819                 return ret;
11820         }
11821
11822         /*
11823          * Need to drop all the block groups since we're going to recreate all
11824          * of them again.
11825          */
11826         btrfs_free_block_groups(fs_info);
11827         ret = reset_block_groups(fs_info);
11828         if (ret) {
11829                 fprintf(stderr, "error resetting the block groups\n");
11830                 return ret;
11831         }
11832
11833         /* Ok we can allocate now, reinit the extent root */
11834         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11835         if (ret) {
11836                 fprintf(stderr, "extent root initialization failed\n");
11837                 /*
11838                  * When the transaction code is updated we should end the
11839                  * transaction, but for now progs only knows about commit so
11840                  * just return an error.
11841                  */
11842                 return ret;
11843         }
11844
11845         /*
11846          * Now we have all the in-memory block groups setup so we can make
11847          * allocations properly, and the metadata we care about is safe since we
11848          * pinned all of it above.
11849          */
11850         while (1) {
11851                 struct btrfs_block_group_cache *cache;
11852
11853                 cache = btrfs_lookup_first_block_group(fs_info, start);
11854                 if (!cache)
11855                         break;
11856                 start = cache->key.objectid + cache->key.offset;
11857                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11858                                         &cache->key, &cache->item,
11859                                         sizeof(cache->item));
11860                 if (ret) {
11861                         fprintf(stderr, "Error adding block group\n");
11862                         return ret;
11863                 }
11864                 btrfs_extent_post_op(trans, fs_info->extent_root);
11865         }
11866
11867         ret = reset_balance(trans, fs_info);
11868         if (ret)
11869                 fprintf(stderr, "error resetting the pending balance\n");
11870
11871         return ret;
11872 }
11873
11874 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11875 {
11876         struct btrfs_path path;
11877         struct btrfs_trans_handle *trans;
11878         struct btrfs_key key;
11879         int ret;
11880
11881         printf("Recowing metadata block %llu\n", eb->start);
11882         key.objectid = btrfs_header_owner(eb);
11883         key.type = BTRFS_ROOT_ITEM_KEY;
11884         key.offset = (u64)-1;
11885
11886         root = btrfs_read_fs_root(root->fs_info, &key);
11887         if (IS_ERR(root)) {
11888                 fprintf(stderr, "Couldn't find owner root %llu\n",
11889                         key.objectid);
11890                 return PTR_ERR(root);
11891         }
11892
11893         trans = btrfs_start_transaction(root, 1);
11894         if (IS_ERR(trans))
11895                 return PTR_ERR(trans);
11896
11897         btrfs_init_path(&path);
11898         path.lowest_level = btrfs_header_level(eb);
11899         if (path.lowest_level)
11900                 btrfs_node_key_to_cpu(eb, &key, 0);
11901         else
11902                 btrfs_item_key_to_cpu(eb, &key, 0);
11903
11904         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11905         btrfs_commit_transaction(trans, root);
11906         btrfs_release_path(&path);
11907         return ret;
11908 }
11909
11910 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11911 {
11912         struct btrfs_path path;
11913         struct btrfs_trans_handle *trans;
11914         struct btrfs_key key;
11915         int ret;
11916
11917         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11918                bad->key.type, bad->key.offset);
11919         key.objectid = bad->root_id;
11920         key.type = BTRFS_ROOT_ITEM_KEY;
11921         key.offset = (u64)-1;
11922
11923         root = btrfs_read_fs_root(root->fs_info, &key);
11924         if (IS_ERR(root)) {
11925                 fprintf(stderr, "Couldn't find owner root %llu\n",
11926                         key.objectid);
11927                 return PTR_ERR(root);
11928         }
11929
11930         trans = btrfs_start_transaction(root, 1);
11931         if (IS_ERR(trans))
11932                 return PTR_ERR(trans);
11933
11934         btrfs_init_path(&path);
11935         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11936         if (ret) {
11937                 if (ret > 0)
11938                         ret = 0;
11939                 goto out;
11940         }
11941         ret = btrfs_del_item(trans, root, &path);
11942 out:
11943         btrfs_commit_transaction(trans, root);
11944         btrfs_release_path(&path);
11945         return ret;
11946 }
11947
11948 static int zero_log_tree(struct btrfs_root *root)
11949 {
11950         struct btrfs_trans_handle *trans;
11951         int ret;
11952
11953         trans = btrfs_start_transaction(root, 1);
11954         if (IS_ERR(trans)) {
11955                 ret = PTR_ERR(trans);
11956                 return ret;
11957         }
11958         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11959         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11960         ret = btrfs_commit_transaction(trans, root);
11961         return ret;
11962 }
11963
11964 static int populate_csum(struct btrfs_trans_handle *trans,
11965                          struct btrfs_root *csum_root, char *buf, u64 start,
11966                          u64 len)
11967 {
11968         u64 offset = 0;
11969         u64 sectorsize;
11970         int ret = 0;
11971
11972         while (offset < len) {
11973                 sectorsize = csum_root->sectorsize;
11974                 ret = read_extent_data(csum_root, buf, start + offset,
11975                                        &sectorsize, 0);
11976                 if (ret)
11977                         break;
11978                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11979                                             start + offset, buf, sectorsize);
11980                 if (ret)
11981                         break;
11982                 offset += sectorsize;
11983         }
11984         return ret;
11985 }
11986
11987 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11988                                       struct btrfs_root *csum_root,
11989                                       struct btrfs_root *cur_root)
11990 {
11991         struct btrfs_path path;
11992         struct btrfs_key key;
11993         struct extent_buffer *node;
11994         struct btrfs_file_extent_item *fi;
11995         char *buf = NULL;
11996         u64 start = 0;
11997         u64 len = 0;
11998         int slot = 0;
11999         int ret = 0;
12000
12001         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12002         if (!buf)
12003                 return -ENOMEM;
12004
12005         btrfs_init_path(&path);
12006         key.objectid = 0;
12007         key.offset = 0;
12008         key.type = 0;
12009         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12010         if (ret < 0)
12011                 goto out;
12012         /* Iterate all regular file extents and fill its csum */
12013         while (1) {
12014                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12015
12016                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12017                         goto next;
12018                 node = path.nodes[0];
12019                 slot = path.slots[0];
12020                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12021                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12022                         goto next;
12023                 start = btrfs_file_extent_disk_bytenr(node, fi);
12024                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12025
12026                 ret = populate_csum(trans, csum_root, buf, start, len);
12027                 if (ret == -EEXIST)
12028                         ret = 0;
12029                 if (ret < 0)
12030                         goto out;
12031 next:
12032                 /*
12033                  * TODO: if next leaf is corrupted, jump to nearest next valid
12034                  * leaf.
12035                  */
12036                 ret = btrfs_next_item(cur_root, &path);
12037                 if (ret < 0)
12038                         goto out;
12039                 if (ret > 0) {
12040                         ret = 0;
12041                         goto out;
12042                 }
12043         }
12044
12045 out:
12046         btrfs_release_path(&path);
12047         free(buf);
12048         return ret;
12049 }
12050
12051 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12052                                   struct btrfs_root *csum_root)
12053 {
12054         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12055         struct btrfs_path path;
12056         struct btrfs_root *tree_root = fs_info->tree_root;
12057         struct btrfs_root *cur_root;
12058         struct extent_buffer *node;
12059         struct btrfs_key key;
12060         int slot = 0;
12061         int ret = 0;
12062
12063         btrfs_init_path(&path);
12064         key.objectid = BTRFS_FS_TREE_OBJECTID;
12065         key.offset = 0;
12066         key.type = BTRFS_ROOT_ITEM_KEY;
12067         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12068         if (ret < 0)
12069                 goto out;
12070         if (ret > 0) {
12071                 ret = -ENOENT;
12072                 goto out;
12073         }
12074
12075         while (1) {
12076                 node = path.nodes[0];
12077                 slot = path.slots[0];
12078                 btrfs_item_key_to_cpu(node, &key, slot);
12079                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12080                         goto out;
12081                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12082                         goto next;
12083                 if (!is_fstree(key.objectid))
12084                         goto next;
12085                 key.offset = (u64)-1;
12086
12087                 cur_root = btrfs_read_fs_root(fs_info, &key);
12088                 if (IS_ERR(cur_root) || !cur_root) {
12089                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12090                                 key.objectid);
12091                         goto out;
12092                 }
12093                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12094                                 cur_root);
12095                 if (ret < 0)
12096                         goto out;
12097 next:
12098                 ret = btrfs_next_item(tree_root, &path);
12099                 if (ret > 0) {
12100                         ret = 0;
12101                         goto out;
12102                 }
12103                 if (ret < 0)
12104                         goto out;
12105         }
12106
12107 out:
12108         btrfs_release_path(&path);
12109         return ret;
12110 }
12111
12112 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12113                                       struct btrfs_root *csum_root)
12114 {
12115         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12116         struct btrfs_path path;
12117         struct btrfs_extent_item *ei;
12118         struct extent_buffer *leaf;
12119         char *buf;
12120         struct btrfs_key key;
12121         int ret;
12122
12123         btrfs_init_path(&path);
12124         key.objectid = 0;
12125         key.type = BTRFS_EXTENT_ITEM_KEY;
12126         key.offset = 0;
12127         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12128         if (ret < 0) {
12129                 btrfs_release_path(&path);
12130                 return ret;
12131         }
12132
12133         buf = malloc(csum_root->sectorsize);
12134         if (!buf) {
12135                 btrfs_release_path(&path);
12136                 return -ENOMEM;
12137         }
12138
12139         while (1) {
12140                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12141                         ret = btrfs_next_leaf(extent_root, &path);
12142                         if (ret < 0)
12143                                 break;
12144                         if (ret) {
12145                                 ret = 0;
12146                                 break;
12147                         }
12148                 }
12149                 leaf = path.nodes[0];
12150
12151                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12152                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12153                         path.slots[0]++;
12154                         continue;
12155                 }
12156
12157                 ei = btrfs_item_ptr(leaf, path.slots[0],
12158                                     struct btrfs_extent_item);
12159                 if (!(btrfs_extent_flags(leaf, ei) &
12160                       BTRFS_EXTENT_FLAG_DATA)) {
12161                         path.slots[0]++;
12162                         continue;
12163                 }
12164
12165                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12166                                     key.offset);
12167                 if (ret)
12168                         break;
12169                 path.slots[0]++;
12170         }
12171
12172         btrfs_release_path(&path);
12173         free(buf);
12174         return ret;
12175 }
12176
/*
 * Recalculate the data checksums and insert them into the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be relied on.  When search_fs_tree is non-zero the
 * fs/subvolume trees are used to find the data extents; otherwise the extent
 * tree is used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	int ret;

	if (search_fs_tree)
		ret = fill_csum_tree_from_fs(trans, csum_root);
	else
		ret = fill_csum_tree_from_extent(trans, csum_root);

	return ret;
}
12193
12194 static void free_roots_info_cache(void)
12195 {
12196         if (!roots_info_cache)
12197                 return;
12198
12199         while (!cache_tree_empty(roots_info_cache)) {
12200                 struct cache_extent *entry;
12201                 struct root_item_info *rii;
12202
12203                 entry = first_cache_extent(roots_info_cache);
12204                 if (!entry)
12205                         break;
12206                 remove_cache_extent(roots_info_cache, entry);
12207                 rii = container_of(entry, struct root_item_info, cache_extent);
12208                 free(rii);
12209         }
12210
12211         free(roots_info_cache);
12212         roots_info_cache = NULL;
12213 }
12214
12215 static int build_roots_info_cache(struct btrfs_fs_info *info)
12216 {
12217         int ret = 0;
12218         struct btrfs_key key;
12219         struct extent_buffer *leaf;
12220         struct btrfs_path path;
12221
12222         if (!roots_info_cache) {
12223                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12224                 if (!roots_info_cache)
12225                         return -ENOMEM;
12226                 cache_tree_init(roots_info_cache);
12227         }
12228
12229         btrfs_init_path(&path);
12230         key.objectid = 0;
12231         key.type = BTRFS_EXTENT_ITEM_KEY;
12232         key.offset = 0;
12233         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12234         if (ret < 0)
12235                 goto out;
12236         leaf = path.nodes[0];
12237
12238         while (1) {
12239                 struct btrfs_key found_key;
12240                 struct btrfs_extent_item *ei;
12241                 struct btrfs_extent_inline_ref *iref;
12242                 int slot = path.slots[0];
12243                 int type;
12244                 u64 flags;
12245                 u64 root_id;
12246                 u8 level;
12247                 struct cache_extent *entry;
12248                 struct root_item_info *rii;
12249
12250                 if (slot >= btrfs_header_nritems(leaf)) {
12251                         ret = btrfs_next_leaf(info->extent_root, &path);
12252                         if (ret < 0) {
12253                                 break;
12254                         } else if (ret) {
12255                                 ret = 0;
12256                                 break;
12257                         }
12258                         leaf = path.nodes[0];
12259                         slot = path.slots[0];
12260                 }
12261
12262                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12263
12264                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12265                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12266                         goto next;
12267
12268                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12269                 flags = btrfs_extent_flags(leaf, ei);
12270
12271                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12272                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12273                         goto next;
12274
12275                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12276                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12277                         level = found_key.offset;
12278                 } else {
12279                         struct btrfs_tree_block_info *binfo;
12280
12281                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12282                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12283                         level = btrfs_tree_block_level(leaf, binfo);
12284                 }
12285
12286                 /*
12287                  * For a root extent, it must be of the following type and the
12288                  * first (and only one) iref in the item.
12289                  */
12290                 type = btrfs_extent_inline_ref_type(leaf, iref);
12291                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12292                         goto next;
12293
12294                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12295                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12296                 if (!entry) {
12297                         rii = malloc(sizeof(struct root_item_info));
12298                         if (!rii) {
12299                                 ret = -ENOMEM;
12300                                 goto out;
12301                         }
12302                         rii->cache_extent.start = root_id;
12303                         rii->cache_extent.size = 1;
12304                         rii->level = (u8)-1;
12305                         entry = &rii->cache_extent;
12306                         ret = insert_cache_extent(roots_info_cache, entry);
12307                         ASSERT(ret == 0);
12308                 } else {
12309                         rii = container_of(entry, struct root_item_info,
12310                                            cache_extent);
12311                 }
12312
12313                 ASSERT(rii->cache_extent.start == root_id);
12314                 ASSERT(rii->cache_extent.size == 1);
12315
12316                 if (level > rii->level || rii->level == (u8)-1) {
12317                         rii->level = level;
12318                         rii->bytenr = found_key.objectid;
12319                         rii->gen = btrfs_extent_generation(leaf, ei);
12320                         rii->node_count = 1;
12321                 } else if (level == rii->level) {
12322                         rii->node_count++;
12323                 }
12324 next:
12325                 path.slots[0]++;
12326         }
12327
12328 out:
12329         btrfs_release_path(&path);
12330
12331         return ret;
12332 }
12333
12334 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12335                                   struct btrfs_path *path,
12336                                   const struct btrfs_key *root_key,
12337                                   const int read_only_mode)
12338 {
12339         const u64 root_id = root_key->objectid;
12340         struct cache_extent *entry;
12341         struct root_item_info *rii;
12342         struct btrfs_root_item ri;
12343         unsigned long offset;
12344
12345         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12346         if (!entry) {
12347                 fprintf(stderr,
12348                         "Error: could not find extent items for root %llu\n",
12349                         root_key->objectid);
12350                 return -ENOENT;
12351         }
12352
12353         rii = container_of(entry, struct root_item_info, cache_extent);
12354         ASSERT(rii->cache_extent.start == root_id);
12355         ASSERT(rii->cache_extent.size == 1);
12356
12357         if (rii->node_count != 1) {
12358                 fprintf(stderr,
12359                         "Error: could not find btree root extent for root %llu\n",
12360                         root_id);
12361                 return -ENOENT;
12362         }
12363
12364         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12365         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12366
12367         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12368             btrfs_root_level(&ri) != rii->level ||
12369             btrfs_root_generation(&ri) != rii->gen) {
12370
12371                 /*
12372                  * If we're in repair mode but our caller told us to not update
12373                  * the root item, i.e. just check if it needs to be updated, don't
12374                  * print this message, since the caller will call us again shortly
12375                  * for the same root item without read only mode (the caller will
12376                  * open a transaction first).
12377                  */
12378                 if (!(read_only_mode && repair))
12379                         fprintf(stderr,
12380                                 "%sroot item for root %llu,"
12381                                 " current bytenr %llu, current gen %llu, current level %u,"
12382                                 " new bytenr %llu, new gen %llu, new level %u\n",
12383                                 (read_only_mode ? "" : "fixing "),
12384                                 root_id,
12385                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12386                                 btrfs_root_level(&ri),
12387                                 rii->bytenr, rii->gen, rii->level);
12388
12389                 if (btrfs_root_generation(&ri) > rii->gen) {
12390                         fprintf(stderr,
12391                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12392                                 root_id, btrfs_root_generation(&ri), rii->gen);
12393                         return -EINVAL;
12394                 }
12395
12396                 if (!read_only_mode) {
12397                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12398                         btrfs_set_root_level(&ri, rii->level);
12399                         btrfs_set_root_generation(&ri, rii->gen);
12400                         write_extent_buffer(path->nodes[0], &ri,
12401                                             offset, sizeof(ri));
12402                 }
12403
12404                 return 1;
12405         }
12406
12407         return 0;
12408 }
12409
12410 /*
12411  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12412  * caused read-only snapshots to be corrupted if they were created at a moment
12413  * when the source subvolume/snapshot had orphan items. The issue was that the
12414  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12415  * node instead of the post orphan cleanup root node.
12416  * So this function, and its callees, just detects and fixes those cases. Even
12417  * though the regression was for read-only snapshots, this function applies to
12418  * any snapshot/subvolume root.
12419  * This must be run before any other repair code - not doing it so, makes other
12420  * repair code delete or modify backrefs in the extent tree for example, which
12421  * will result in an inconsistent fs after repairing the root items.
12422  */
12423 static int repair_root_items(struct btrfs_fs_info *info)
12424 {
12425         struct btrfs_path path;
12426         struct btrfs_key key;
12427         struct extent_buffer *leaf;
12428         struct btrfs_trans_handle *trans = NULL;
12429         int ret = 0;
12430         int bad_roots = 0;
12431         int need_trans = 0;
12432
12433         btrfs_init_path(&path);
12434
12435         ret = build_roots_info_cache(info);
12436         if (ret)
12437                 goto out;
12438
12439         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12440         key.type = BTRFS_ROOT_ITEM_KEY;
12441         key.offset = 0;
12442
12443 again:
12444         /*
12445          * Avoid opening and committing transactions if a leaf doesn't have
12446          * any root items that need to be fixed, so that we avoid rotating
12447          * backup roots unnecessarily.
12448          */
12449         if (need_trans) {
12450                 trans = btrfs_start_transaction(info->tree_root, 1);
12451                 if (IS_ERR(trans)) {
12452                         ret = PTR_ERR(trans);
12453                         goto out;
12454                 }
12455         }
12456
12457         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12458                                 0, trans ? 1 : 0);
12459         if (ret < 0)
12460                 goto out;
12461         leaf = path.nodes[0];
12462
12463         while (1) {
12464                 struct btrfs_key found_key;
12465
12466                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12467                         int no_more_keys = find_next_key(&path, &key);
12468
12469                         btrfs_release_path(&path);
12470                         if (trans) {
12471                                 ret = btrfs_commit_transaction(trans,
12472                                                                info->tree_root);
12473                                 trans = NULL;
12474                                 if (ret < 0)
12475                                         goto out;
12476                         }
12477                         need_trans = 0;
12478                         if (no_more_keys)
12479                                 break;
12480                         goto again;
12481                 }
12482
12483                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12484
12485                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12486                         goto next;
12487                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12488                         goto next;
12489
12490                 ret = maybe_repair_root_item(info, &path, &found_key,
12491                                              trans ? 0 : 1);
12492                 if (ret < 0)
12493                         goto out;
12494                 if (ret) {
12495                         if (!trans && repair) {
12496                                 need_trans = 1;
12497                                 key = found_key;
12498                                 btrfs_release_path(&path);
12499                                 goto again;
12500                         }
12501                         bad_roots++;
12502                 }
12503 next:
12504                 path.slots[0]++;
12505         }
12506         ret = 0;
12507 out:
12508         free_roots_info_cache();
12509         btrfs_release_path(&path);
12510         if (trans)
12511                 btrfs_commit_transaction(trans, info->tree_root);
12512         if (ret < 0)
12513                 return ret;
12514
12515         return bad_roots;
12516 }
12517
12518 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12519 {
12520         struct btrfs_trans_handle *trans;
12521         struct btrfs_block_group_cache *bg_cache;
12522         u64 current = 0;
12523         int ret = 0;
12524
12525         /* Clear all free space cache inodes and its extent data */
12526         while (1) {
12527                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12528                 if (!bg_cache)
12529                         break;
12530                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12531                 if (ret < 0)
12532                         return ret;
12533                 current = bg_cache->key.objectid + bg_cache->key.offset;
12534         }
12535
12536         /* Don't forget to set cache_generation to -1 */
12537         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12538         if (IS_ERR(trans)) {
12539                 error("failed to update super block cache generation");
12540                 return PTR_ERR(trans);
12541         }
12542         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12543         btrfs_commit_transaction(trans, fs_info->tree_root);
12544
12545         return ret;
12546 }
12547
/*
 * Usage text for "btrfs check", as a NULL-terminated array of lines.  It is
 * passed to usage() by cmd_check().  The first entries are the synopsis and
 * the descriptions; the option list follows the empty string.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12578
12579 int cmd_check(int argc, char **argv)
12580 {
12581         struct cache_tree root_cache;
12582         struct btrfs_root *root;
12583         struct btrfs_fs_info *info;
12584         u64 bytenr = 0;
12585         u64 subvolid = 0;
12586         u64 tree_root_bytenr = 0;
12587         u64 chunk_root_bytenr = 0;
12588         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12589         int ret;
12590         int err = 0;
12591         u64 num;
12592         int init_csum_tree = 0;
12593         int readonly = 0;
12594         int clear_space_cache = 0;
12595         int qgroup_report = 0;
12596         int qgroups_repaired = 0;
12597         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12598
12599         while(1) {
12600                 int c;
12601                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12602                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12603                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12604                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12605                 static const struct option long_options[] = {
12606                         { "super", required_argument, NULL, 's' },
12607                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12608                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12609                         { "init-csum-tree", no_argument, NULL,
12610                                 GETOPT_VAL_INIT_CSUM },
12611                         { "init-extent-tree", no_argument, NULL,
12612                                 GETOPT_VAL_INIT_EXTENT },
12613                         { "check-data-csum", no_argument, NULL,
12614                                 GETOPT_VAL_CHECK_CSUM },
12615                         { "backup", no_argument, NULL, 'b' },
12616                         { "subvol-extents", required_argument, NULL, 'E' },
12617                         { "qgroup-report", no_argument, NULL, 'Q' },
12618                         { "tree-root", required_argument, NULL, 'r' },
12619                         { "chunk-root", required_argument, NULL,
12620                                 GETOPT_VAL_CHUNK_TREE },
12621                         { "progress", no_argument, NULL, 'p' },
12622                         { "mode", required_argument, NULL,
12623                                 GETOPT_VAL_MODE },
12624                         { "clear-space-cache", required_argument, NULL,
12625                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12626                         { NULL, 0, NULL, 0}
12627                 };
12628
12629                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12630                 if (c < 0)
12631                         break;
12632                 switch(c) {
12633                         case 'a': /* ignored */ break;
12634                         case 'b':
12635                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12636                                 break;
12637                         case 's':
12638                                 num = arg_strtou64(optarg);
12639                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12640                                         error(
12641                                         "super mirror should be less than %d",
12642                                                 BTRFS_SUPER_MIRROR_MAX);
12643                                         exit(1);
12644                                 }
12645                                 bytenr = btrfs_sb_offset(((int)num));
12646                                 printf("using SB copy %llu, bytenr %llu\n", num,
12647                                        (unsigned long long)bytenr);
12648                                 break;
12649                         case 'Q':
12650                                 qgroup_report = 1;
12651                                 break;
12652                         case 'E':
12653                                 subvolid = arg_strtou64(optarg);
12654                                 break;
12655                         case 'r':
12656                                 tree_root_bytenr = arg_strtou64(optarg);
12657                                 break;
12658                         case GETOPT_VAL_CHUNK_TREE:
12659                                 chunk_root_bytenr = arg_strtou64(optarg);
12660                                 break;
12661                         case 'p':
12662                                 ctx.progress_enabled = true;
12663                                 break;
12664                         case '?':
12665                         case 'h':
12666                                 usage(cmd_check_usage);
12667                         case GETOPT_VAL_REPAIR:
12668                                 printf("enabling repair mode\n");
12669                                 repair = 1;
12670                                 ctree_flags |= OPEN_CTREE_WRITES;
12671                                 break;
12672                         case GETOPT_VAL_READONLY:
12673                                 readonly = 1;
12674                                 break;
12675                         case GETOPT_VAL_INIT_CSUM:
12676                                 printf("Creating a new CRC tree\n");
12677                                 init_csum_tree = 1;
12678                                 repair = 1;
12679                                 ctree_flags |= OPEN_CTREE_WRITES;
12680                                 break;
12681                         case GETOPT_VAL_INIT_EXTENT:
12682                                 init_extent_tree = 1;
12683                                 ctree_flags |= (OPEN_CTREE_WRITES |
12684                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12685                                 repair = 1;
12686                                 break;
12687                         case GETOPT_VAL_CHECK_CSUM:
12688                                 check_data_csum = 1;
12689                                 break;
12690                         case GETOPT_VAL_MODE:
12691                                 check_mode = parse_check_mode(optarg);
12692                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12693                                         error("unknown mode: %s", optarg);
12694                                         exit(1);
12695                                 }
12696                                 break;
12697                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12698                                 if (strcmp(optarg, "v1") == 0) {
12699                                         clear_space_cache = 1;
12700                                 } else if (strcmp(optarg, "v2") == 0) {
12701                                         clear_space_cache = 2;
12702                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12703                                 } else {
12704                                         error(
12705                 "invalid argument to --clear-space-cache, must be v1 or v2");
12706                                         exit(1);
12707                                 }
12708                                 ctree_flags |= OPEN_CTREE_WRITES;
12709                                 break;
12710                 }
12711         }
12712
12713         if (check_argc_exact(argc - optind, 1))
12714                 usage(cmd_check_usage);
12715
12716         if (ctx.progress_enabled) {
12717                 ctx.tp = TASK_NOTHING;
12718                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12719         }
12720
12721         /* This check is the only reason for --readonly to exist */
12722         if (readonly && repair) {
12723                 error("repair options are not compatible with --readonly");
12724                 exit(1);
12725         }
12726
12727         /*
12728          * Not supported yet
12729          */
12730         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12731                 error("low memory mode doesn't support repair yet");
12732                 exit(1);
12733         }
12734
12735         radix_tree_init();
12736         cache_tree_init(&root_cache);
12737
12738         if((ret = check_mounted(argv[optind])) < 0) {
12739                 error("could not check mount status: %s", strerror(-ret));
12740                 err |= !!ret;
12741                 goto err_out;
12742         } else if(ret) {
12743                 error("%s is currently mounted, aborting", argv[optind]);
12744                 ret = -EBUSY;
12745                 err |= !!ret;
12746                 goto err_out;
12747         }
12748
12749         /* only allow partial opening under repair mode */
12750         if (repair)
12751                 ctree_flags |= OPEN_CTREE_PARTIAL;
12752
12753         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12754                                   chunk_root_bytenr, ctree_flags);
12755         if (!info) {
12756                 error("cannot open file system");
12757                 ret = -EIO;
12758                 err |= !!ret;
12759                 goto err_out;
12760         }
12761
12762         global_info = info;
12763         root = info->fs_root;
12764         if (clear_space_cache == 1) {
12765                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12766                         error(
12767                 "free space cache v2 detected, use --clear-space-cache v2");
12768                         ret = 1;
12769                         goto close_out;
12770                 }
12771                 printf("Clearing free space cache\n");
12772                 ret = clear_free_space_cache(info);
12773                 if (ret) {
12774                         error("failed to clear free space cache");
12775                         ret = 1;
12776                 } else {
12777                         printf("Free space cache cleared\n");
12778                 }
12779                 goto close_out;
12780         } else if (clear_space_cache == 2) {
12781                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12782                         printf("no free space cache v2 to clear\n");
12783                         ret = 0;
12784                         goto close_out;
12785                 }
12786                 printf("Clear free space cache v2\n");
12787                 ret = btrfs_clear_free_space_tree(info);
12788                 if (ret) {
12789                         error("failed to clear free space cache v2: %d", ret);
12790                         ret = 1;
12791                 } else {
12792                         printf("free space cache v2 cleared\n");
12793                 }
12794                 goto close_out;
12795         }
12796
12797         /*
12798          * repair mode will force us to commit transaction which
12799          * will make us fail to load log tree when mounting.
12800          */
12801         if (repair && btrfs_super_log_root(info->super_copy)) {
12802                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12803                 if (!ret) {
12804                         ret = 1;
12805                         err |= !!ret;
12806                         goto close_out;
12807                 }
12808                 ret = zero_log_tree(root);
12809                 err |= !!ret;
12810                 if (ret) {
12811                         error("failed to zero log tree: %d", ret);
12812                         goto close_out;
12813                 }
12814         }
12815
12816         uuid_unparse(info->super_copy->fsid, uuidbuf);
12817         if (qgroup_report) {
12818                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12819                        uuidbuf);
12820                 ret = qgroup_verify_all(info);
12821                 err |= !!ret;
12822                 if (ret == 0)
12823                         report_qgroups(1);
12824                 goto close_out;
12825         }
12826         if (subvolid) {
12827                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12828                        subvolid, argv[optind], uuidbuf);
12829                 ret = print_extent_state(info, subvolid);
12830                 err |= !!ret;
12831                 goto close_out;
12832         }
12833         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12834
12835         if (!extent_buffer_uptodate(info->tree_root->node) ||
12836             !extent_buffer_uptodate(info->dev_root->node) ||
12837             !extent_buffer_uptodate(info->chunk_root->node)) {
12838                 error("critical roots corrupted, unable to check the filesystem");
12839                 err |= !!ret;
12840                 ret = -EIO;
12841                 goto close_out;
12842         }
12843
12844         if (init_extent_tree || init_csum_tree) {
12845                 struct btrfs_trans_handle *trans;
12846
12847                 trans = btrfs_start_transaction(info->extent_root, 0);
12848                 if (IS_ERR(trans)) {
12849                         error("error starting transaction");
12850                         ret = PTR_ERR(trans);
12851                         err |= !!ret;
12852                         goto close_out;
12853                 }
12854
12855                 if (init_extent_tree) {
12856                         printf("Creating a new extent tree\n");
12857                         ret = reinit_extent_tree(trans, info);
12858                         err |= !!ret;
12859                         if (ret)
12860                                 goto close_out;
12861                 }
12862
12863                 if (init_csum_tree) {
12864                         printf("Reinitialize checksum tree\n");
12865                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12866                         if (ret) {
12867                                 error("checksum tree initialization failed: %d",
12868                                                 ret);
12869                                 ret = -EIO;
12870                                 err |= !!ret;
12871                                 goto close_out;
12872                         }
12873
12874                         ret = fill_csum_tree(trans, info->csum_root,
12875                                              init_extent_tree);
12876                         err |= !!ret;
12877                         if (ret) {
12878                                 error("checksum tree refilling failed: %d", ret);
12879                                 return -EIO;
12880                         }
12881                 }
12882                 /*
12883                  * Ok now we commit and run the normal fsck, which will add
12884                  * extent entries for all of the items it finds.
12885                  */
12886                 ret = btrfs_commit_transaction(trans, info->extent_root);
12887                 err |= !!ret;
12888                 if (ret)
12889                         goto close_out;
12890         }
12891         if (!extent_buffer_uptodate(info->extent_root->node)) {
12892                 error("critical: extent_root, unable to check the filesystem");
12893                 ret = -EIO;
12894                 err |= !!ret;
12895                 goto close_out;
12896         }
12897         if (!extent_buffer_uptodate(info->csum_root->node)) {
12898                 error("critical: csum_root, unable to check the filesystem");
12899                 ret = -EIO;
12900                 err |= !!ret;
12901                 goto close_out;
12902         }
12903
12904         if (!ctx.progress_enabled)
12905                 fprintf(stderr, "checking extents\n");
12906         if (check_mode == CHECK_MODE_LOWMEM)
12907                 ret = check_chunks_and_extents_v2(root);
12908         else
12909                 ret = check_chunks_and_extents(root);
12910         err |= !!ret;
12911         if (ret)
12912                 error(
12913                 "errors found in extent allocation tree or chunk allocation");
12914
12915         ret = repair_root_items(info);
12916         err |= !!ret;
12917         if (ret < 0)
12918                 goto close_out;
12919         if (repair) {
12920                 fprintf(stderr, "Fixed %d roots.\n", ret);
12921                 ret = 0;
12922         } else if (ret > 0) {
12923                 fprintf(stderr,
12924                        "Found %d roots with an outdated root item.\n",
12925                        ret);
12926                 fprintf(stderr,
12927                         "Please run a filesystem check with the option --repair to fix them.\n");
12928                 ret = 1;
12929                 err |= !!ret;
12930                 goto close_out;
12931         }
12932
12933         if (!ctx.progress_enabled) {
12934                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12935                         fprintf(stderr, "checking free space tree\n");
12936                 else
12937                         fprintf(stderr, "checking free space cache\n");
12938         }
12939         ret = check_space_cache(root);
12940         err |= !!ret;
12941         if (ret)
12942                 goto out;
12943
12944         /*
12945          * We used to have to have these hole extents in between our real
12946          * extents so if we don't have this flag set we need to make sure there
12947          * are no gaps in the file extents for inodes, otherwise we can just
12948          * ignore it when this happens.
12949          */
12950         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12951         if (!ctx.progress_enabled)
12952                 fprintf(stderr, "checking fs roots\n");
12953         if (check_mode == CHECK_MODE_LOWMEM)
12954                 ret = check_fs_roots_v2(root->fs_info);
12955         else
12956                 ret = check_fs_roots(root, &root_cache);
12957         err |= !!ret;
12958         if (ret)
12959                 goto out;
12960
12961         fprintf(stderr, "checking csums\n");
12962         ret = check_csums(root);
12963         err |= !!ret;
12964         if (ret)
12965                 goto out;
12966
12967         fprintf(stderr, "checking root refs\n");
12968         /* For low memory mode, check_fs_roots_v2 handles root refs */
12969         if (check_mode != CHECK_MODE_LOWMEM) {
12970                 ret = check_root_refs(root, &root_cache);
12971                 err |= !!ret;
12972                 if (ret)
12973                         goto out;
12974         }
12975
12976         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12977                 struct extent_buffer *eb;
12978
12979                 eb = list_first_entry(&root->fs_info->recow_ebs,
12980                                       struct extent_buffer, recow);
12981                 list_del_init(&eb->recow);
12982                 ret = recow_extent_buffer(root, eb);
12983                 err |= !!ret;
12984                 if (ret)
12985                         break;
12986         }
12987
12988         while (!list_empty(&delete_items)) {
12989                 struct bad_item *bad;
12990
12991                 bad = list_first_entry(&delete_items, struct bad_item, list);
12992                 list_del_init(&bad->list);
12993                 if (repair) {
12994                         ret = delete_bad_item(root, bad);
12995                         err |= !!ret;
12996                 }
12997                 free(bad);
12998         }
12999
13000         if (info->quota_enabled) {
13001                 fprintf(stderr, "checking quota groups\n");
13002                 ret = qgroup_verify_all(info);
13003                 err |= !!ret;
13004                 if (ret)
13005                         goto out;
13006                 report_qgroups(0);
13007                 ret = repair_qgroups(info, &qgroups_repaired);
13008                 err |= !!ret;
13009                 if (err)
13010                         goto out;
13011                 ret = 0;
13012         }
13013
13014         if (!list_empty(&root->fs_info->recow_ebs)) {
13015                 error("transid errors in file system");
13016                 ret = 1;
13017                 err |= !!ret;
13018         }
13019 out:
13020         if (found_old_backref) { /*
13021                  * there was a disk format change when mixed
13022                  * backref was in testing tree. The old format
13023                  * existed about one week.
13024                  */
13025                 printf("\n * Found old mixed backref format. "
13026                        "The old format is not supported! *"
13027                        "\n * Please mount the FS in readonly mode, "
13028                        "backup data and re-format the FS. *\n\n");
13029                 err |= 1;
13030         }
13031         printf("found %llu bytes used err is %d\n",
13032                (unsigned long long)bytes_used, ret);
13033         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13034         printf("total tree bytes: %llu\n",
13035                (unsigned long long)total_btree_bytes);
13036         printf("total fs tree bytes: %llu\n",
13037                (unsigned long long)total_fs_tree_bytes);
13038         printf("total extent tree bytes: %llu\n",
13039                (unsigned long long)total_extent_tree_bytes);
13040         printf("btree space waste bytes: %llu\n",
13041                (unsigned long long)btree_space_waste);
13042         printf("file data blocks allocated: %llu\n referenced %llu\n",
13043                 (unsigned long long)data_bytes_allocated,
13044                 (unsigned long long)data_bytes_referenced);
13045
13046         free_qgroup_counts();
13047         free_root_recs_tree(&root_cache);
13048 close_out:
13049         close_ctree(root);
13050 err_out:
13051         if (ctx.progress_enabled)
13052                 task_deinit(ctx.info);
13053
13054         return err;
13055 }