btrfs-progs: check: disambiguate between cases where add_tree_backref fails
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check that the progress reporter is describing.
 * print_status_check() uses the value as an index into its table of phase
 * descriptions; that table has no entry for TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was requested - confirm */
        int progress_enabled;
        /* Phase whose description print_status_check() prints */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.
 * NOTE(review): the users of these variables are outside this part of the
 * file; the grouping comments below are inferred from the names only.
 */

/* Running byte counters */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* Work lists */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Flags */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress reporting context, see struct task_ctx */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation to run.  parse_check_mode() maps a name to one of
 * these and returns CHECK_MODE_UNKNOWN for a name it does not recognize.
 * CHECK_MODE_DEFAULT is an alias for CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Selected mode; starts out as the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header of a back reference record.  It is embedded as the 'node'
 * member of struct data_backref and struct tree_backref.
 */
struct extent_backref {
        /* List linkage, see to_extent_backref() */
        struct list_head list;
        /* NOTE(review): presumably tells data_backref from tree_backref - confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference record for a data extent.  The generic header is embedded
 * as 'node'; to_data_backref() converts from the header back to this type.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.
         * NOTE(review): presumably node.full_backref tells which one is
         * valid - confirm against the users.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Single-bit error flags meant to be OR-ed into one mask.  Numbering starts
 * at bit 1; bit 0 is not assigned here.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, with the undetermined members removed and linked
 * through a list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to the orphan_extents list of the
 * owning struct inode_record.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;           /* printed as "inode" by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Back reference record for a tree block.  The generic header is embedded
 * as 'node'; to_tree_backref() converts from the header back to this type.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage, same layout as in data_backref */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 still fits in that 2-bit field.
 */
enum { FLAG_UNSET = 2 };
170
/*
 * Record describing one extent.
 * NOTE(review): the code filling in these fields is outside this part of
 * the file; only what the declarations themselves show is noted here.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* List linkage used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One name referring to an inode, linked into inode_record::backrefs.
 * The name is stored in the variable-length tail; clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * Per-root bookkeeping record.
 * NOTE(review): presumably a cached subset of a root item; the code that
 * fills it in is outside this part of the file - confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/*
 * Reference error bits.  print_ref_error() prints one text fragment for
 * every bit that is set.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * A hole in a file's extent coverage, kept in a red-black tree (see
 * inode_record::holes).  compare_hole_range() treats it as the half-open
 * byte range [start, start + len).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode.  Records are reference counted:
 * get_inode_rec() clones a record with refs > 1 before handing it out for
 * modification, and free_inode_rec() releases it once refs drops to zero.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* get_inode_rec() presets this to 1 for BTRFS_FREE_INO_OBJECTID */
        u32 found_link;
        u64 found_size;
        /* get_inode_rec() initializes this to (u64)-1 on a new record */
        u64 extent_start;
        u64 extent_end;
        /* Tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* Reference count, see above */
        u32 refs;
};
280
/*
 * Bits of inode_record::errors.  print_inode_error() prints one text
 * fragment for every bit that is set.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * One reference to a root, linked through 'list' (see to_root_backref()).
 * The name is stored in the variable-length tail, namelen gives its length.
 * NOTE(review): presumably linked into root_record::backrefs - confirm.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/*
 * Bookkeeping for one root, embedding a cache_extent.
 * NOTE(review): presumably keyed by objectid in a cache_tree and holding a
 * list of struct root_backref - confirm against the users.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache entry that carries an opaque pointer.  get_inode_rec() uses it with
 * cache.start set to the inode number, cache.size set to 1 and data pointing
 * at the struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Cache entry with its own root and inode caches plus a reference count.
 * NOTE(review): presumably describes a tree block shared between roots;
 * the users are outside this part of the file - confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/* Start and size of a block.  NOTE(review): units presumably bytes - confirm. */
struct block_info {
        u64 start;
        u32 size;
};
342
/*
 * State of a tree walk, with one shared_node slot per tree level.
 * NOTE(review): active_node presumably indexes nodes[] - confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
349
/*
 * A key together with the id of a root, linked into a list.
 * NOTE(review): presumably queued on the delete_items list - confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/*
 * A (bytenr, bytes) candidate with counters, linked into a list.
 * NOTE(review): the meaning of count and broken is not visible in this part
 * of the file - confirm against the users.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/*
 * Information gathered about one root, embedding a cache_extent.
 * NOTE(review): presumably stored in roots_info_cache - confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns its own bit so that a combined mask stays decodable.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two errors indistinguishable; it now uses the first free
 * bit and all other values are unchanged.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the progress output: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 /*
1872  * Returns >0  Found error, not fatal, should continue
1873  * Returns <0  Fatal error, must exit the whole check
1874  * Returns 0   No errors found
1875  */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877                                struct node_refs *nrefs, int *level, int ext_ref)
1878 {
1879         struct extent_buffer *cur = path->nodes[0];
1880         struct btrfs_key key;
1881         u64 cur_bytenr;
1882         u32 nritems;
1883         u64 first_ino = 0;
1884         int root_level = btrfs_header_level(root->node);
1885         int i;
1886         int ret = 0; /* Final return value */
1887         int err = 0; /* Positive error bitmap */
1888
1889         cur_bytenr = cur->start;
1890
1891         /* skip to first inode item or the first inode number change */
1892         nritems = btrfs_header_nritems(cur);
1893         for (i = 0; i < nritems; i++) {
1894                 btrfs_item_key_to_cpu(cur, &key, i);
1895                 if (i == 0)
1896                         first_ino = key.objectid;
1897                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898                     (first_ino && first_ino != key.objectid))
1899                         break;
1900         }
1901         if (i == nritems) {
1902                 path->slots[0] = nritems;
1903                 return 0;
1904         }
1905         path->slots[0] = i;
1906
1907 again:
1908         err |= check_inode_item(root, path, ext_ref);
1909
1910         if (err & LAST_ITEM)
1911                 goto out;
1912
1913         /* still have inode items in thie leaf */
1914         if (cur->start == cur_bytenr)
1915                 goto again;
1916
1917         /*
1918          * we have switched to another leaf, above nodes may
1919          * have changed, here walk down the path, if a node
1920          * or leaf is shared, check whether we can skip this
1921          * node or leaf.
1922          */
1923         for (i = root_level; i >= 0; i--) {
1924                 if (path->nodes[i]->start == nrefs->bytenr[i])
1925                         continue;
1926
1927                 ret = update_nodes_refs(root,
1928                                 path->nodes[i]->start,
1929                                 nrefs, i);
1930                 if (ret)
1931                         goto out;
1932
1933                 if (!nrefs->need_check[i]) {
1934                         *level += 1;
1935                         break;
1936                 }
1937         }
1938
1939         for (i = 0; i < *level; i++) {
1940                 free_extent_buffer(path->nodes[i]);
1941                 path->nodes[i] = NULL;
1942         }
1943 out:
1944         err &= ~LAST_ITEM;
1945         if (err && !ret)
1946                 ret = err;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         free_extent_buffer(next);
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
/*
 * Forward declaration; check_inode_item() is defined later in this file.
 * The same prototype is also declared further up, ahead of
 * process_one_leaf_v2().
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);
2215
2216 /*
2217  * Returns >0  Found error, should continue
2218  * Returns <0  Fatal error, must exit the whole check
2219  * Returns 0   No errors found
2220  */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222                              int *level, struct node_refs *nrefs, int ext_ref)
2223 {
2224         enum btrfs_tree_block_status status;
2225         u64 bytenr;
2226         u64 ptr_gen;
2227         struct extent_buffer *next;
2228         struct extent_buffer *cur;
2229         u32 blocksize;
2230         int ret;
2231
2232         WARN_ON(*level < 0);
2233         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2234
2235         ret = update_nodes_refs(root, path->nodes[*level]->start,
2236                                 nrefs, *level);
2237         if (ret < 0)
2238                 return ret;
2239
2240         while (*level >= 0) {
2241                 WARN_ON(*level < 0);
2242                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243                 cur = path->nodes[*level];
2244
2245                 if (btrfs_header_level(cur) != *level)
2246                         WARN_ON(1);
2247
2248                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249                         break;
2250                 /* Don't forgot to check leaf/node validation */
2251                 if (*level == 0) {
2252                         ret = btrfs_check_leaf(root, NULL, cur);
2253                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2254                                 ret = -EIO;
2255                                 break;
2256                         }
2257                         ret = process_one_leaf_v2(root, path, nrefs,
2258                                                   level, ext_ref);
2259                         break;
2260                 } else {
2261                         ret = btrfs_check_node(root, NULL, cur);
2262                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2263                                 ret = -EIO;
2264                                 break;
2265                         }
2266                 }
2267                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269                 blocksize = root->nodesize;
2270
2271                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2272                 if (ret)
2273                         break;
2274                 if (!nrefs->need_check[*level - 1]) {
2275                         path->slots[*level]++;
2276                         continue;
2277                 }
2278
2279                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root, bytenr, blocksize,
2284                                                ptr_gen);
2285                         if (!extent_buffer_uptodate(next)) {
2286                                 struct btrfs_key node_key;
2287
2288                                 btrfs_node_key_to_cpu(path->nodes[*level],
2289                                                       &node_key,
2290                                                       path->slots[*level]);
2291                                 btrfs_add_corrupt_extent_record(root->fs_info,
2292                                                 &node_key,
2293                                                 path->nodes[*level]->start,
2294                                                 root->nodesize, *level);
2295                                 ret = -EIO;
2296                                 break;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret < 0) 
2302                         break;
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         ret = -EIO;
2311                         break;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319         return ret;
2320 }
2321
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323                         struct walk_control *wc, int *level)
2324 {
2325         int i;
2326         struct extent_buffer *leaf;
2327
2328         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329                 leaf = path->nodes[i];
2330                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2331                         path->slots[i]++;
2332                         *level = i;
2333                         return 0;
2334                 } else {
2335                         free_extent_buffer(path->nodes[*level]);
2336                         path->nodes[*level] = NULL;
2337                         BUG_ON(*level > wc->active_node);
2338                         if (*level == wc->active_node)
2339                                 leave_shared_node(root, wc, *level);
2340                         *level = i + 1;
2341                 }
2342         }
2343         return 1;
2344 }
2345
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2347                            int *level)
2348 {
2349         int i;
2350         struct extent_buffer *leaf;
2351
2352         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353                 leaf = path->nodes[i];
2354                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355                         path->slots[i]++;
2356                         *level = i;
2357                         return 0;
2358                 } else {
2359                         free_extent_buffer(path->nodes[*level]);
2360                         path->nodes[*level] = NULL;
2361                         *level = i + 1;
2362                 }
2363         }
2364         return 1;
2365 }
2366
2367 static int check_root_dir(struct inode_record *rec)
2368 {
2369         struct inode_backref *backref;
2370         int ret = -1;
2371
2372         if (!rec->found_inode_item || rec->errors)
2373                 goto out;
2374         if (rec->nlink != 1 || rec->found_link != 0)
2375                 goto out;
2376         if (list_empty(&rec->backrefs))
2377                 goto out;
2378         backref = to_inode_backref(rec->backrefs.next);
2379         if (!backref->found_inode_ref)
2380                 goto out;
2381         if (backref->index != 0 || backref->namelen != 2 ||
2382             memcmp(backref->name, "..", 2))
2383                 goto out;
2384         if (backref->found_dir_index || backref->found_dir_item)
2385                 goto out;
2386         ret = 0;
2387 out:
2388         return ret;
2389 }
2390
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392                               struct btrfs_root *root, struct btrfs_path *path,
2393                               struct inode_record *rec)
2394 {
2395         struct btrfs_inode_item *ei;
2396         struct btrfs_key key;
2397         int ret;
2398
2399         key.objectid = rec->ino;
2400         key.type = BTRFS_INODE_ITEM_KEY;
2401         key.offset = (u64)-1;
2402
2403         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2404         if (ret < 0)
2405                 goto out;
2406         if (ret) {
2407                 if (!path->slots[0]) {
2408                         ret = -ENOENT;
2409                         goto out;
2410                 }
2411                 path->slots[0]--;
2412                 ret = 0;
2413         }
2414         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415         if (key.objectid != rec->ino) {
2416                 ret = -ENOENT;
2417                 goto out;
2418         }
2419
2420         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421                             struct btrfs_inode_item);
2422         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423         btrfs_mark_buffer_dirty(path->nodes[0]);
2424         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2425         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2426                root->root_key.objectid);
2427 out:
2428         btrfs_release_path(path);
2429         return ret;
2430 }
2431
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433                                     struct btrfs_root *root,
2434                                     struct btrfs_path *path,
2435                                     struct inode_record *rec)
2436 {
2437         int ret;
2438
2439         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440         btrfs_release_path(path);
2441         if (!ret)
2442                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2443         return ret;
2444 }
2445
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447                                struct btrfs_root *root,
2448                                struct btrfs_path *path,
2449                                struct inode_record *rec)
2450 {
2451         struct btrfs_inode_item *ei;
2452         struct btrfs_key key;
2453         int ret = 0;
2454
2455         key.objectid = rec->ino;
2456         key.type = BTRFS_INODE_ITEM_KEY;
2457         key.offset = 0;
2458
2459         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2460         if (ret) {
2461                 if (ret > 0)
2462                         ret = -ENOENT;
2463                 goto out;
2464         }
2465
2466         /* Since ret == 0, no need to check anything */
2467         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468                             struct btrfs_inode_item);
2469         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470         btrfs_mark_buffer_dirty(path->nodes[0]);
2471         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472         printf("reset nbytes for ino %llu root %llu\n",
2473                rec->ino, root->root_key.objectid);
2474 out:
2475         btrfs_release_path(path);
2476         return ret;
2477 }
2478
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480                                  struct cache_tree *inode_cache,
2481                                  struct inode_record *rec,
2482                                  struct inode_backref *backref)
2483 {
2484         struct btrfs_path path;
2485         struct btrfs_trans_handle *trans;
2486         struct btrfs_dir_item *dir_item;
2487         struct extent_buffer *leaf;
2488         struct btrfs_key key;
2489         struct btrfs_disk_key disk_key;
2490         struct inode_record *dir_rec;
2491         unsigned long name_ptr;
2492         u32 data_size = sizeof(*dir_item) + backref->namelen;
2493         int ret;
2494
2495         trans = btrfs_start_transaction(root, 1);
2496         if (IS_ERR(trans))
2497                 return PTR_ERR(trans);
2498
2499         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500                 (unsigned long long)rec->ino);
2501
2502         btrfs_init_path(&path);
2503         key.objectid = backref->dir;
2504         key.type = BTRFS_DIR_INDEX_KEY;
2505         key.offset = backref->index;
2506         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2507         BUG_ON(ret);
2508
2509         leaf = path.nodes[0];
2510         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2511
2512         disk_key.objectid = cpu_to_le64(rec->ino);
2513         disk_key.type = BTRFS_INODE_ITEM_KEY;
2514         disk_key.offset = 0;
2515
2516         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518         btrfs_set_dir_data_len(leaf, dir_item, 0);
2519         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2520         name_ptr = (unsigned long)(dir_item + 1);
2521         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522         btrfs_mark_buffer_dirty(leaf);
2523         btrfs_release_path(&path);
2524         btrfs_commit_transaction(trans, root);
2525
2526         backref->found_dir_index = 1;
2527         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528         BUG_ON(IS_ERR(dir_rec));
2529         if (!dir_rec)
2530                 return 0;
2531         dir_rec->found_size += backref->namelen;
2532         if (dir_rec->found_size == dir_rec->isize &&
2533             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535         if (dir_rec->found_size != dir_rec->isize)
2536                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2537
2538         return 0;
2539 }
2540
2541 static int delete_dir_index(struct btrfs_root *root,
2542                             struct inode_backref *backref)
2543 {
2544         struct btrfs_trans_handle *trans;
2545         struct btrfs_dir_item *di;
2546         struct btrfs_path path;
2547         int ret = 0;
2548
2549         trans = btrfs_start_transaction(root, 1);
2550         if (IS_ERR(trans))
2551                 return PTR_ERR(trans);
2552
2553         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554                 (unsigned long long)backref->dir,
2555                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556                 (unsigned long long)root->objectid);
2557
2558         btrfs_init_path(&path);
2559         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560                                     backref->name, backref->namelen,
2561                                     backref->index, -1);
2562         if (IS_ERR(di)) {
2563                 ret = PTR_ERR(di);
2564                 btrfs_release_path(&path);
2565                 btrfs_commit_transaction(trans, root);
2566                 if (ret == -ENOENT)
2567                         return 0;
2568                 return ret;
2569         }
2570
2571         if (!di)
2572                 ret = btrfs_del_item(trans, root, &path);
2573         else
2574                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2575         BUG_ON(ret);
2576         btrfs_release_path(&path);
2577         btrfs_commit_transaction(trans, root);
2578         return ret;
2579 }
2580
2581 static int create_inode_item(struct btrfs_root *root,
2582                              struct inode_record *rec,
2583                              int root_dir)
2584 {
2585         struct btrfs_trans_handle *trans;
2586         struct btrfs_inode_item inode_item;
2587         time_t now = time(NULL);
2588         int ret;
2589
2590         trans = btrfs_start_transaction(root, 1);
2591         if (IS_ERR(trans)) {
2592                 ret = PTR_ERR(trans);
2593                 return ret;
2594         }
2595
2596         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597                 "be incomplete, please check permissions and content after "
2598                 "the fsck completes.\n", (unsigned long long)root->objectid,
2599                 (unsigned long long)rec->ino);
2600
2601         memset(&inode_item, 0, sizeof(inode_item));
2602         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2603         if (root_dir)
2604                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2605         else
2606                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608         if (rec->found_dir_item) {
2609                 if (rec->found_file_extent)
2610                         fprintf(stderr, "root %llu inode %llu has both a dir "
2611                                 "item and extents, unsure if it is a dir or a "
2612                                 "regular file so setting it as a directory\n",
2613                                 (unsigned long long)root->objectid,
2614                                 (unsigned long long)rec->ino);
2615                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2617         } else if (!rec->found_dir_item) {
2618                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2620         }
2621         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2629
2630         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2631         BUG_ON(ret);
2632         btrfs_commit_transaction(trans, root);
2633         return 0;
2634 }
2635
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637                                  struct inode_record *rec,
2638                                  struct cache_tree *inode_cache,
2639                                  int delete)
2640 {
2641         struct inode_backref *tmp, *backref;
2642         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2643         int ret = 0;
2644         int repaired = 0;
2645
2646         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647                 if (!delete && rec->ino == root_dirid) {
2648                         if (!rec->found_inode_item) {
2649                                 ret = create_inode_item(root, rec, 1);
2650                                 if (ret)
2651                                         break;
2652                                 repaired++;
2653                         }
2654                 }
2655
2656                 /* Index 0 for root dir's are special, don't mess with it */
2657                 if (rec->ino == root_dirid && backref->index == 0)
2658                         continue;
2659
2660                 if (delete &&
2661                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2662                      (backref->found_dir_index && backref->found_inode_ref &&
2663                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664                         ret = delete_dir_index(root, backref);
2665                         if (ret)
2666                                 break;
2667                         repaired++;
2668                         list_del(&backref->list);
2669                         free(backref);
2670                 }
2671
2672                 if (!delete && !backref->found_dir_index &&
2673                     backref->found_dir_item && backref->found_inode_ref) {
2674                         ret = add_missing_dir_index(root, inode_cache, rec,
2675                                                     backref);
2676                         if (ret)
2677                                 break;
2678                         repaired++;
2679                         if (backref->found_dir_item &&
2680                             backref->found_dir_index &&
2681                             backref->found_dir_index) {
2682                                 if (!backref->errors &&
2683                                     backref->found_inode_ref) {
2684                                         list_del(&backref->list);
2685                                         free(backref);
2686                                 }
2687                         }
2688                 }
2689
2690                 if (!delete && (!backref->found_dir_index &&
2691                                 !backref->found_dir_item &&
2692                                 backref->found_inode_ref)) {
2693                         struct btrfs_trans_handle *trans;
2694                         struct btrfs_key location;
2695
2696                         ret = check_dir_conflict(root, backref->name,
2697                                                  backref->namelen,
2698                                                  backref->dir,
2699                                                  backref->index);
2700                         if (ret) {
2701                                 /*
2702                                  * let nlink fixing routine to handle it,
2703                                  * which can do it better.
2704                                  */
2705                                 ret = 0;
2706                                 break;
2707                         }
2708                         location.objectid = rec->ino;
2709                         location.type = BTRFS_INODE_ITEM_KEY;
2710                         location.offset = 0;
2711
2712                         trans = btrfs_start_transaction(root, 1);
2713                         if (IS_ERR(trans)) {
2714                                 ret = PTR_ERR(trans);
2715                                 break;
2716                         }
2717                         fprintf(stderr, "adding missing dir index/item pair "
2718                                 "for inode %llu\n",
2719                                 (unsigned long long)rec->ino);
2720                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2721                                                     backref->namelen,
2722                                                     backref->dir, &location,
2723                                                     imode_to_type(rec->imode),
2724                                                     backref->index);
2725                         BUG_ON(ret);
2726                         btrfs_commit_transaction(trans, root);
2727                         repaired++;
2728                 }
2729
2730                 if (!delete && (backref->found_inode_ref &&
2731                                 backref->found_dir_index &&
2732                                 backref->found_dir_item &&
2733                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734                                 !rec->found_inode_item)) {
2735                         ret = create_inode_item(root, rec, 0);
2736                         if (ret)
2737                                 break;
2738                         repaired++;
2739                 }
2740
2741         }
2742         return ret ? ret : repaired;
2743 }
2744
2745 /*
2746  * To determine the file type for nlink/inode_item repair
2747  *
2748  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749  * Return -ENOENT if file type is not found.
2750  */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2752 {
2753         struct inode_backref *backref;
2754
2755         /* For inode item recovered case */
2756         if (rec->found_inode_item) {
2757                 *type = imode_to_type(rec->imode);
2758                 return 0;
2759         }
2760
2761         list_for_each_entry(backref, &rec->backrefs, list) {
2762                 if (backref->found_dir_index || backref->found_dir_item) {
2763                         *type = backref->filetype;
2764                         return 0;
2765                 }
2766         }
2767         return -ENOENT;
2768 }
2769
2770 /*
2771  * To determine the file name for nlink repair
2772  *
2773  * Return 0 if file name is found, set name and namelen.
2774  * Return -ENOENT if file name is not found.
2775  */
2776 static int find_file_name(struct inode_record *rec,
2777                           char *name, int *namelen)
2778 {
2779         struct inode_backref *backref;
2780
2781         list_for_each_entry(backref, &rec->backrefs, list) {
2782                 if (backref->found_dir_index || backref->found_dir_item ||
2783                     backref->found_inode_ref) {
2784                         memcpy(name, backref->name, backref->namelen);
2785                         *namelen = backref->namelen;
2786                         return 0;
2787                 }
2788         }
2789         return -ENOENT;
2790 }
2791
2792 /* Reset the nlink of the inode to the correct one */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794                        struct btrfs_root *root,
2795                        struct btrfs_path *path,
2796                        struct inode_record *rec)
2797 {
2798         struct inode_backref *backref;
2799         struct inode_backref *tmp;
2800         struct btrfs_key key;
2801         struct btrfs_inode_item *inode_item;
2802         int ret = 0;
2803
2804         /* We don't believe this either, reset it and iterate backref */
2805         rec->found_link = 0;
2806
2807         /* Remove all backref including the valid ones */
2808         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810                                    backref->index, backref->name,
2811                                    backref->namelen, 0);
2812                 if (ret < 0)
2813                         goto out;
2814
2815                 /* remove invalid backref, so it won't be added back */
2816                 if (!(backref->found_dir_index &&
2817                       backref->found_dir_item &&
2818                       backref->found_inode_ref)) {
2819                         list_del(&backref->list);
2820                         free(backref);
2821                 } else {
2822                         rec->found_link++;
2823                 }
2824         }
2825
2826         /* Set nlink to 0 */
2827         key.objectid = rec->ino;
2828         key.type = BTRFS_INODE_ITEM_KEY;
2829         key.offset = 0;
2830         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2831         if (ret < 0)
2832                 goto out;
2833         if (ret > 0) {
2834                 ret = -ENOENT;
2835                 goto out;
2836         }
2837         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838                                     struct btrfs_inode_item);
2839         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840         btrfs_mark_buffer_dirty(path->nodes[0]);
2841         btrfs_release_path(path);
2842
2843         /*
2844          * Add back valid inode_ref/dir_item/dir_index,
2845          * add_link() will handle the nlink inc, so new nlink must be correct
2846          */
2847         list_for_each_entry(backref, &rec->backrefs, list) {
2848                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849                                      backref->name, backref->namelen,
2850                                      backref->filetype, &backref->index, 1);
2851                 if (ret < 0)
2852                         goto out;
2853         }
2854 out:
2855         btrfs_release_path(path);
2856         return ret;
2857 }
2858
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860                                 struct btrfs_root *root,
2861                                 struct btrfs_path *path,
2862                                 u64 *highest_ino)
2863 {
2864         struct btrfs_key key, found_key;
2865         int ret;
2866
2867         btrfs_init_path(path);
2868         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2869         key.offset = -1;
2870         key.type = BTRFS_INODE_ITEM_KEY;
2871         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2872         if (ret == 1) {
2873                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874                                 path->slots[0] - 1);
2875                 *highest_ino = found_key.objectid;
2876                 ret = 0;
2877         }
2878         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2879                 ret = -EOVERFLOW;
2880         btrfs_release_path(path);
2881         return ret;
2882 }
2883
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885                                struct btrfs_root *root,
2886                                struct btrfs_path *path,
2887                                struct inode_record *rec)
2888 {
2889         char *dir_name = "lost+found";
2890         char namebuf[BTRFS_NAME_LEN] = {0};
2891         u64 lost_found_ino;
2892         u32 mode = 0700;
2893         u8 type = 0;
2894         int namelen = 0;
2895         int name_recovered = 0;
2896         int type_recovered = 0;
2897         int ret = 0;
2898
2899         /*
2900          * Get file name and type first before these invalid inode ref
2901          * are deleted by remove_all_invalid_backref()
2902          */
2903         name_recovered = !find_file_name(rec, namebuf, &namelen);
2904         type_recovered = !find_file_type(rec, &type);
2905
2906         if (!name_recovered) {
2907                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908                        rec->ino, rec->ino);
2909                 namelen = count_digits(rec->ino);
2910                 sprintf(namebuf, "%llu", rec->ino);
2911                 name_recovered = 1;
2912         }
2913         if (!type_recovered) {
2914                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2915                        rec->ino);
2916                 type = BTRFS_FT_REG_FILE;
2917                 type_recovered = 1;
2918         }
2919
2920         ret = reset_nlink(trans, root, path, rec);
2921         if (ret < 0) {
2922                 fprintf(stderr,
2923                         "Failed to reset nlink for inode %llu: %s\n",
2924                         rec->ino, strerror(-ret));
2925                 goto out;
2926         }
2927
2928         if (rec->found_link == 0) {
2929                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2930                 if (ret < 0)
2931                         goto out;
2932                 lost_found_ino++;
2933                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2935                                   mode);
2936                 if (ret < 0) {
2937                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938                                 dir_name, strerror(-ret));
2939                         goto out;
2940                 }
2941                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942                                      namebuf, namelen, type, NULL, 1);
2943                 /*
2944                  * Add ".INO" suffix several times to handle case where
2945                  * "FILENAME.INO" is already taken by another file.
2946                  */
2947                 while (ret == -EEXIST) {
2948                         /*
2949                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2950                          */
2951                         if (namelen + count_digits(rec->ino) + 1 >
2952                             BTRFS_NAME_LEN) {
2953                                 ret = -EFBIG;
2954                                 goto out;
2955                         }
2956                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2957                                  ".%llu", rec->ino);
2958                         namelen += count_digits(rec->ino) + 1;
2959                         ret = btrfs_add_link(trans, root, rec->ino,
2960                                              lost_found_ino, namebuf,
2961                                              namelen, type, NULL, 1);
2962                 }
2963                 if (ret < 0) {
2964                         fprintf(stderr,
2965                                 "Failed to link the inode %llu to %s dir: %s\n",
2966                                 rec->ino, dir_name, strerror(-ret));
2967                         goto out;
2968                 }
2969                 /*
2970                  * Just increase the found_link, don't actually add the
2971                  * backref. This will make things easier and this inode
2972                  * record will be freed after the repair is done.
2973                  * So fsck will not report problem about this inode.
2974                  */
2975                 rec->found_link++;
2976                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977                        namelen, namebuf, dir_name);
2978         }
2979         printf("Fixed the nlink of inode %llu\n", rec->ino);
2980 out:
2981         /*
2982          * Clear the flag anyway, or we will loop forever for the same inode
2983          * as it will not be removed from the bad inode list and the dead loop
2984          * happens.
2985          */
2986         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987         btrfs_release_path(path);
2988         return ret;
2989 }
2990
2991 /*
2992  * Check if there is any normal(reg or prealloc) file extent for given
2993  * ino.
2994  * This is used to determine the file type when neither its dir_index/item or
2995  * inode_item exists.
2996  *
2997  * This will *NOT* report error, if any error happens, just consider it does
2998  * not have any normal file extent.
2999  */
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3001 {
3002         struct btrfs_path path;
3003         struct btrfs_key key;
3004         struct btrfs_key found_key;
3005         struct btrfs_file_extent_item *fi;
3006         u8 type;
3007         int ret = 0;
3008
3009         btrfs_init_path(&path);
3010         key.objectid = ino;
3011         key.type = BTRFS_EXTENT_DATA_KEY;
3012         key.offset = 0;
3013
3014         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3015         if (ret < 0) {
3016                 ret = 0;
3017                 goto out;
3018         }
3019         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020                 ret = btrfs_next_leaf(root, &path);
3021                 if (ret) {
3022                         ret = 0;
3023                         goto out;
3024                 }
3025         }
3026         while (1) {
3027                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3028                                       path.slots[0]);
3029                 if (found_key.objectid != ino ||
3030                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3031                         break;
3032                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033                                     struct btrfs_file_extent_item);
3034                 type = btrfs_file_extent_type(path.nodes[0], fi);
3035                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3036                         ret = 1;
3037                         goto out;
3038                 }
3039         }
3040 out:
3041         btrfs_release_path(&path);
3042         return ret;
3043 }
3044
3045 static u32 btrfs_type_to_imode(u8 type)
3046 {
3047         static u32 imode_by_btrfs_type[] = {
3048                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3049                 [BTRFS_FT_DIR]          = S_IFDIR,
3050                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3051                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3052                 [BTRFS_FT_FIFO]         = S_IFIFO,
3053                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3054                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3055         };
3056
3057         return imode_by_btrfs_type[(type)];
3058 }
3059
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061                                 struct btrfs_root *root,
3062                                 struct btrfs_path *path,
3063                                 struct inode_record *rec)
3064 {
3065         u8 filetype;
3066         u32 mode = 0700;
3067         int type_recovered = 0;
3068         int ret = 0;
3069
3070         printf("Trying to rebuild inode:%llu\n", rec->ino);
3071
3072         type_recovered = !find_file_type(rec, &filetype);
3073
3074         /*
3075          * Try to determine inode type if type not found.
3076          *
3077          * For found regular file extent, it must be FILE.
3078          * For found dir_item/index, it must be DIR.
3079          *
3080          * For undetermined one, use FILE as fallback.
3081          *
3082          * TODO:
3083          * 1. If found backref(inode_index/item is already handled) to it,
3084          *    it must be DIR.
3085          *    Need new inode-inode ref structure to allow search for that.
3086          */
3087         if (!type_recovered) {
3088                 if (rec->found_file_extent &&
3089                     find_normal_file_extent(root, rec->ino)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else if (rec->found_dir_item) {
3093                         type_recovered = 1;
3094                         filetype = BTRFS_FT_DIR;
3095                 } else if (!list_empty(&rec->orphan_extents)) {
3096                         type_recovered = 1;
3097                         filetype = BTRFS_FT_REG_FILE;
3098                 } else{
3099                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3100                                rec->ino);
3101                         type_recovered = 1;
3102                         filetype = BTRFS_FT_REG_FILE;
3103                 }
3104         }
3105
3106         ret = btrfs_new_inode(trans, root, rec->ino,
3107                               mode | btrfs_type_to_imode(filetype));
3108         if (ret < 0)
3109                 goto out;
3110
3111         /*
3112          * Here inode rebuild is done, we only rebuild the inode item,
3113          * don't repair the nlink(like move to lost+found).
3114          * That is the job of nlink repair.
3115          *
3116          * We just fill the record and return
3117          */
3118         rec->found_dir_item = 1;
3119         rec->imode = mode | btrfs_type_to_imode(filetype);
3120         rec->nlink = 0;
3121         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122         /* Ensure the inode_nlinks repair function will be called */
3123         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3124 out:
3125         return ret;
3126 }
3127
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129                                       struct btrfs_root *root,
3130                                       struct btrfs_path *path,
3131                                       struct inode_record *rec)
3132 {
3133         struct orphan_data_extent *orphan;
3134         struct orphan_data_extent *tmp;
3135         int ret = 0;
3136
3137         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3138                 /*
3139                  * Check for conflicting file extents
3140                  *
3141                  * Here we don't know whether the extents is compressed or not,
3142                  * so we can only assume it not compressed nor data offset,
3143                  * and use its disk_len as extent length.
3144                  */
3145                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146                                        orphan->offset, orphan->disk_len, 0);
3147                 btrfs_release_path(path);
3148                 if (ret < 0)
3149                         goto out;
3150                 if (!ret) {
3151                         fprintf(stderr,
3152                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153                                 orphan->disk_bytenr, orphan->disk_len);
3154                         ret = btrfs_free_extent(trans,
3155                                         root->fs_info->extent_root,
3156                                         orphan->disk_bytenr, orphan->disk_len,
3157                                         0, root->objectid, orphan->objectid,
3158                                         orphan->offset);
3159                         if (ret < 0)
3160                                 goto out;
3161                 }
3162                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163                                 orphan->offset, orphan->disk_bytenr,
3164                                 orphan->disk_len, orphan->disk_len);
3165                 if (ret < 0)
3166                         goto out;
3167
3168                 /* Update file size info */
3169                 rec->found_size += orphan->disk_len;
3170                 if (rec->found_size == rec->nbytes)
3171                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3172
3173                 /* Update the file extent hole info too */
3174                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3175                                            orphan->disk_len);
3176                 if (ret < 0)
3177                         goto out;
3178                 if (RB_EMPTY_ROOT(&rec->holes))
3179                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3180
3181                 list_del(&orphan->list);
3182                 free(orphan);
3183         }
3184         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3185 out:
3186         return ret;
3187 }
3188
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190                                         struct btrfs_root *root,
3191                                         struct btrfs_path *path,
3192                                         struct inode_record *rec)
3193 {
3194         struct rb_node *node;
3195         struct file_extent_hole *hole;
3196         int found = 0;
3197         int ret = 0;
3198
3199         node = rb_first(&rec->holes);
3200
3201         while (node) {
3202                 found = 1;
3203                 hole = rb_entry(node, struct file_extent_hole, node);
3204                 ret = btrfs_punch_hole(trans, root, rec->ino,
3205                                        hole->start, hole->len);
3206                 if (ret < 0)
3207                         goto out;
3208                 ret = del_file_extent_hole(&rec->holes, hole->start,
3209                                            hole->len);
3210                 if (ret < 0)
3211                         goto out;
3212                 if (RB_EMPTY_ROOT(&rec->holes))
3213                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3214                 node = rb_first(&rec->holes);
3215         }
3216         /* special case for a file losing all its file extent */
3217         if (!found) {
3218                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219                                        round_up(rec->isize, root->sectorsize));
3220                 if (ret < 0)
3221                         goto out;
3222         }
3223         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224                rec->ino, root->objectid);
3225 out:
3226         return ret;
3227 }
3228
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3230 {
3231         struct btrfs_trans_handle *trans;
3232         struct btrfs_path path;
3233         int ret = 0;
3234
3235         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236                              I_ERR_NO_ORPHAN_ITEM |
3237                              I_ERR_LINK_COUNT_WRONG |
3238                              I_ERR_NO_INODE_ITEM |
3239                              I_ERR_FILE_EXTENT_ORPHAN |
3240                              I_ERR_FILE_EXTENT_DISCOUNT|
3241                              I_ERR_FILE_NBYTES_WRONG)))
3242                 return rec->errors;
3243
3244         /*
3245          * For nlink repair, it may create a dir and add link, so
3246          * 2 for parent(256)'s dir_index and dir_item
3247          * 2 for lost+found dir's inode_item and inode_ref
3248          * 1 for the new inode_ref of the file
3249          * 2 for lost+found dir's dir_index and dir_item for the file
3250          */
3251         trans = btrfs_start_transaction(root, 7);
3252         if (IS_ERR(trans))
3253                 return PTR_ERR(trans);
3254
3255         btrfs_init_path(&path);
3256         if (rec->errors & I_ERR_NO_INODE_ITEM)
3257                 ret = repair_inode_no_item(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263                 ret = repair_inode_isize(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267                 ret = repair_inode_nlinks(trans, root, &path, rec);
3268         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269                 ret = repair_inode_nbytes(trans, root, &path, rec);
3270         btrfs_commit_transaction(trans, root);
3271         btrfs_release_path(&path);
3272         return ret;
3273 }
3274
3275 static int check_inode_recs(struct btrfs_root *root,
3276                             struct cache_tree *inode_cache)
3277 {
3278         struct cache_extent *cache;
3279         struct ptr_node *node;
3280         struct inode_record *rec;
3281         struct inode_backref *backref;
3282         int stage = 0;
3283         int ret = 0;
3284         int err = 0;
3285         u64 error = 0;
3286         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3287
3288         if (btrfs_root_refs(&root->root_item) == 0) {
3289                 if (!cache_tree_empty(inode_cache))
3290                         fprintf(stderr, "warning line %d\n", __LINE__);
3291                 return 0;
3292         }
3293
3294         /*
3295          * We need to repair backrefs first because we could change some of the
3296          * errors in the inode recs.
3297          *
3298          * We also need to go through and delete invalid backrefs first and then
3299          * add the correct ones second.  We do this because we may get EEXIST
3300          * when adding back the correct index because we hadn't yet deleted the
3301          * invalid index.
3302          *
3303          * For example, if we were missing a dir index then the directories
3304          * isize would be wrong, so if we fixed the isize to what we thought it
3305          * would be and then fixed the backref we'd still have a invalid fs, so
3306          * we need to add back the dir index and then check to see if the isize
3307          * is still wrong.
3308          */
3309         while (stage < 3) {
3310                 stage++;
3311                 if (stage == 3 && !err)
3312                         break;
3313
3314                 cache = search_cache_extent(inode_cache, 0);
3315                 while (repair && cache) {
3316                         node = container_of(cache, struct ptr_node, cache);
3317                         rec = node->data;
3318                         cache = next_cache_extent(cache);
3319
3320                         /* Need to free everything up and rescan */
3321                         if (stage == 3) {
3322                                 remove_cache_extent(inode_cache, &node->cache);
3323                                 free(node);
3324                                 free_inode_rec(rec);
3325                                 continue;
3326                         }
3327
3328                         if (list_empty(&rec->backrefs))
3329                                 continue;
3330
3331                         ret = repair_inode_backrefs(root, rec, inode_cache,
3332                                                     stage == 1);
3333                         if (ret < 0) {
3334                                 err = ret;
3335                                 stage = 2;
3336                                 break;
3337                         } if (ret > 0) {
3338                                 err = -EAGAIN;
3339                         }
3340                 }
3341         }
3342         if (err)
3343                 return err;
3344
3345         rec = get_inode_rec(inode_cache, root_dirid, 0);
3346         BUG_ON(IS_ERR(rec));
3347         if (rec) {
3348                 ret = check_root_dir(rec);
3349                 if (ret) {
3350                         fprintf(stderr, "root %llu root dir %llu error\n",
3351                                 (unsigned long long)root->root_key.objectid,
3352                                 (unsigned long long)root_dirid);
3353                         print_inode_error(root, rec);
3354                         error++;
3355                 }
3356         } else {
3357                 if (repair) {
3358                         struct btrfs_trans_handle *trans;
3359
3360                         trans = btrfs_start_transaction(root, 1);
3361                         if (IS_ERR(trans)) {
3362                                 err = PTR_ERR(trans);
3363                                 return err;
3364                         }
3365
3366                         fprintf(stderr,
3367                                 "root %llu missing its root dir, recreating\n",
3368                                 (unsigned long long)root->objectid);
3369
3370                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3371                         BUG_ON(ret);
3372
3373                         btrfs_commit_transaction(trans, root);
3374                         return -EAGAIN;
3375                 }
3376
3377                 fprintf(stderr, "root %llu root dir %llu not found\n",
3378                         (unsigned long long)root->root_key.objectid,
3379                         (unsigned long long)root_dirid);
3380         }
3381
3382         while (1) {
3383                 cache = search_cache_extent(inode_cache, 0);
3384                 if (!cache)
3385                         break;
3386                 node = container_of(cache, struct ptr_node, cache);
3387                 rec = node->data;
3388                 remove_cache_extent(inode_cache, &node->cache);
3389                 free(node);
3390                 if (rec->ino == root_dirid ||
3391                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392                         free_inode_rec(rec);
3393                         continue;
3394                 }
3395
3396                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397                         ret = check_orphan_item(root, rec->ino);
3398                         if (ret == 0)
3399                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400                         if (can_free_inode_rec(rec)) {
3401                                 free_inode_rec(rec);
3402                                 continue;
3403                         }
3404                 }
3405
3406                 if (!rec->found_inode_item)
3407                         rec->errors |= I_ERR_NO_INODE_ITEM;
3408                 if (rec->found_link != rec->nlink)
3409                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3410                 if (repair) {
3411                         ret = try_repair_inode(root, rec);
3412                         if (ret == 0 && can_free_inode_rec(rec)) {
3413                                 free_inode_rec(rec);
3414                                 continue;
3415                         }
3416                         ret = 0;
3417                 }
3418
3419                 if (!(repair && ret == 0))
3420                         error++;
3421                 print_inode_error(root, rec);
3422                 list_for_each_entry(backref, &rec->backrefs, list) {
3423                         if (!backref->found_dir_item)
3424                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425                         if (!backref->found_dir_index)
3426                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427                         if (!backref->found_inode_ref)
3428                                 backref->errors |= REF_ERR_NO_INODE_REF;
3429                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430                                 " namelen %u name %s filetype %d errors %x",
3431                                 (unsigned long long)backref->dir,
3432                                 (unsigned long long)backref->index,
3433                                 backref->namelen, backref->name,
3434                                 backref->filetype, backref->errors);
3435                         print_ref_error(backref->errors);
3436                 }
3437                 free_inode_rec(rec);
3438         }
3439         return (error > 0) ? -1 : 0;
3440 }
3441
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3443                                         u64 objectid)
3444 {
3445         struct cache_extent *cache;
3446         struct root_record *rec = NULL;
3447         int ret;
3448
3449         cache = lookup_cache_extent(root_cache, objectid, 1);
3450         if (cache) {
3451                 rec = container_of(cache, struct root_record, cache);
3452         } else {
3453                 rec = calloc(1, sizeof(*rec));
3454                 if (!rec)
3455                         return ERR_PTR(-ENOMEM);
3456                 rec->objectid = objectid;
3457                 INIT_LIST_HEAD(&rec->backrefs);
3458                 rec->cache.start = objectid;
3459                 rec->cache.size = 1;
3460
3461                 ret = insert_cache_extent(root_cache, &rec->cache);
3462                 if (ret)
3463                         return ERR_PTR(-EEXIST);
3464         }
3465         return rec;
3466 }
3467
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469                                              u64 ref_root, u64 dir, u64 index,
3470                                              const char *name, int namelen)
3471 {
3472         struct root_backref *backref;
3473
3474         list_for_each_entry(backref, &rec->backrefs, list) {
3475                 if (backref->ref_root != ref_root || backref->dir != dir ||
3476                     backref->namelen != namelen)
3477                         continue;
3478                 if (memcmp(name, backref->name, namelen))
3479                         continue;
3480                 return backref;
3481         }
3482
3483         backref = calloc(1, sizeof(*backref) + namelen + 1);
3484         if (!backref)
3485                 return NULL;
3486         backref->ref_root = ref_root;
3487         backref->dir = dir;
3488         backref->index = index;
3489         backref->namelen = namelen;
3490         memcpy(backref->name, name, namelen);
3491         backref->name[namelen] = '\0';
3492         list_add_tail(&backref->list, &rec->backrefs);
3493         return backref;
3494 }
3495
3496 static void free_root_record(struct cache_extent *cache)
3497 {
3498         struct root_record *rec;
3499         struct root_backref *backref;
3500
3501         rec = container_of(cache, struct root_record, cache);
3502         while (!list_empty(&rec->backrefs)) {
3503                 backref = to_root_backref(rec->backrefs.next);
3504                 list_del(&backref->list);
3505                 free(backref);
3506         }
3507
3508         free(rec);
3509 }
3510
/*
 * Instantiate the cache-tree teardown helper for root records, with
 * free_root_record() as the per-entry destructor.
 * NOTE(review): presumably this defines free_root_recs_tree(); confirm
 * against the FREE_EXTENT_CACHE_BASED_TREE definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3512
3513 static int add_root_backref(struct cache_tree *root_cache,
3514                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3515                             const char *name, int namelen,
3516                             int item_type, int errors)
3517 {
3518         struct root_record *rec;
3519         struct root_backref *backref;
3520
3521         rec = get_root_rec(root_cache, root_id);
3522         BUG_ON(IS_ERR(rec));
3523         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3524         BUG_ON(!backref);
3525
3526         backref->errors |= errors;
3527
3528         if (item_type != BTRFS_DIR_ITEM_KEY) {
3529                 if (backref->found_dir_index || backref->found_back_ref ||
3530                     backref->found_forward_ref) {
3531                         if (backref->index != index)
3532                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3533                 } else {
3534                         backref->index = index;
3535                 }
3536         }
3537
3538         if (item_type == BTRFS_DIR_ITEM_KEY) {
3539                 if (backref->found_forward_ref)
3540                         rec->found_ref++;
3541                 backref->found_dir_item = 1;
3542         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543                 backref->found_dir_index = 1;
3544         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545                 if (backref->found_forward_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3547                 else if (backref->found_dir_item)
3548                         rec->found_ref++;
3549                 backref->found_forward_ref = 1;
3550         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551                 if (backref->found_back_ref)
3552                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553                 backref->found_back_ref = 1;
3554         } else {
3555                 BUG_ON(1);
3556         }
3557
3558         if (backref->found_forward_ref && backref->found_dir_item)
3559                 backref->reachable = 1;
3560         return 0;
3561 }
3562
3563 static int merge_root_recs(struct btrfs_root *root,
3564                            struct cache_tree *src_cache,
3565                            struct cache_tree *dst_cache)
3566 {
3567         struct cache_extent *cache;
3568         struct ptr_node *node;
3569         struct inode_record *rec;
3570         struct inode_backref *backref;
3571         int ret = 0;
3572
3573         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574                 free_inode_recs_tree(src_cache);
3575                 return 0;
3576         }
3577
3578         while (1) {
3579                 cache = search_cache_extent(src_cache, 0);
3580                 if (!cache)
3581                         break;
3582                 node = container_of(cache, struct ptr_node, cache);
3583                 rec = node->data;
3584                 remove_cache_extent(src_cache, &node->cache);
3585                 free(node);
3586
3587                 ret = is_child_root(root, root->objectid, rec->ino);
3588                 if (ret < 0)
3589                         break;
3590                 else if (ret == 0)
3591                         goto skip;
3592
3593                 list_for_each_entry(backref, &rec->backrefs, list) {
3594                         BUG_ON(backref->found_inode_ref);
3595                         if (backref->found_dir_item)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3600                                         backref->errors);
3601                         if (backref->found_dir_index)
3602                                 add_root_backref(dst_cache, rec->ino,
3603                                         root->root_key.objectid, backref->dir,
3604                                         backref->index, backref->name,
3605                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3606                                         backref->errors);
3607                 }
3608 skip:
3609                 free_inode_rec(rec);
3610         }
3611         if (ret < 0)
3612                 return ret;
3613         return 0;
3614 }
3615
3616 static int check_root_refs(struct btrfs_root *root,
3617                            struct cache_tree *root_cache)
3618 {
3619         struct root_record *rec;
3620         struct root_record *ref_root;
3621         struct root_backref *backref;
3622         struct cache_extent *cache;
3623         int loop = 1;
3624         int ret;
3625         int error;
3626         int errors = 0;
3627
3628         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629         BUG_ON(IS_ERR(rec));
3630         rec->found_ref = 1;
3631
3632         /* fixme: this can not detect circular references */
3633         while (loop) {
3634                 loop = 0;
3635                 cache = search_cache_extent(root_cache, 0);
3636                 while (1) {
3637                         if (!cache)
3638                                 break;
3639                         rec = container_of(cache, struct root_record, cache);
3640                         cache = next_cache_extent(cache);
3641
3642                         if (rec->found_ref == 0)
3643                                 continue;
3644
3645                         list_for_each_entry(backref, &rec->backrefs, list) {
3646                                 if (!backref->reachable)
3647                                         continue;
3648
3649                                 ref_root = get_root_rec(root_cache,
3650                                                         backref->ref_root);
3651                                 BUG_ON(IS_ERR(ref_root));
3652                                 if (ref_root->found_ref > 0)
3653                                         continue;
3654
3655                                 backref->reachable = 0;
3656                                 rec->found_ref--;
3657                                 if (rec->found_ref == 0)
3658                                         loop = 1;
3659                         }
3660                 }
3661         }
3662
3663         cache = search_cache_extent(root_cache, 0);
3664         while (1) {
3665                 if (!cache)
3666                         break;
3667                 rec = container_of(cache, struct root_record, cache);
3668                 cache = next_cache_extent(cache);
3669
3670                 if (rec->found_ref == 0 &&
3671                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673                         ret = check_orphan_item(root->fs_info->tree_root,
3674                                                 rec->objectid);
3675                         if (ret == 0)
3676                                 continue;
3677
3678                         /*
3679                          * If we don't have a root item then we likely just have
3680                          * a dir item in a snapshot for this root but no actual
3681                          * ref key or anything so it's meaningless.
3682                          */
3683                         if (!rec->found_root_item)
3684                                 continue;
3685                         errors++;
3686                         fprintf(stderr, "fs tree %llu not referenced\n",
3687                                 (unsigned long long)rec->objectid);
3688                 }
3689
3690                 error = 0;
3691                 if (rec->found_ref > 0 && !rec->found_root_item)
3692                         error = 1;
3693                 list_for_each_entry(backref, &rec->backrefs, list) {
3694                         if (!backref->found_dir_item)
3695                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696                         if (!backref->found_dir_index)
3697                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698                         if (!backref->found_back_ref)
3699                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700                         if (!backref->found_forward_ref)
3701                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3702                         if (backref->reachable && backref->errors)
3703                                 error = 1;
3704                 }
3705                 if (!error)
3706                         continue;
3707
3708                 errors++;
3709                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710                         (unsigned long long)rec->objectid, rec->found_ref,
3711                          rec->found_root_item ? "" : "not found");
3712
3713                 list_for_each_entry(backref, &rec->backrefs, list) {
3714                         if (!backref->reachable)
3715                                 continue;
3716                         if (!backref->errors && rec->found_root_item)
3717                                 continue;
3718                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719                                 " index %llu namelen %u name %s errors %x\n",
3720                                 (unsigned long long)backref->ref_root,
3721                                 (unsigned long long)backref->dir,
3722                                 (unsigned long long)backref->index,
3723                                 backref->namelen, backref->name,
3724                                 backref->errors);
3725                         print_ref_error(backref->errors);
3726                 }
3727         }
3728         return errors > 0 ? 1 : 0;
3729 }
3730
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732                             struct btrfs_key *key,
3733                             struct cache_tree *root_cache)
3734 {
3735         u64 dirid;
3736         u64 index;
3737         u32 len;
3738         u32 name_len;
3739         struct btrfs_root_ref *ref;
3740         char namebuf[BTRFS_NAME_LEN];
3741         int error;
3742
3743         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3744
3745         dirid = btrfs_root_ref_dirid(eb, ref);
3746         index = btrfs_root_ref_sequence(eb, ref);
3747         name_len = btrfs_root_ref_name_len(eb, ref);
3748
3749         if (name_len <= BTRFS_NAME_LEN) {
3750                 len = name_len;
3751                 error = 0;
3752         } else {
3753                 len = BTRFS_NAME_LEN;
3754                 error = REF_ERR_NAME_TOO_LONG;
3755         }
3756         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3757
3758         if (key->type == BTRFS_ROOT_REF_KEY) {
3759                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760                                  index, namebuf, len, key->type, error);
3761         } else {
3762                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763                                  index, namebuf, len, key->type, error);
3764         }
3765         return 0;
3766 }
3767
3768 static void free_corrupt_block(struct cache_extent *cache)
3769 {
3770         struct btrfs_corrupt_block *corrupt;
3771
3772         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3773         free(corrupt);
3774 }
3775
/*
 * Instantiate the cache-tree teardown helper for corrupt block records,
 * with free_corrupt_block() as the per-entry destructor.
 * NOTE(review): presumably this defines free_corrupt_blocks_tree(); confirm
 * against the FREE_EXTENT_CACHE_BASED_TREE definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3777
3778 /*
3779  * Repair the btree of the given root.
3780  *
3781  * The fix is to remove the node key in corrupt_blocks cache_tree.
3782  * and rebalance the tree.
3783  * After the fix, the btree should be writeable.
3784  */
3785 static int repair_btree(struct btrfs_root *root,
3786                         struct cache_tree *corrupt_blocks)
3787 {
3788         struct btrfs_trans_handle *trans;
3789         struct btrfs_path path;
3790         struct btrfs_corrupt_block *corrupt;
3791         struct cache_extent *cache;
3792         struct btrfs_key key;
3793         u64 offset;
3794         int level;
3795         int ret = 0;
3796
3797         if (cache_tree_empty(corrupt_blocks))
3798                 return 0;
3799
3800         trans = btrfs_start_transaction(root, 1);
3801         if (IS_ERR(trans)) {
3802                 ret = PTR_ERR(trans);
3803                 fprintf(stderr, "Error starting transaction: %s\n",
3804                         strerror(-ret));
3805                 return ret;
3806         }
3807         btrfs_init_path(&path);
3808         cache = first_cache_extent(corrupt_blocks);
3809         while (cache) {
3810                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3811                                        cache);
3812                 level = corrupt->level;
3813                 path.lowest_level = level;
3814                 key.objectid = corrupt->key.objectid;
3815                 key.type = corrupt->key.type;
3816                 key.offset = corrupt->key.offset;
3817
3818                 /*
3819                  * Here we don't want to do any tree balance, since it may
3820                  * cause a balance with corrupted brother leaf/node,
3821                  * so ins_len set to 0 here.
3822                  * Balance will be done after all corrupt node/leaf is deleted.
3823                  */
3824                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3825                 if (ret < 0)
3826                         goto out;
3827                 offset = btrfs_node_blockptr(path.nodes[level],
3828                                              path.slots[level]);
3829
3830                 /* Remove the ptr */
3831                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3832                 if (ret < 0)
3833                         goto out;
3834                 /*
3835                  * Remove the corresponding extent
3836                  * return value is not concerned.
3837                  */
3838                 btrfs_release_path(&path);
3839                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840                                         0, root->root_key.objectid,
3841                                         level - 1, 0);
3842                 cache = next_cache_extent(cache);
3843         }
3844
3845         /* Balance the btree using btrfs_search_slot() */
3846         cache = first_cache_extent(corrupt_blocks);
3847         while (cache) {
3848                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3849                                        cache);
3850                 memcpy(&key, &corrupt->key, sizeof(key));
3851                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3852                 if (ret < 0)
3853                         goto out;
3854                 /* return will always >0 since it won't find the item */
3855                 ret = 0;
3856                 btrfs_release_path(&path);
3857                 cache = next_cache_extent(cache);
3858         }
3859 out:
3860         btrfs_commit_transaction(trans, root);
3861         btrfs_release_path(&path);
3862         return ret;
3863 }
3864
3865 static int check_fs_root(struct btrfs_root *root,
3866                          struct cache_tree *root_cache,
3867                          struct walk_control *wc)
3868 {
3869         int ret = 0;
3870         int err = 0;
3871         int wret;
3872         int level;
3873         struct btrfs_path path;
3874         struct shared_node root_node;
3875         struct root_record *rec;
3876         struct btrfs_root_item *root_item = &root->root_item;
3877         struct cache_tree corrupt_blocks;
3878         struct orphan_data_extent *orphan;
3879         struct orphan_data_extent *tmp;
3880         enum btrfs_tree_block_status status;
3881         struct node_refs nrefs;
3882
3883         /*
3884          * Reuse the corrupt_block cache tree to record corrupted tree block
3885          *
3886          * Unlike the usage in extent tree check, here we do it in a per
3887          * fs/subvol tree base.
3888          */
3889         cache_tree_init(&corrupt_blocks);
3890         root->fs_info->corrupt_blocks = &corrupt_blocks;
3891
3892         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893                 rec = get_root_rec(root_cache, root->root_key.objectid);
3894                 BUG_ON(IS_ERR(rec));
3895                 if (btrfs_root_refs(root_item) > 0)
3896                         rec->found_root_item = 1;
3897         }
3898
3899         btrfs_init_path(&path);
3900         memset(&root_node, 0, sizeof(root_node));
3901         cache_tree_init(&root_node.root_cache);
3902         cache_tree_init(&root_node.inode_cache);
3903         memset(&nrefs, 0, sizeof(nrefs));
3904
3905         /* Move the orphan extent record to corresponding inode_record */
3906         list_for_each_entry_safe(orphan, tmp,
3907                                  &root->orphan_data_extents, list) {
3908                 struct inode_record *inode;
3909
3910                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3911                                       1);
3912                 BUG_ON(IS_ERR(inode));
3913                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914                 list_move(&orphan->list, &inode->orphan_extents);
3915         }
3916
3917         level = btrfs_header_level(root->node);
3918         memset(wc->nodes, 0, sizeof(wc->nodes));
3919         wc->nodes[level] = &root_node;
3920         wc->active_node = level;
3921         wc->root_level = level;
3922
3923         /* We may not have checked the root block, lets do that now */
3924         if (btrfs_is_leaf(root->node))
3925                 status = btrfs_check_leaf(root, NULL, root->node);
3926         else
3927                 status = btrfs_check_node(root, NULL, root->node);
3928         if (status != BTRFS_TREE_BLOCK_CLEAN)
3929                 return -EIO;
3930
3931         if (btrfs_root_refs(root_item) > 0 ||
3932             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933                 path.nodes[level] = root->node;
3934                 extent_buffer_get(root->node);
3935                 path.slots[level] = 0;
3936         } else {
3937                 struct btrfs_key key;
3938                 struct btrfs_disk_key found_key;
3939
3940                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941                 level = root_item->drop_level;
3942                 path.lowest_level = level;
3943                 if (level > btrfs_header_level(root->node) ||
3944                     level >= BTRFS_MAX_LEVEL) {
3945                         error("ignoring invalid drop level: %u", level);
3946                         goto skip_walking;
3947                 }
3948                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3949                 if (wret < 0)
3950                         goto skip_walking;
3951                 btrfs_node_key(path.nodes[level], &found_key,
3952                                 path.slots[level]);
3953                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954                                         sizeof(found_key)));
3955         }
3956
3957         while (1) {
3958                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963
3964                 wret = walk_up_tree(root, &path, wc, &level);
3965                 if (wret < 0)
3966                         ret = wret;
3967                 if (wret != 0)
3968                         break;
3969         }
3970 skip_walking:
3971         btrfs_release_path(&path);
3972
3973         if (!cache_tree_empty(&corrupt_blocks)) {
3974                 struct cache_extent *cache;
3975                 struct btrfs_corrupt_block *corrupt;
3976
3977                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978                        root->root_key.objectid);
3979                 cache = first_cache_extent(&corrupt_blocks);
3980                 while (cache) {
3981                         corrupt = container_of(cache,
3982                                                struct btrfs_corrupt_block,
3983                                                cache);
3984                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985                                cache->start, corrupt->level,
3986                                corrupt->key.objectid, corrupt->key.type,
3987                                corrupt->key.offset);
3988                         cache = next_cache_extent(cache);
3989                 }
3990                 if (repair) {
3991                         printf("Try to repair the btree for root %llu\n",
3992                                root->root_key.objectid);
3993                         ret = repair_btree(root, &corrupt_blocks);
3994                         if (ret < 0)
3995                                 fprintf(stderr, "Failed to repair btree: %s\n",
3996                                         strerror(-ret));
3997                         if (!ret)
3998                                 printf("Btree for root %llu is fixed\n",
3999                                        root->root_key.objectid);
4000                 }
4001         }
4002
4003         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4004         if (err < 0)
4005                 ret = err;
4006
4007         if (root_node.current) {
4008                 root_node.current->checked = 1;
4009                 maybe_free_inode_rec(&root_node.inode_cache,
4010                                 root_node.current);
4011         }
4012
4013         err = check_inode_recs(root, &root_node.inode_cache);
4014         if (!ret)
4015                 ret = err;
4016
4017         free_corrupt_blocks_tree(&corrupt_blocks);
4018         root->fs_info->corrupt_blocks = NULL;
4019         free_orphan_data_extents(&root->orphan_data_extents);
4020         return ret;
4021 }
4022
4023 static int fs_root_objectid(u64 objectid)
4024 {
4025         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4027                 return 1;
4028         return is_fstree(objectid);
4029 }
4030
4031 static int check_fs_roots(struct btrfs_root *root,
4032                           struct cache_tree *root_cache)
4033 {
4034         struct btrfs_path path;
4035         struct btrfs_key key;
4036         struct walk_control wc;
4037         struct extent_buffer *leaf, *tree_node;
4038         struct btrfs_root *tmp_root;
4039         struct btrfs_root *tree_root = root->fs_info->tree_root;
4040         int ret;
4041         int err = 0;
4042
4043         if (ctx.progress_enabled) {
4044                 ctx.tp = TASK_FS_ROOTS;
4045                 task_start(ctx.info);
4046         }
4047
4048         /*
4049          * Just in case we made any changes to the extent tree that weren't
4050          * reflected into the free space cache yet.
4051          */
4052         if (repair)
4053                 reset_cached_block_groups(root->fs_info);
4054         memset(&wc, 0, sizeof(wc));
4055         cache_tree_init(&wc.shared);
4056         btrfs_init_path(&path);
4057
4058 again:
4059         key.offset = 0;
4060         key.objectid = 0;
4061         key.type = BTRFS_ROOT_ITEM_KEY;
4062         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4063         if (ret < 0) {
4064                 err = 1;
4065                 goto out;
4066         }
4067         tree_node = tree_root->node;
4068         while (1) {
4069                 if (tree_node != tree_root->node) {
4070                         free_root_recs_tree(root_cache);
4071                         btrfs_release_path(&path);
4072                         goto again;
4073                 }
4074                 leaf = path.nodes[0];
4075                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076                         ret = btrfs_next_leaf(tree_root, &path);
4077                         if (ret) {
4078                                 if (ret < 0)
4079                                         err = 1;
4080                                 break;
4081                         }
4082                         leaf = path.nodes[0];
4083                 }
4084                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086                     fs_root_objectid(key.objectid)) {
4087                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4088                                 tmp_root = btrfs_read_fs_root_no_cache(
4089                                                 root->fs_info, &key);
4090                         } else {
4091                                 key.offset = (u64)-1;
4092                                 tmp_root = btrfs_read_fs_root(
4093                                                 root->fs_info, &key);
4094                         }
4095                         if (IS_ERR(tmp_root)) {
4096                                 err = 1;
4097                                 goto next;
4098                         }
4099                         ret = check_fs_root(tmp_root, root_cache, &wc);
4100                         if (ret == -EAGAIN) {
4101                                 free_root_recs_tree(root_cache);
4102                                 btrfs_release_path(&path);
4103                                 goto again;
4104                         }
4105                         if (ret)
4106                                 err = 1;
4107                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108                                 btrfs_free_fs_root(tmp_root);
4109                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4111                         process_root_ref(leaf, path.slots[0], &key,
4112                                          root_cache);
4113                 }
4114 next:
4115                 path.slots[0]++;
4116         }
4117 out:
4118         btrfs_release_path(&path);
4119         if (err)
4120                 free_extent_cache_tree(&wc.shared);
4121         if (!cache_tree_empty(&wc.shared))
4122                 fprintf(stderr, "warning line %d\n", __LINE__);
4123
4124         task_stop(ctx.info);
4125
4126         return err;
4127 }
4128
4129 /*
4130  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4131  * INODE_REF/INODE_EXTREF match.
4132  *
4133  * @root:       the root of the fs/file tree
4134  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4135  * @key:        the key of the DIR_ITEM/DIR_INDEX
4136  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4137  *              distinguish root_dir between normal dir/file
4138  * @name:       the name in the INODE_REF/INODE_EXTREF
4139  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4140  * @mode:       the st_mode of INODE_ITEM
4141  *
4142  * Return 0 if no error occurred.
4143  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4145  * dir/file.
4146  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4147  * not match for normal dir/file.
4148  */
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150                          struct btrfs_key *key, u64 index, char *name,
4151                          u32 namelen, u32 mode)
4152 {
4153         struct btrfs_path path;
4154         struct extent_buffer *node;
4155         struct btrfs_dir_item *di;
4156         struct btrfs_key location;
4157         char namebuf[BTRFS_NAME_LEN] = {0};
4158         u32 total;
4159         u32 cur = 0;
4160         u32 len;
4161         u32 name_len;
4162         u32 data_len;
4163         u8 filetype;
4164         int slot;
4165         int ret;
4166
4167         btrfs_init_path(&path);
4168         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4169         if (ret < 0) {
4170                 ret = DIR_ITEM_MISSING;
4171                 goto out;
4172         }
4173
4174         /* Process root dir and goto out*/
4175         if (index == 0) {
4176                 if (ret == 0) {
4177                         ret = ROOT_DIR_ERROR;
4178                         error(
4179                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4180                                 root->objectid,
4181                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4182                                         "REF" : "EXTREF",
4183                                 ref_key->objectid, ref_key->offset,
4184                                 key->type == BTRFS_DIR_ITEM_KEY ?
4185                                         "DIR_ITEM" : "DIR_INDEX");
4186                 } else {
4187                         ret = 0;
4188                 }
4189
4190                 goto out;
4191         }
4192
4193         /* Process normal file/dir */
4194         if (ret > 0) {
4195                 ret = DIR_ITEM_MISSING;
4196                 error(
4197                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4198                         root->objectid,
4199                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200                         ref_key->objectid, ref_key->offset,
4201                         key->type == BTRFS_DIR_ITEM_KEY ?
4202                                 "DIR_ITEM" : "DIR_INDEX",
4203                         key->objectid, key->offset, namelen, name,
4204                         imode_to_type(mode));
4205                 goto out;
4206         }
4207
4208         /* Check whether inode_id/filetype/name match */
4209         node = path.nodes[0];
4210         slot = path.slots[0];
4211         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212         total = btrfs_item_size_nr(node, slot);
4213         while (cur < total) {
4214                 ret = DIR_ITEM_MISMATCH;
4215                 name_len = btrfs_dir_name_len(node, di);
4216                 data_len = btrfs_dir_data_len(node, di);
4217
4218                 btrfs_dir_item_key_to_cpu(node, di, &location);
4219                 if (location.objectid != ref_key->objectid ||
4220                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4221                     location.offset != 0)
4222                         goto next;
4223
4224                 filetype = btrfs_dir_type(node, di);
4225                 if (imode_to_type(mode) != filetype)
4226                         goto next;
4227
4228                 if (name_len <= BTRFS_NAME_LEN) {
4229                         len = name_len;
4230                 } else {
4231                         len = BTRFS_NAME_LEN;
4232                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4233                         root->objectid,
4234                         key->type == BTRFS_DIR_ITEM_KEY ?
4235                         "DIR_ITEM" : "DIR_INDEX",
4236                         key->objectid, key->offset, name_len);
4237                 }
4238                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239                 if (len != namelen || strncmp(namebuf, name, len))
4240                         goto next;
4241
4242                 ret = 0;
4243                 goto out;
4244 next:
4245                 len = sizeof(*di) + name_len + data_len;
4246                 di = (struct btrfs_dir_item *)((char *)di + len);
4247                 cur += len;
4248         }
4249         if (ret == DIR_ITEM_MISMATCH)
4250                 error(
4251                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4252                         root->objectid,
4253                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254                         ref_key->objectid, ref_key->offset,
4255                         key->type == BTRFS_DIR_ITEM_KEY ?
4256                                 "DIR_ITEM" : "DIR_INDEX",
4257                         key->objectid, key->offset, namelen, name,
4258                         imode_to_type(mode));
4259 out:
4260         btrfs_release_path(&path);
4261         return ret;
4262 }
4263
4264 /*
4265  * Traverse the given INODE_REF and call find_dir_item() to find related
4266  * DIR_ITEM/DIR_INDEX.
4267  *
4268  * @root:       the root of the fs/file tree
4269  * @ref_key:    the key of the INODE_REF
4270  * @refs:       the count of INODE_REF
4271  * @mode:       the st_mode of INODE_ITEM
4272  *
4273  * Return 0 if no error occurred.
4274  */
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276                            struct extent_buffer *node, int slot, u64 *refs,
4277                            int mode)
4278 {
4279         struct btrfs_key key;
4280         struct btrfs_inode_ref *ref;
4281         char namebuf[BTRFS_NAME_LEN] = {0};
4282         u32 total;
4283         u32 cur = 0;
4284         u32 len;
4285         u32 name_len;
4286         u64 index;
4287         int ret, err = 0;
4288
4289         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290         total = btrfs_item_size_nr(node, slot);
4291
4292 next:
4293         /* Update inode ref count */
4294         (*refs)++;
4295
4296         index = btrfs_inode_ref_index(node, ref);
4297         name_len = btrfs_inode_ref_name_len(node, ref);
4298         if (name_len <= BTRFS_NAME_LEN) {
4299                 len = name_len;
4300         } else {
4301                 len = BTRFS_NAME_LEN;
4302                 warning("root %llu INODE_REF[%llu %llu] name too long",
4303                         root->objectid, ref_key->objectid, ref_key->offset);
4304         }
4305
4306         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4307
4308         /* Check root dir ref name */
4309         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311                       root->objectid, ref_key->objectid, ref_key->offset,
4312                       namebuf);
4313                 err |= ROOT_DIR_ERROR;
4314         }
4315
4316         /* Find related DIR_INDEX */
4317         key.objectid = ref_key->offset;
4318         key.type = BTRFS_DIR_INDEX_KEY;
4319         key.offset = index;
4320         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4321         err |= ret;
4322
4323         /* Find related dir_item */
4324         key.objectid = ref_key->offset;
4325         key.type = BTRFS_DIR_ITEM_KEY;
4326         key.offset = btrfs_name_hash(namebuf, len);
4327         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4328         err |= ret;
4329
4330         len = sizeof(*ref) + name_len;
4331         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4332         cur += len;
4333         if (cur < total)
4334                 goto next;
4335
4336         return err;
4337 }
4338
4339 /*
4340  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341  * DIR_ITEM/DIR_INDEX.
4342  *
4343  * @root:       the root of the fs/file tree
4344  * @ref_key:    the key of the INODE_EXTREF
4345  * @refs:       the count of INODE_EXTREF
4346  * @mode:       the st_mode of INODE_ITEM
4347  *
4348  * Return 0 if no error occurred.
4349  */
4350 static int check_inode_extref(struct btrfs_root *root,
4351                               struct btrfs_key *ref_key,
4352                               struct extent_buffer *node, int slot, u64 *refs,
4353                               int mode)
4354 {
4355         struct btrfs_key key;
4356         struct btrfs_inode_extref *extref;
4357         char namebuf[BTRFS_NAME_LEN] = {0};
4358         u32 total;
4359         u32 cur = 0;
4360         u32 len;
4361         u32 name_len;
4362         u64 index;
4363         u64 parent;
4364         int ret;
4365         int err = 0;
4366
4367         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368         total = btrfs_item_size_nr(node, slot);
4369
4370 next:
4371         /* update inode ref count */
4372         (*refs)++;
4373         name_len = btrfs_inode_extref_name_len(node, extref);
4374         index = btrfs_inode_extref_index(node, extref);
4375         parent = btrfs_inode_extref_parent(node, extref);
4376         if (name_len <= BTRFS_NAME_LEN) {
4377                 len = name_len;
4378         } else {
4379                 len = BTRFS_NAME_LEN;
4380                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381                         root->objectid, ref_key->objectid, ref_key->offset);
4382         }
4383         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4384
4385         /* Check root dir ref name */
4386         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388                       root->objectid, ref_key->objectid, ref_key->offset,
4389                       namebuf);
4390                 err |= ROOT_DIR_ERROR;
4391         }
4392
4393         /* find related dir_index */
4394         key.objectid = parent;
4395         key.type = BTRFS_DIR_INDEX_KEY;
4396         key.offset = index;
4397         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4398         err |= ret;
4399
4400         /* find related dir_item */
4401         key.objectid = parent;
4402         key.type = BTRFS_DIR_ITEM_KEY;
4403         key.offset = btrfs_name_hash(namebuf, len);
4404         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4405         err |= ret;
4406
4407         len = sizeof(*extref) + name_len;
4408         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4409         cur += len;
4410
4411         if (cur < total)
4412                 goto next;
4413
4414         return err;
4415 }
4416
4417 /*
4418  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4419  * DIR_ITEM/DIR_INDEX match.
4420  *
4421  * @root:       the root of the fs/file tree
4422  * @key:        the key of the INODE_REF/INODE_EXTREF
4423  * @name:       the name in the INODE_REF/INODE_EXTREF
4424  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4425  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4426  * to (u64)-1
4427  * @ext_ref:    the EXTENDED_IREF feature
4428  *
4429  * Return 0 if no error occurred.
4430  * Return >0 for error bitmap
4431  */
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433                           char *name, int namelen, u64 index,
4434                           unsigned int ext_ref)
4435 {
4436         struct btrfs_path path;
4437         struct btrfs_inode_ref *ref;
4438         struct btrfs_inode_extref *extref;
4439         struct extent_buffer *node;
4440         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4441         u32 total;
4442         u32 cur = 0;
4443         u32 len;
4444         u32 ref_namelen;
4445         u64 ref_index;
4446         u64 parent;
4447         u64 dir_id;
4448         int slot;
4449         int ret;
4450
4451         btrfs_init_path(&path);
4452         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4453         if (ret) {
4454                 ret = INODE_REF_MISSING;
4455                 goto extref;
4456         }
4457
4458         node = path.nodes[0];
4459         slot = path.slots[0];
4460
4461         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462         total = btrfs_item_size_nr(node, slot);
4463
4464         /* Iterate all entry of INODE_REF */
4465         while (cur < total) {
4466                 ret = INODE_REF_MISSING;
4467
4468                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469                 ref_index = btrfs_inode_ref_index(node, ref);
4470                 if (index != (u64)-1 && index != ref_index)
4471                         goto next_ref;
4472
4473                 if (ref_namelen <= BTRFS_NAME_LEN) {
4474                         len = ref_namelen;
4475                 } else {
4476                         len = BTRFS_NAME_LEN;
4477                         warning("root %llu INODE %s[%llu %llu] name too long",
4478                                 root->objectid,
4479                                 key->type == BTRFS_INODE_REF_KEY ?
4480                                         "REF" : "EXTREF",
4481                                 key->objectid, key->offset);
4482                 }
4483                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4484                                    len);
4485
4486                 if (len != namelen || strncmp(ref_namebuf, name, len))
4487                         goto next_ref;
4488
4489                 ret = 0;
4490                 goto out;
4491 next_ref:
4492                 len = sizeof(*ref) + ref_namelen;
4493                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4494                 cur += len;
4495         }
4496
4497 extref:
4498         /* Skip if not support EXTENDED_IREF feature */
4499         if (!ext_ref)
4500                 goto out;
4501
4502         btrfs_release_path(&path);
4503         btrfs_init_path(&path);
4504
4505         dir_id = key->offset;
4506         key->type = BTRFS_INODE_EXTREF_KEY;
4507         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4508
4509         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4510         if (ret) {
4511                 ret = INODE_REF_MISSING;
4512                 goto out;
4513         }
4514
4515         node = path.nodes[0];
4516         slot = path.slots[0];
4517
4518         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4519         cur = 0;
4520         total = btrfs_item_size_nr(node, slot);
4521
4522         /* Iterate all entry of INODE_EXTREF */
4523         while (cur < total) {
4524                 ret = INODE_REF_MISSING;
4525
4526                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527                 ref_index = btrfs_inode_extref_index(node, extref);
4528                 parent = btrfs_inode_extref_parent(node, extref);
4529                 if (index != (u64)-1 && index != ref_index)
4530                         goto next_extref;
4531
4532                 if (parent != dir_id)
4533                         goto next_extref;
4534
4535                 if (ref_namelen <= BTRFS_NAME_LEN) {
4536                         len = ref_namelen;
4537                 } else {
4538                         len = BTRFS_NAME_LEN;
4539                         warning("root %llu INODE %s[%llu %llu] name too long",
4540                                 root->objectid,
4541                                 key->type == BTRFS_INODE_REF_KEY ?
4542                                         "REF" : "EXTREF",
4543                                 key->objectid, key->offset);
4544                 }
4545                 read_extent_buffer(node, ref_namebuf,
4546                                    (unsigned long)(extref + 1), len);
4547
4548                 if (len != namelen || strncmp(ref_namebuf, name, len))
4549                         goto next_extref;
4550
4551                 ret = 0;
4552                 goto out;
4553
4554 next_extref:
4555                 len = sizeof(*extref) + ref_namelen;
4556                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4557                 cur += len;
4558
4559         }
4560 out:
4561         btrfs_release_path(&path);
4562         return ret;
4563 }
4564
4565 /*
4566  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4568  *
4569  * @root:       the root of the fs/file tree
4570  * @key:        the key of the DIR_ITEM/DIR_INDEX
4571  * @size:       the st_size of the INODE_ITEM
4572  * @ext_ref:    the EXTENDED_IREF feature
4573  *
4574  * Return 0 if no error occurred.
4575  */
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577                           struct extent_buffer *node, int slot, u64 *size,
4578                           unsigned int ext_ref)
4579 {
4580         struct btrfs_dir_item *di;
4581         struct btrfs_inode_item *ii;
4582         struct btrfs_path path;
4583         struct btrfs_key location;
4584         char namebuf[BTRFS_NAME_LEN] = {0};
4585         u32 total;
4586         u32 cur = 0;
4587         u32 len;
4588         u32 name_len;
4589         u32 data_len;
4590         u8 filetype;
4591         u32 mode;
4592         u64 index;
4593         int ret;
4594         int err = 0;
4595
4596         /*
4597          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598          * ignore index check.
4599          */
4600         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4601
4602         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603         total = btrfs_item_size_nr(node, slot);
4604
4605         while (cur < total) {
4606                 data_len = btrfs_dir_data_len(node, di);
4607                 if (data_len)
4608                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610                               "DIR_ITEM" : "DIR_INDEX",
4611                               key->objectid, key->offset, data_len);
4612
4613                 name_len = btrfs_dir_name_len(node, di);
4614                 if (name_len <= BTRFS_NAME_LEN) {
4615                         len = name_len;
4616                 } else {
4617                         len = BTRFS_NAME_LEN;
4618                         warning("root %llu %s[%llu %llu] name too long",
4619                                 root->objectid,
4620                                 key->type == BTRFS_DIR_ITEM_KEY ?
4621                                 "DIR_ITEM" : "DIR_INDEX",
4622                                 key->objectid, key->offset);
4623                 }
4624                 (*size) += name_len;
4625
4626                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627                 filetype = btrfs_dir_type(node, di);
4628
4629                 btrfs_init_path(&path);
4630                 btrfs_dir_item_key_to_cpu(node, di, &location);
4631
4632                 /* Ignore related ROOT_ITEM check */
4633                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4634                         goto next;
4635
4636                 /* Check relative INODE_ITEM(existence/filetype) */
4637                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4638                 if (ret) {
4639                         err |= INODE_ITEM_MISSING;
4640                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643                               key->offset, location.objectid, name_len,
4644                               namebuf, filetype);
4645                         goto next;
4646                 }
4647
4648                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649                                     struct btrfs_inode_item);
4650                 mode = btrfs_inode_mode(path.nodes[0], ii);
4651
4652                 if (imode_to_type(mode) != filetype) {
4653                         err |= INODE_ITEM_MISMATCH;
4654                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657                               key->offset, name_len, namebuf, filetype);
4658                 }
4659
4660                 /* Check relative INODE_REF/INODE_EXTREF */
4661                 location.type = BTRFS_INODE_REF_KEY;
4662                 location.offset = key->objectid;
4663                 ret = find_inode_ref(root, &location, namebuf, len,
4664                                        index, ext_ref);
4665                 err |= ret;
4666                 if (ret & INODE_REF_MISSING)
4667                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670                               key->offset, name_len, namebuf, filetype);
4671
4672 next:
4673                 btrfs_release_path(&path);
4674                 len = sizeof(*di) + name_len + data_len;
4675                 di = (struct btrfs_dir_item *)((char *)di + len);
4676                 cur += len;
4677
4678                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680                               root->objectid, key->objectid, key->offset);
4681                         break;
4682                 }
4683         }
4684
4685         return err;
4686 }
4687
4688 /*
4689  * Check file extent datasum/hole, update the size of the file extents,
4690  * check and update the last offset of the file extent.
4691  *
4692  * @root:       the root of fs/file tree.
4693  * @fkey:       the key of the file extent.
4694  * @nodatasum:  INODE_NODATASUM feature.
4695  * @size:       the sum of all EXTENT_DATA items size for this inode.
4696  * @end:        the offset of the last extent.
4697  *
4698  * Return 0 if no error occurred.
4699  */
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701                              struct extent_buffer *node, int slot,
4702                              unsigned int nodatasum, u64 *size, u64 *end)
4703 {
4704         struct btrfs_file_extent_item *fi;
4705         u64 disk_bytenr;
4706         u64 disk_num_bytes;
4707         u64 extent_num_bytes;
4708         u64 extent_offset;
4709         u64 csum_found;         /* In byte size, sectorsize aligned */
4710         u64 search_start;       /* Logical range start we search for csum */
4711         u64 search_len;         /* Logical range len we search for csum */
4712         unsigned int extent_type;
4713         unsigned int is_hole;
4714         int compressed = 0;
4715         int ret;
4716         int err = 0;
4717
4718         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4719
4720         /* Check inline extent */
4721         extent_type = btrfs_file_extent_type(node, fi);
4722         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723                 struct btrfs_item *e = btrfs_item_nr(slot);
4724                 u32 item_inline_len;
4725
4726                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728                 compressed = btrfs_file_extent_compression(node, fi);
4729                 if (extent_num_bytes == 0) {
4730                         error(
4731                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732                                 root->objectid, fkey->objectid, fkey->offset);
4733                         err |= FILE_EXTENT_ERROR;
4734                 }
4735                 if (!compressed && extent_num_bytes != item_inline_len) {
4736                         error(
4737                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738                                 root->objectid, fkey->objectid, fkey->offset,
4739                                 extent_num_bytes, item_inline_len);
4740                         err |= FILE_EXTENT_ERROR;
4741                 }
4742                 *size += extent_num_bytes;
4743                 return err;
4744         }
4745
4746         /* Check extent type */
4747         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749                 err |= FILE_EXTENT_ERROR;
4750                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752                 return err;
4753         }
4754
4755         /* Check REG_EXTENT/PREALLOC_EXTENT */
4756         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759         extent_offset = btrfs_file_extent_offset(node, fi);
4760         compressed = btrfs_file_extent_compression(node, fi);
4761         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4762
4763         /*
4764          * Check EXTENT_DATA csum
4765          *
4766          * For plain (uncompressed) extent, we should only check the range
4767          * we're referring to, as it's possible that part of prealloc extent
4768          * has been written, and has csum:
4769          *
4770          * |<--- Original large preallocated extent A ---->|
4771          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4772          *      No csum                         Has csum
4773          *
4774          * For compressed extent, we should check the whole range.
4775          */
4776         if (!compressed) {
4777                 search_start = disk_bytenr + extent_offset;
4778                 search_len = extent_num_bytes;
4779         } else {
4780                 search_start = disk_bytenr;
4781                 search_len = disk_num_bytes;
4782         }
4783         ret = count_csum_range(root, search_start, search_len, &csum_found);
4784         if (csum_found > 0 && nodatasum) {
4785                 err |= ODD_CSUM_ITEM;
4786                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787                       root->objectid, fkey->objectid, fkey->offset);
4788         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789                    !is_hole && (ret < 0 || csum_found < search_len)) {
4790                 err |= CSUM_ITEM_MISSING;
4791                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792                       root->objectid, fkey->objectid, fkey->offset,
4793                       csum_found, search_len);
4794         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795                 err |= ODD_CSUM_ITEM;
4796                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4798         }
4799
4800         /* Check EXTENT_DATA hole */
4801         if (no_holes && is_hole) {
4802                 err |= FILE_EXTENT_ERROR;
4803                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804                       root->objectid, fkey->objectid, fkey->offset);
4805         } else if (!no_holes && *end != fkey->offset) {
4806                 err |= FILE_EXTENT_ERROR;
4807                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808                       root->objectid, fkey->objectid, fkey->offset);
4809         }
4810
4811         *end += extent_num_bytes;
4812         if (!is_hole)
4813                 *size += extent_num_bytes;
4814
4815         return err;
4816 }
4817
4818 /*
4819  * Check INODE_ITEM and related ITEMs (the same inode number)
4820  * 1. check link count
4821  * 2. check inode ref/extref
4822  * 3. check dir item/index
4823  *
4824  * @ext_ref:    the EXTENDED_IREF feature
4825  *
4826  * Return 0 if no error occurred.
4827  * Return >0 for error or hit the traversal is done(by error bitmap)
4828  */
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830                             unsigned int ext_ref)
4831 {
4832         struct extent_buffer *node;
4833         struct btrfs_inode_item *ii;
4834         struct btrfs_key key;
4835         u64 inode_id;
4836         u32 mode;
4837         u64 nlink;
4838         u64 nbytes;
4839         u64 isize;
4840         u64 size = 0;
4841         u64 refs = 0;
4842         u64 extent_end = 0;
4843         u64 extent_size = 0;
4844         unsigned int dir;
4845         unsigned int nodatasum;
4846         int slot;
4847         int ret;
4848         int err = 0;
4849
4850         node = path->nodes[0];
4851         slot = path->slots[0];
4852
4853         btrfs_item_key_to_cpu(node, &key, slot);
4854         inode_id = key.objectid;
4855
4856         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857                 ret = btrfs_next_item(root, path);
4858                 if (ret > 0)
4859                         err |= LAST_ITEM;
4860                 return err;
4861         }
4862
4863         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864         isize = btrfs_inode_size(node, ii);
4865         nbytes = btrfs_inode_nbytes(node, ii);
4866         mode = btrfs_inode_mode(node, ii);
4867         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868         nlink = btrfs_inode_nlink(node, ii);
4869         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4870
4871         while (1) {
4872                 ret = btrfs_next_item(root, path);
4873                 if (ret < 0) {
4874                         /* out will fill 'err' rusing current statistics */
4875                         goto out;
4876                 } else if (ret > 0) {
4877                         err |= LAST_ITEM;
4878                         goto out;
4879                 }
4880
4881                 node = path->nodes[0];
4882                 slot = path->slots[0];
4883                 btrfs_item_key_to_cpu(node, &key, slot);
4884                 if (key.objectid != inode_id)
4885                         goto out;
4886
4887                 switch (key.type) {
4888                 case BTRFS_INODE_REF_KEY:
4889                         ret = check_inode_ref(root, &key, node, slot, &refs,
4890                                               mode);
4891                         err |= ret;
4892                         break;
4893                 case BTRFS_INODE_EXTREF_KEY:
4894                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896                                         root->objectid, key.objectid,
4897                                         key.offset);
4898                         ret = check_inode_extref(root, &key, node, slot, &refs,
4899                                                  mode);
4900                         err |= ret;
4901                         break;
4902                 case BTRFS_DIR_ITEM_KEY:
4903                 case BTRFS_DIR_INDEX_KEY:
4904                         if (!dir) {
4905                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906                                         root->objectid, inode_id,
4907                                         imode_to_type(mode), key.objectid,
4908                                         key.offset);
4909                         }
4910                         ret = check_dir_item(root, &key, node, slot, &size,
4911                                              ext_ref);
4912                         err |= ret;
4913                         break;
4914                 case BTRFS_EXTENT_DATA_KEY:
4915                         if (dir) {
4916                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917                                         root->objectid, inode_id, key.objectid,
4918                                         key.offset);
4919                         }
4920                         ret = check_file_extent(root, &key, node, slot,
4921                                                 nodatasum, &extent_size,
4922                                                 &extent_end);
4923                         err |= ret;
4924                         break;
4925                 case BTRFS_XATTR_ITEM_KEY:
4926                         break;
4927                 default:
4928                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929                               key.objectid, key.type, key.offset);
4930                 }
4931         }
4932
4933 out:
4934         /* verify INODE_ITEM nlink/isize/nbytes */
4935         if (dir) {
4936                 if (nlink != 1) {
4937                         err |= LINK_COUNT_ERROR;
4938                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939                               root->objectid, inode_id, nlink);
4940                 }
4941
4942                 /*
4943                  * Just a warning, as dir inode nbytes is just an
4944                  * instructive value.
4945                  */
4946                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948                                 root->objectid, inode_id, root->nodesize);
4949                 }
4950
4951                 if (isize != size) {
4952                         err |= ISIZE_ERROR;
4953                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954                               root->objectid, inode_id, isize, size);
4955                 }
4956         } else {
4957                 if (nlink != refs) {
4958                         err |= LINK_COUNT_ERROR;
4959                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960                               root->objectid, inode_id, nlink, refs);
4961                 } else if (!nlink) {
4962                         err |= ORPHAN_ITEM;
4963                 }
4964
4965                 if (!nbytes && !no_holes && extent_end < isize) {
4966                         err |= NBYTES_ERROR;
4967                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968                               root->objectid, inode_id, isize);
4969                 }
4970
4971                 if (nbytes != extent_size) {
4972                         err |= NBYTES_ERROR;
4973                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974                               root->objectid, inode_id, nbytes, extent_size);
4975                 }
4976         }
4977
4978         return err;
4979 }
4980
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4982 {
4983         struct btrfs_path path;
4984         struct btrfs_key key;
4985         int err = 0;
4986         int ret;
4987
4988         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989         key.type = BTRFS_INODE_ITEM_KEY;
4990         key.offset = 0;
4991
4992         /* For root being dropped, we don't need to check first inode */
4993         if (btrfs_root_refs(&root->root_item) == 0 &&
4994             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4995             key.objectid)
4996                 return 0;
4997
4998         btrfs_init_path(&path);
4999
5000         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5001         if (ret < 0)
5002                 goto out;
5003         if (ret > 0) {
5004                 ret = 0;
5005                 err |= INODE_ITEM_MISSING;
5006                 error("first inode item of root %llu is missing",
5007                       root->objectid);
5008         }
5009
5010         err |= check_inode_item(root, &path, ext_ref);
5011         err &= ~LAST_ITEM;
5012         if (err && !ret)
5013                 ret = -EIO;
5014 out:
5015         btrfs_release_path(&path);
5016         return ret;
5017 }
5018
5019 /*
5020  * Iterate all item on the tree and call check_inode_item() to check.
5021  *
5022  * @root:       the root of the tree to be checked.
5023  * @ext_ref:    the EXTENDED_IREF feature
5024  *
5025  * Return 0 if no error found.
5026  * Return <0 for error.
5027  */
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5029 {
5030         struct btrfs_path path;
5031         struct node_refs nrefs;
5032         struct btrfs_root_item *root_item = &root->root_item;
5033         int ret;
5034         int level;
5035         int err = 0;
5036
5037         /*
5038          * We need to manually check the first inode item(256)
5039          * As the following traversal function will only start from
5040          * the first inode item in the leaf, if inode item(256) is missing
5041          * we will just skip it forever.
5042          */
5043         ret = check_fs_first_inode(root, ext_ref);
5044         if (ret < 0)
5045                 return ret;
5046
5047         memset(&nrefs, 0, sizeof(nrefs));
5048         level = btrfs_header_level(root->node);
5049         btrfs_init_path(&path);
5050
5051         if (btrfs_root_refs(root_item) > 0 ||
5052             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5053                 path.nodes[level] = root->node;
5054                 path.slots[level] = 0;
5055                 extent_buffer_get(root->node);
5056         } else {
5057                 struct btrfs_key key;
5058
5059                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060                 level = root_item->drop_level;
5061                 path.lowest_level = level;
5062                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5063                 if (ret < 0)
5064                         goto out;
5065                 ret = 0;
5066         }
5067
5068         while (1) {
5069                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5070                 err |= !!ret;
5071
5072                 /* if ret is negative, walk shall stop */
5073                 if (ret < 0) {
5074                         ret = err;
5075                         break;
5076                 }
5077
5078                 ret = walk_up_tree_v2(root, &path, &level);
5079                 if (ret != 0) {
5080                         /* Normal exit, reset ret to err */
5081                         ret = err;
5082                         break;
5083                 }
5084         }
5085
5086 out:
5087         btrfs_release_path(&path);
5088         return ret;
5089 }
5090
5091 /*
5092  * Find the relative ref for root_ref and root_backref.
5093  *
5094  * @root:       the root of the root tree.
5095  * @ref_key:    the key of the root ref.
5096  *
5097  * Return 0 if no error occurred.
5098  */
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100                           struct extent_buffer *node, int slot)
5101 {
5102         struct btrfs_path path;
5103         struct btrfs_key key;
5104         struct btrfs_root_ref *ref;
5105         struct btrfs_root_ref *backref;
5106         char ref_name[BTRFS_NAME_LEN] = {0};
5107         char backref_name[BTRFS_NAME_LEN] = {0};
5108         u64 ref_dirid;
5109         u64 ref_seq;
5110         u32 ref_namelen;
5111         u64 backref_dirid;
5112         u64 backref_seq;
5113         u32 backref_namelen;
5114         u32 len;
5115         int ret;
5116         int err = 0;
5117
5118         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119         ref_dirid = btrfs_root_ref_dirid(node, ref);
5120         ref_seq = btrfs_root_ref_sequence(node, ref);
5121         ref_namelen = btrfs_root_ref_name_len(node, ref);
5122
5123         if (ref_namelen <= BTRFS_NAME_LEN) {
5124                 len = ref_namelen;
5125         } else {
5126                 len = BTRFS_NAME_LEN;
5127                 warning("%s[%llu %llu] ref_name too long",
5128                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5129                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5130                         ref_key->offset);
5131         }
5132         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5133
5134         /* Find relative root_ref */
5135         key.objectid = ref_key->offset;
5136         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137         key.offset = ref_key->objectid;
5138
5139         btrfs_init_path(&path);
5140         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5141         if (ret) {
5142                 err |= ROOT_REF_MISSING;
5143                 error("%s[%llu %llu] couldn't find relative ref",
5144                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5145                       "ROOT_REF" : "ROOT_BACKREF",
5146                       ref_key->objectid, ref_key->offset);
5147                 goto out;
5148         }
5149
5150         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151                                  struct btrfs_root_ref);
5152         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5155
5156         if (backref_namelen <= BTRFS_NAME_LEN) {
5157                 len = backref_namelen;
5158         } else {
5159                 len = BTRFS_NAME_LEN;
5160                 warning("%s[%llu %llu] ref_name too long",
5161                         key.type == BTRFS_ROOT_REF_KEY ?
5162                         "ROOT_REF" : "ROOT_BACKREF",
5163                         key.objectid, key.offset);
5164         }
5165         read_extent_buffer(path.nodes[0], backref_name,
5166                            (unsigned long)(backref + 1), len);
5167
5168         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169             ref_namelen != backref_namelen ||
5170             strncmp(ref_name, backref_name, len)) {
5171                 err |= ROOT_REF_MISMATCH;
5172                 error("%s[%llu %llu] mismatch relative ref",
5173                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5174                       "ROOT_REF" : "ROOT_BACKREF",
5175                       ref_key->objectid, ref_key->offset);
5176         }
5177 out:
5178         btrfs_release_path(&path);
5179         return err;
5180 }
5181
5182 /*
5183  * Check all fs/file tree in low_memory mode.
5184  *
5185  * 1. for fs tree root item, call check_fs_root_v2()
5186  * 2. for fs tree root ref/backref, call check_root_ref()
5187  *
5188  * Return 0 if no error occurred.
5189  */
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5191 {
5192         struct btrfs_root *tree_root = fs_info->tree_root;
5193         struct btrfs_root *cur_root = NULL;
5194         struct btrfs_path path;
5195         struct btrfs_key key;
5196         struct extent_buffer *node;
5197         unsigned int ext_ref;
5198         int slot;
5199         int ret;
5200         int err = 0;
5201
5202         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5203
5204         btrfs_init_path(&path);
5205         key.objectid = BTRFS_FS_TREE_OBJECTID;
5206         key.offset = 0;
5207         key.type = BTRFS_ROOT_ITEM_KEY;
5208
5209         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5210         if (ret < 0) {
5211                 err = ret;
5212                 goto out;
5213         } else if (ret > 0) {
5214                 err = -ENOENT;
5215                 goto out;
5216         }
5217
5218         while (1) {
5219                 node = path.nodes[0];
5220                 slot = path.slots[0];
5221                 btrfs_item_key_to_cpu(node, &key, slot);
5222                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5223                         goto out;
5224                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225                     fs_root_objectid(key.objectid)) {
5226                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5228                                                                        &key);
5229                         } else {
5230                                 key.offset = (u64)-1;
5231                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5232                         }
5233
5234                         if (IS_ERR(cur_root)) {
5235                                 error("Fail to read fs/subvol tree: %lld",
5236                                       key.objectid);
5237                                 err = -EIO;
5238                                 goto next;
5239                         }
5240
5241                         ret = check_fs_root_v2(cur_root, ext_ref);
5242                         err |= ret;
5243
5244                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245                                 btrfs_free_fs_root(cur_root);
5246                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5248                         ret = check_root_ref(tree_root, &key, node, slot);
5249                         err |= ret;
5250                 }
5251 next:
5252                 ret = btrfs_next_item(tree_root, &path);
5253                 if (ret > 0)
5254                         goto out;
5255                 if (ret < 0) {
5256                         err = ret;
5257                         goto out;
5258                 }
5259         }
5260
5261 out:
5262         btrfs_release_path(&path);
5263         return err;
5264 }
5265
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5267 {
5268         struct list_head *cur = rec->backrefs.next;
5269         struct extent_backref *back;
5270         struct tree_backref *tback;
5271         struct data_backref *dback;
5272         u64 found = 0;
5273         int err = 0;
5274
5275         while(cur != &rec->backrefs) {
5276                 back = to_extent_backref(cur);
5277                 cur = cur->next;
5278                 if (!back->found_extent_tree) {
5279                         err = 1;
5280                         if (!print_errs)
5281                                 goto out;
5282                         if (back->is_data) {
5283                                 dback = to_data_backref(back);
5284                                 fprintf(stderr, "Backref %llu %s %llu"
5285                                         " owner %llu offset %llu num_refs %lu"
5286                                         " not found in extent tree\n",
5287                                         (unsigned long long)rec->start,
5288                                         back->full_backref ?
5289                                         "parent" : "root",
5290                                         back->full_backref ?
5291                                         (unsigned long long)dback->parent:
5292                                         (unsigned long long)dback->root,
5293                                         (unsigned long long)dback->owner,
5294                                         (unsigned long long)dback->offset,
5295                                         (unsigned long)dback->num_refs);
5296                         } else {
5297                                 tback = to_tree_backref(back);
5298                                 fprintf(stderr, "Backref %llu parent %llu"
5299                                         " root %llu not found in extent tree\n",
5300                                         (unsigned long long)rec->start,
5301                                         (unsigned long long)tback->parent,
5302                                         (unsigned long long)tback->root);
5303                         }
5304                 }
5305                 if (!back->is_data && !back->found_ref) {
5306                         err = 1;
5307                         if (!print_errs)
5308                                 goto out;
5309                         tback = to_tree_backref(back);
5310                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311                                 (unsigned long long)rec->start,
5312                                 back->full_backref ? "parent" : "root",
5313                                 back->full_backref ?
5314                                 (unsigned long long)tback->parent :
5315                                 (unsigned long long)tback->root, back);
5316                 }
5317                 if (back->is_data) {
5318                         dback = to_data_backref(back);
5319                         if (dback->found_ref != dback->num_refs) {
5320                                 err = 1;
5321                                 if (!print_errs)
5322                                         goto out;
5323                                 fprintf(stderr, "Incorrect local backref count"
5324                                         " on %llu %s %llu owner %llu"
5325                                         " offset %llu found %u wanted %u back %p\n",
5326                                         (unsigned long long)rec->start,
5327                                         back->full_backref ?
5328                                         "parent" : "root",
5329                                         back->full_backref ?
5330                                         (unsigned long long)dback->parent:
5331                                         (unsigned long long)dback->root,
5332                                         (unsigned long long)dback->owner,
5333                                         (unsigned long long)dback->offset,
5334                                         dback->found_ref, dback->num_refs, back);
5335                         }
5336                         if (dback->disk_bytenr != rec->start) {
5337                                 err = 1;
5338                                 if (!print_errs)
5339                                         goto out;
5340                                 fprintf(stderr, "Backref disk bytenr does not"
5341                                         " match extent record, bytenr=%llu, "
5342                                         "ref bytenr=%llu\n",
5343                                         (unsigned long long)rec->start,
5344                                         (unsigned long long)dback->disk_bytenr);
5345                         }
5346
5347                         if (dback->bytes != rec->nr) {
5348                                 err = 1;
5349                                 if (!print_errs)
5350                                         goto out;
5351                                 fprintf(stderr, "Backref bytes do not match "
5352                                         "extent backref, bytenr=%llu, ref "
5353                                         "bytes=%llu, backref bytes=%llu\n",
5354                                         (unsigned long long)rec->start,
5355                                         (unsigned long long)rec->nr,
5356                                         (unsigned long long)dback->bytes);
5357                         }
5358                 }
5359                 if (!back->is_data) {
5360                         found += 1;
5361                 } else {
5362                         dback = to_data_backref(back);
5363                         found += dback->found_ref;
5364                 }
5365         }
5366         if (found != rec->refs) {
5367                 err = 1;
5368                 if (!print_errs)
5369                         goto out;
5370                 fprintf(stderr, "Incorrect global backref count "
5371                         "on %llu found %llu wanted %llu\n",
5372                         (unsigned long long)rec->start,
5373                         (unsigned long long)found,
5374                         (unsigned long long)rec->refs);
5375         }
5376 out:
5377         return err;
5378 }
5379
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5381 {
5382         struct extent_backref *back;
5383         struct list_head *cur;
5384         while (!list_empty(&rec->backrefs)) {
5385                 cur = rec->backrefs.next;
5386                 back = to_extent_backref(cur);
5387                 list_del(cur);
5388                 free(back);
5389         }
5390         return 0;
5391 }
5392
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5394 {
5395         struct cache_extent *cache;
5396         struct extent_record *rec;
5397
5398         while (1) {
5399                 cache = first_cache_extent(extent_cache);
5400                 if (!cache)
5401                         break;
5402                 rec = container_of(cache, struct extent_record, cache);
5403                 remove_cache_extent(extent_cache, cache);
5404                 free_all_extent_backrefs(rec);
5405                 free(rec);
5406         }
5407 }
5408
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410                                  struct extent_record *rec)
5411 {
5412         if (rec->content_checked && rec->owner_ref_checked &&
5413             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415             !rec->bad_full_backref && !rec->crossing_stripes &&
5416             !rec->wrong_chunk_type) {
5417                 remove_cache_extent(extent_cache, &rec->cache);
5418                 free_all_extent_backrefs(rec);
5419                 list_del_init(&rec->list);
5420                 free(rec);
5421         }
5422         return 0;
5423 }
5424
5425 static int check_owner_ref(struct btrfs_root *root,
5426                             struct extent_record *rec,
5427                             struct extent_buffer *buf)
5428 {
5429         struct extent_backref *node;
5430         struct tree_backref *back;
5431         struct btrfs_root *ref_root;
5432         struct btrfs_key key;
5433         struct btrfs_path path;
5434         struct extent_buffer *parent;
5435         int level;
5436         int found = 0;
5437         int ret;
5438
5439         list_for_each_entry(node, &rec->backrefs, list) {
5440                 if (node->is_data)
5441                         continue;
5442                 if (!node->found_ref)
5443                         continue;
5444                 if (node->full_backref)
5445                         continue;
5446                 back = to_tree_backref(node);
5447                 if (btrfs_header_owner(buf) == back->root)
5448                         return 0;
5449         }
5450         BUG_ON(rec->is_root);
5451
5452         /* try to find the block by search corresponding fs tree */
5453         key.objectid = btrfs_header_owner(buf);
5454         key.type = BTRFS_ROOT_ITEM_KEY;
5455         key.offset = (u64)-1;
5456
5457         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458         if (IS_ERR(ref_root))
5459                 return 1;
5460
5461         level = btrfs_header_level(buf);
5462         if (level == 0)
5463                 btrfs_item_key_to_cpu(buf, &key, 0);
5464         else
5465                 btrfs_node_key_to_cpu(buf, &key, 0);
5466
5467         btrfs_init_path(&path);
5468         path.lowest_level = level + 1;
5469         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5470         if (ret < 0)
5471                 return 0;
5472
5473         parent = path.nodes[level + 1];
5474         if (parent && buf->start == btrfs_node_blockptr(parent,
5475                                                         path.slots[level + 1]))
5476                 found = 1;
5477
5478         btrfs_release_path(&path);
5479         return found ? 0 : 1;
5480 }
5481
5482 static int is_extent_tree_record(struct extent_record *rec)
5483 {
5484         struct list_head *cur = rec->backrefs.next;
5485         struct extent_backref *node;
5486         struct tree_backref *back;
5487         int is_extent = 0;
5488
5489         while(cur != &rec->backrefs) {
5490                 node = to_extent_backref(cur);
5491                 cur = cur->next;
5492                 if (node->is_data)
5493                         return 0;
5494                 back = to_tree_backref(node);
5495                 if (node->full_backref)
5496                         return 0;
5497                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5498                         is_extent = 1;
5499         }
5500         return is_extent;
5501 }
5502
5503
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505                                struct cache_tree *extent_cache,
5506                                u64 start, u64 len)
5507 {
5508         struct extent_record *rec;
5509         struct cache_extent *cache;
5510         struct btrfs_key key;
5511
5512         cache = lookup_cache_extent(extent_cache, start, len);
5513         if (!cache)
5514                 return 0;
5515
5516         rec = container_of(cache, struct extent_record, cache);
5517         if (!is_extent_tree_record(rec))
5518                 return 0;
5519
5520         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5522 }
5523
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are swapped in
 * place.  For a leaf the two item payloads are copied through temporary heap
 * buffers, the item offsets and sizes are exchanged and the keys are then
 * rewritten through @path.
 *
 * Returns 0 on success, -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /*
                         * The first key of the node changed; presumably
                         * btrfs_fixup_low_keys() propagates it to the levels
                         * above -- TODO confirm.
                         */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                /*
                 * NOTE(review): the raw item offsets are used as buffer
                 * offsets here, whereas fix_item_offset() in this file adds
                 * btrfs_leaf_data(buf) to an item offset before touching the
                 * data -- confirm which base is intended.
                 */
                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each buffer is written back with the OTHER
                 * item's length.  If the two sizes differ, one of these calls
                 * is asked to copy more bytes than its source buffer holds --
                 * confirm whether equal sizes are guaranteed by the callers.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange the offset/size stored in the two item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Rewrite the keys crosswise; leaves path->slots[0] at slot + 1. */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5591
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5593 {
5594         struct extent_buffer *buf;
5595         struct btrfs_key k1, k2;
5596         int i;
5597         int level = path->lowest_level;
5598         int ret = -EIO;
5599
5600         buf = path->nodes[level];
5601         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5602                 if (level) {
5603                         btrfs_node_key_to_cpu(buf, &k1, i);
5604                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5605                 } else {
5606                         btrfs_item_key_to_cpu(buf, &k1, i);
5607                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5608                 }
5609                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5610                         continue;
5611                 ret = swap_values(root, path, buf, i);
5612                 if (ret)
5613                         break;
5614                 btrfs_mark_buffer_dirty(buf);
5615                 i = 0;
5616         }
5617         return ret;
5618 }
5619
5620 static int delete_bogus_item(struct btrfs_root *root,
5621                              struct btrfs_path *path,
5622                              struct extent_buffer *buf, int slot)
5623 {
5624         struct btrfs_key key;
5625         int nritems = btrfs_header_nritems(buf);
5626
5627         btrfs_item_key_to_cpu(buf, &key, slot);
5628
5629         /* These are all the keys we can deal with missing. */
5630         if (key.type != BTRFS_DIR_INDEX_KEY &&
5631             key.type != BTRFS_EXTENT_ITEM_KEY &&
5632             key.type != BTRFS_METADATA_ITEM_KEY &&
5633             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5635                 return -1;
5636
5637         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638                (unsigned long long)key.objectid, key.type,
5639                (unsigned long long)key.offset, slot, buf->start);
5640         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641                               btrfs_item_nr_offset(slot + 1),
5642                               sizeof(struct btrfs_item) *
5643                               (nritems - slot - 1));
5644         btrfs_set_header_nritems(buf, nritems - 1);
5645         if (slot == 0) {
5646                 struct btrfs_disk_key disk_key;
5647
5648                 btrfs_item_key(buf, &disk_key, 0);
5649                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5650         }
5651         btrfs_mark_buffer_dirty(buf);
5652         return 0;
5653 }
5654
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5656 {
5657         struct extent_buffer *buf;
5658         int i;
5659         int ret = 0;
5660
5661         /* We should only get this for leaves */
5662         BUG_ON(path->lowest_level);
5663         buf = path->nodes[0];
5664 again:
5665         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666                 unsigned int shift = 0, offset;
5667
5668                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669                     BTRFS_LEAF_DATA_SIZE(root)) {
5670                         if (btrfs_item_end_nr(buf, i) >
5671                             BTRFS_LEAF_DATA_SIZE(root)) {
5672                                 ret = delete_bogus_item(root, path, buf, i);
5673                                 if (!ret)
5674                                         goto again;
5675                                 fprintf(stderr, "item is off the end of the "
5676                                         "leaf, can't fix\n");
5677                                 ret = -EIO;
5678                                 break;
5679                         }
5680                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5681                                 btrfs_item_end_nr(buf, i);
5682                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683                            btrfs_item_offset_nr(buf, i - 1)) {
5684                         if (btrfs_item_end_nr(buf, i) >
5685                             btrfs_item_offset_nr(buf, i - 1)) {
5686                                 ret = delete_bogus_item(root, path, buf, i);
5687                                 if (!ret)
5688                                         goto again;
5689                                 fprintf(stderr, "items overlap, can't fix\n");
5690                                 ret = -EIO;
5691                                 break;
5692                         }
5693                         shift = btrfs_item_offset_nr(buf, i - 1) -
5694                                 btrfs_item_end_nr(buf, i);
5695                 }
5696                 if (!shift)
5697                         continue;
5698
5699                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700                        i, shift, (unsigned long long)buf->start);
5701                 offset = btrfs_item_offset_nr(buf, i);
5702                 memmove_extent_buffer(buf,
5703                                       btrfs_leaf_data(buf) + offset + shift,
5704                                       btrfs_leaf_data(buf) + offset,
5705                                       btrfs_item_size_nr(buf, i));
5706                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5707                                       offset + shift);
5708                 btrfs_mark_buffer_dirty(buf);
5709         }
5710
5711         /*
5712          * We may have moved things, in which case we want to exit so we don't
5713          * write those changes out.  Once we have proper abort functionality in
5714          * progs this can be changed to something nicer.
5715          */
5716         BUG_ON(ret);
5717         return ret;
5718 }
5719
5720 /*
5721  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5722  * then just return -EIO.
5723  */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725                                 struct extent_buffer *buf,
5726                                 enum btrfs_tree_block_status status)
5727 {
5728         struct btrfs_trans_handle *trans;
5729         struct ulist *roots;
5730         struct ulist_node *node;
5731         struct btrfs_root *search_root;
5732         struct btrfs_path path;
5733         struct ulist_iterator iter;
5734         struct btrfs_key root_key, key;
5735         int ret;
5736
5737         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5739                 return -EIO;
5740
5741         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5742         if (ret)
5743                 return -EIO;
5744
5745         btrfs_init_path(&path);
5746         ULIST_ITER_INIT(&iter);
5747         while ((node = ulist_next(roots, &iter))) {
5748                 root_key.objectid = node->val;
5749                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750                 root_key.offset = (u64)-1;
5751
5752                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5753                 if (IS_ERR(root)) {
5754                         ret = -EIO;
5755                         break;
5756                 }
5757
5758
5759                 trans = btrfs_start_transaction(search_root, 0);
5760                 if (IS_ERR(trans)) {
5761                         ret = PTR_ERR(trans);
5762                         break;
5763                 }
5764
5765                 path.lowest_level = btrfs_header_level(buf);
5766                 path.skip_check_block = 1;
5767                 if (path.lowest_level)
5768                         btrfs_node_key_to_cpu(buf, &key, 0);
5769                 else
5770                         btrfs_item_key_to_cpu(buf, &key, 0);
5771                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5772                 if (ret) {
5773                         ret = -EIO;
5774                         btrfs_commit_transaction(trans, search_root);
5775                         break;
5776                 }
5777                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778                         ret = fix_key_order(search_root, &path);
5779                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780                         ret = fix_item_offset(search_root, &path);
5781                 if (ret) {
5782                         btrfs_commit_transaction(trans, search_root);
5783                         break;
5784                 }
5785                 btrfs_release_path(&path);
5786                 btrfs_commit_transaction(trans, search_root);
5787         }
5788         ulist_free(roots);
5789         btrfs_release_path(&path);
5790         return ret;
5791 }
5792
5793 static int check_block(struct btrfs_root *root,
5794                        struct cache_tree *extent_cache,
5795                        struct extent_buffer *buf, u64 flags)
5796 {
5797         struct extent_record *rec;
5798         struct cache_extent *cache;
5799         struct btrfs_key key;
5800         enum btrfs_tree_block_status status;
5801         int ret = 0;
5802         int level;
5803
5804         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5805         if (!cache)
5806                 return 1;
5807         rec = container_of(cache, struct extent_record, cache);
5808         rec->generation = btrfs_header_generation(buf);
5809
5810         level = btrfs_header_level(buf);
5811         if (btrfs_header_nritems(buf) > 0) {
5812
5813                 if (level == 0)
5814                         btrfs_item_key_to_cpu(buf, &key, 0);
5815                 else
5816                         btrfs_node_key_to_cpu(buf, &key, 0);
5817
5818                 rec->info_objectid = key.objectid;
5819         }
5820         rec->info_level = level;
5821
5822         if (btrfs_is_leaf(buf))
5823                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5824         else
5825                 status = btrfs_check_node(root, &rec->parent_key, buf);
5826
5827         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5828                 if (repair)
5829                         status = try_to_fix_bad_block(root, buf, status);
5830                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5831                         ret = -EIO;
5832                         fprintf(stderr, "bad block %llu\n",
5833                                 (unsigned long long)buf->start);
5834                 } else {
5835                         /*
5836                          * Signal to callers we need to start the scan over
5837                          * again since we'll have cowed blocks.
5838                          */
5839                         ret = -EAGAIN;
5840                 }
5841         } else {
5842                 rec->content_checked = 1;
5843                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844                         rec->owner_ref_checked = 1;
5845                 else {
5846                         ret = check_owner_ref(root, rec, buf);
5847                         if (!ret)
5848                                 rec->owner_ref_checked = 1;
5849                 }
5850         }
5851         if (!ret)
5852                 maybe_free_extent_rec(extent_cache, rec);
5853         return ret;
5854 }
5855
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857                                                 u64 parent, u64 root)
5858 {
5859         struct list_head *cur = rec->backrefs.next;
5860         struct extent_backref *node;
5861         struct tree_backref *back;
5862
5863         while(cur != &rec->backrefs) {
5864                 node = to_extent_backref(cur);
5865                 cur = cur->next;
5866                 if (node->is_data)
5867                         continue;
5868                 back = to_tree_backref(node);
5869                 if (parent > 0) {
5870                         if (!node->full_backref)
5871                                 continue;
5872                         if (parent == back->parent)
5873                                 return back;
5874                 } else {
5875                         if (node->full_backref)
5876                                 continue;
5877                         if (back->root == root)
5878                                 return back;
5879                 }
5880         }
5881         return NULL;
5882 }
5883
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885                                                 u64 parent, u64 root)
5886 {
5887         struct tree_backref *ref = malloc(sizeof(*ref));
5888
5889         if (!ref)
5890                 return NULL;
5891         memset(&ref->node, 0, sizeof(ref->node));
5892         if (parent > 0) {
5893                 ref->parent = parent;
5894                 ref->node.full_backref = 1;
5895         } else {
5896                 ref->root = root;
5897                 ref->node.full_backref = 0;
5898         }
5899         list_add_tail(&ref->node.list, &rec->backrefs);
5900
5901         return ref;
5902 }
5903
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905                                                 u64 parent, u64 root,
5906                                                 u64 owner, u64 offset,
5907                                                 int found_ref,
5908                                                 u64 disk_bytenr, u64 bytes)
5909 {
5910         struct list_head *cur = rec->backrefs.next;
5911         struct extent_backref *node;
5912         struct data_backref *back;
5913
5914         while(cur != &rec->backrefs) {
5915                 node = to_extent_backref(cur);
5916                 cur = cur->next;
5917                 if (!node->is_data)
5918                         continue;
5919                 back = to_data_backref(node);
5920                 if (parent > 0) {
5921                         if (!node->full_backref)
5922                                 continue;
5923                         if (parent == back->parent)
5924                                 return back;
5925                 } else {
5926                         if (node->full_backref)
5927                                 continue;
5928                         if (back->root == root && back->owner == owner &&
5929                             back->offset == offset) {
5930                                 if (found_ref && node->found_ref &&
5931                                     (back->bytes != bytes ||
5932                                     back->disk_bytenr != disk_bytenr))
5933                                         continue;
5934                                 return back;
5935                         }
5936                 }
5937         }
5938         return NULL;
5939 }
5940
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942                                                 u64 parent, u64 root,
5943                                                 u64 owner, u64 offset,
5944                                                 u64 max_size)
5945 {
5946         struct data_backref *ref = malloc(sizeof(*ref));
5947
5948         if (!ref)
5949                 return NULL;
5950         memset(&ref->node, 0, sizeof(ref->node));
5951         ref->node.is_data = 1;
5952
5953         if (parent > 0) {
5954                 ref->parent = parent;
5955                 ref->owner = 0;
5956                 ref->offset = 0;
5957                 ref->node.full_backref = 1;
5958         } else {
5959                 ref->root = root;
5960                 ref->owner = owner;
5961                 ref->offset = offset;
5962                 ref->node.full_backref = 0;
5963         }
5964         ref->bytes = max_size;
5965         ref->found_ref = 0;
5966         ref->num_refs = 0;
5967         list_add_tail(&ref->node.list, &rec->backrefs);
5968         if (max_size > rec->max_size)
5969                 rec->max_size = max_size;
5970         return ref;
5971 }
5972
5973 /* Check if the type of extent matches with its chunk */
5974 static void check_extent_type(struct extent_record *rec)
5975 {
5976         struct btrfs_block_group_cache *bg_cache;
5977
5978         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5979         if (!bg_cache)
5980                 return;
5981
5982         /* data extent, check chunk directly*/
5983         if (!rec->metadata) {
5984                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985                         rec->wrong_chunk_type = 1;
5986                 return;
5987         }
5988
5989         /* metadata extent, check the obvious case first */
5990         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991                                  BTRFS_BLOCK_GROUP_METADATA))) {
5992                 rec->wrong_chunk_type = 1;
5993                 return;
5994         }
5995
5996         /*
5997          * Check SYSTEM extent, as it's also marked as metadata, we can only
5998          * make sure it's a SYSTEM extent by its backref
5999          */
6000         if (!list_empty(&rec->backrefs)) {
6001                 struct extent_backref *node;
6002                 struct tree_backref *tback;
6003                 u64 bg_type;
6004
6005                 node = to_extent_backref(rec->backrefs.next);
6006                 if (node->is_data) {
6007                         /* tree block shouldn't have data backref */
6008                         rec->wrong_chunk_type = 1;
6009                         return;
6010                 }
6011                 tback = container_of(node, struct tree_backref, node);
6012
6013                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6015                 else
6016                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017                 if (!(bg_cache->flags & bg_type))
6018                         rec->wrong_chunk_type = 1;
6019         }
6020 }
6021
6022 /*
6023  * Allocate a new extent record, fill default values from @tmpl and insert int
6024  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025  * the cache, otherwise it fails.
6026  */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028                 struct extent_record *tmpl)
6029 {
6030         struct extent_record *rec;
6031         int ret = 0;
6032
6033         rec = malloc(sizeof(*rec));
6034         if (!rec)
6035                 return -ENOMEM;
6036         rec->start = tmpl->start;
6037         rec->max_size = tmpl->max_size;
6038         rec->nr = max(tmpl->nr, tmpl->max_size);
6039         rec->found_rec = tmpl->found_rec;
6040         rec->content_checked = tmpl->content_checked;
6041         rec->owner_ref_checked = tmpl->owner_ref_checked;
6042         rec->num_duplicates = 0;
6043         rec->metadata = tmpl->metadata;
6044         rec->flag_block_full_backref = FLAG_UNSET;
6045         rec->bad_full_backref = 0;
6046         rec->crossing_stripes = 0;
6047         rec->wrong_chunk_type = 0;
6048         rec->is_root = tmpl->is_root;
6049         rec->refs = tmpl->refs;
6050         rec->extent_item_refs = tmpl->extent_item_refs;
6051         rec->parent_generation = tmpl->parent_generation;
6052         INIT_LIST_HEAD(&rec->backrefs);
6053         INIT_LIST_HEAD(&rec->dups);
6054         INIT_LIST_HEAD(&rec->list);
6055         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6056         rec->cache.start = tmpl->start;
6057         rec->cache.size = tmpl->nr;
6058         ret = insert_cache_extent(extent_cache, &rec->cache);
6059         if (ret) {
6060                 free(rec);
6061                 return ret;
6062         }
6063         bytes_used += rec->nr;
6064
6065         if (tmpl->metadata)
6066                 rec->crossing_stripes = check_crossing_stripes(global_info,
6067                                 rec->start, global_info->tree_root->nodesize);
6068         check_extent_type(rec);
6069         return ret;
6070 }
6071
6072 /*
6073  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6074  * some are hints:
6075  * - refs              - if found, increase refs
6076  * - is_root           - if found, set
6077  * - content_checked   - if found, set
6078  * - owner_ref_checked - if found, set
6079  *
6080  * If not found, create a new one, initialize and insert.
6081  */
6082 static int add_extent_rec(struct cache_tree *extent_cache,
6083                 struct extent_record *tmpl)
6084 {
6085         struct extent_record *rec;
6086         struct cache_extent *cache;
6087         int ret = 0;
6088         int dup = 0;
6089
6090         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6091         if (cache) {
6092                 rec = container_of(cache, struct extent_record, cache);
6093                 if (tmpl->refs)
6094                         rec->refs++;
6095                 if (rec->nr == 1)
6096                         rec->nr = max(tmpl->nr, tmpl->max_size);
6097
6098                 /*
6099                  * We need to make sure to reset nr to whatever the extent
6100                  * record says was the real size, this way we can compare it to
6101                  * the backrefs.
6102                  */
6103                 if (tmpl->found_rec) {
6104                         if (tmpl->start != rec->start || rec->found_rec) {
6105                                 struct extent_record *tmp;
6106
6107                                 dup = 1;
6108                                 if (list_empty(&rec->list))
6109                                         list_add_tail(&rec->list,
6110                                                       &duplicate_extents);
6111
6112                                 /*
6113                                  * We have to do this song and dance in case we
6114                                  * find an extent record that falls inside of
6115                                  * our current extent record but does not have
6116                                  * the same objectid.
6117                                  */
6118                                 tmp = malloc(sizeof(*tmp));
6119                                 if (!tmp)
6120                                         return -ENOMEM;
6121                                 tmp->start = tmpl->start;
6122                                 tmp->max_size = tmpl->max_size;
6123                                 tmp->nr = tmpl->nr;
6124                                 tmp->found_rec = 1;
6125                                 tmp->metadata = tmpl->metadata;
6126                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6127                                 INIT_LIST_HEAD(&tmp->list);
6128                                 list_add_tail(&tmp->list, &rec->dups);
6129                                 rec->num_duplicates++;
6130                         } else {
6131                                 rec->nr = tmpl->nr;
6132                                 rec->found_rec = 1;
6133                         }
6134                 }
6135
6136                 if (tmpl->extent_item_refs && !dup) {
6137                         if (rec->extent_item_refs) {
6138                                 fprintf(stderr, "block %llu rec "
6139                                         "extent_item_refs %llu, passed %llu\n",
6140                                         (unsigned long long)tmpl->start,
6141                                         (unsigned long long)
6142                                                         rec->extent_item_refs,
6143                                         (unsigned long long)tmpl->extent_item_refs);
6144                         }
6145                         rec->extent_item_refs = tmpl->extent_item_refs;
6146                 }
6147                 if (tmpl->is_root)
6148                         rec->is_root = 1;
6149                 if (tmpl->content_checked)
6150                         rec->content_checked = 1;
6151                 if (tmpl->owner_ref_checked)
6152                         rec->owner_ref_checked = 1;
6153                 memcpy(&rec->parent_key, &tmpl->parent_key,
6154                                 sizeof(tmpl->parent_key));
6155                 if (tmpl->parent_generation)
6156                         rec->parent_generation = tmpl->parent_generation;
6157                 if (rec->max_size < tmpl->max_size)
6158                         rec->max_size = tmpl->max_size;
6159
6160                 /*
6161                  * A metadata extent can't cross stripe_len boundary, otherwise
6162                  * kernel scrub won't be able to handle it.
6163                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6164                  * it.
6165                  */
6166                 if (tmpl->metadata)
6167                         rec->crossing_stripes = check_crossing_stripes(
6168                                         global_info, rec->start,
6169                                         global_info->tree_root->nodesize);
6170                 check_extent_type(rec);
6171                 maybe_free_extent_rec(extent_cache, rec);
6172                 return ret;
6173         }
6174
6175         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6176
6177         return ret;
6178 }
6179
6180 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6181                             u64 parent, u64 root, int found_ref)
6182 {
6183         struct extent_record *rec;
6184         struct tree_backref *back;
6185         struct cache_extent *cache;
6186         int ret;
6187
6188         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6189         if (!cache) {
6190                 struct extent_record tmpl;
6191
6192                 memset(&tmpl, 0, sizeof(tmpl));
6193                 tmpl.start = bytenr;
6194                 tmpl.nr = 1;
6195                 tmpl.metadata = 1;
6196
6197                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6198                 if (ret)
6199                         return ret;
6200
6201                 /* really a bug in cache_extent implement now */
6202                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6203                 if (!cache)
6204                         return -ENOENT;
6205         }
6206
6207         rec = container_of(cache, struct extent_record, cache);
6208         if (rec->start != bytenr) {
6209                 /*
6210                  * Several cause, from unaligned bytenr to over lapping extents
6211                  */
6212                 return -EEXIST;
6213         }
6214
6215         back = find_tree_backref(rec, parent, root);
6216         if (!back) {
6217                 back = alloc_tree_backref(rec, parent, root);
6218                 if (!back)
6219                         return -ENOMEM;
6220         }
6221
6222         if (found_ref) {
6223                 if (back->node.found_ref) {
6224                         fprintf(stderr, "Extent back ref already exists "
6225                                 "for %llu parent %llu root %llu \n",
6226                                 (unsigned long long)bytenr,
6227                                 (unsigned long long)parent,
6228                                 (unsigned long long)root);
6229                 }
6230                 back->node.found_ref = 1;
6231         } else {
6232                 if (back->node.found_extent_tree) {
6233                         fprintf(stderr, "Extent back ref already exists "
6234                                 "for %llu parent %llu root %llu \n",
6235                                 (unsigned long long)bytenr,
6236                                 (unsigned long long)parent,
6237                                 (unsigned long long)root);
6238                 }
6239                 back->node.found_extent_tree = 1;
6240         }
6241         check_extent_type(rec);
6242         maybe_free_extent_rec(extent_cache, rec);
6243         return 0;
6244 }
6245
6246 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6247                             u64 parent, u64 root, u64 owner, u64 offset,
6248                             u32 num_refs, int found_ref, u64 max_size)
6249 {
6250         struct extent_record *rec;
6251         struct data_backref *back;
6252         struct cache_extent *cache;
6253         int ret;
6254
6255         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6256         if (!cache) {
6257                 struct extent_record tmpl;
6258
6259                 memset(&tmpl, 0, sizeof(tmpl));
6260                 tmpl.start = bytenr;
6261                 tmpl.nr = 1;
6262                 tmpl.max_size = max_size;
6263
6264                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6265                 if (ret)
6266                         return ret;
6267
6268                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6269                 if (!cache)
6270                         abort();
6271         }
6272
6273         rec = container_of(cache, struct extent_record, cache);
6274         if (rec->max_size < max_size)
6275                 rec->max_size = max_size;
6276
6277         /*
6278          * If found_ref is set then max_size is the real size and must match the
6279          * existing refs.  So if we have already found a ref then we need to
6280          * make sure that this ref matches the existing one, otherwise we need
6281          * to add a new backref so we can notice that the backrefs don't match
6282          * and we need to figure out who is telling the truth.  This is to
6283          * account for that awful fsync bug I introduced where we'd end up with
6284          * a btrfs_file_extent_item that would have its length include multiple
6285          * prealloc extents or point inside of a prealloc extent.
6286          */
6287         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6288                                  bytenr, max_size);
6289         if (!back) {
6290                 back = alloc_data_backref(rec, parent, root, owner, offset,
6291                                           max_size);
6292                 BUG_ON(!back);
6293         }
6294
6295         if (found_ref) {
6296                 BUG_ON(num_refs != 1);
6297                 if (back->node.found_ref)
6298                         BUG_ON(back->bytes != max_size);
6299                 back->node.found_ref = 1;
6300                 back->found_ref += 1;
6301                 back->bytes = max_size;
6302                 back->disk_bytenr = bytenr;
6303                 rec->refs += 1;
6304                 rec->content_checked = 1;
6305                 rec->owner_ref_checked = 1;
6306         } else {
6307                 if (back->node.found_extent_tree) {
6308                         fprintf(stderr, "Extent back ref already exists "
6309                                 "for %llu parent %llu root %llu "
6310                                 "owner %llu offset %llu num_refs %lu\n",
6311                                 (unsigned long long)bytenr,
6312                                 (unsigned long long)parent,
6313                                 (unsigned long long)root,
6314                                 (unsigned long long)owner,
6315                                 (unsigned long long)offset,
6316                                 (unsigned long)num_refs);
6317                 }
6318                 back->num_refs = num_refs;
6319                 back->node.found_extent_tree = 1;
6320         }
6321         maybe_free_extent_rec(extent_cache, rec);
6322         return 0;
6323 }
6324
6325 static int add_pending(struct cache_tree *pending,
6326                        struct cache_tree *seen, u64 bytenr, u32 size)
6327 {
6328         int ret;
6329         ret = add_cache_extent(seen, bytenr, size);
6330         if (ret)
6331                 return ret;
6332         add_cache_extent(pending, bytenr, size);
6333         return 0;
6334 }
6335
6336 static int pick_next_pending(struct cache_tree *pending,
6337                         struct cache_tree *reada,
6338                         struct cache_tree *nodes,
6339                         u64 last, struct block_info *bits, int bits_nr,
6340                         int *reada_bits)
6341 {
6342         unsigned long node_start = last;
6343         struct cache_extent *cache;
6344         int ret;
6345
6346         cache = search_cache_extent(reada, 0);
6347         if (cache) {
6348                 bits[0].start = cache->start;
6349                 bits[0].size = cache->size;
6350                 *reada_bits = 1;
6351                 return 1;
6352         }
6353         *reada_bits = 0;
6354         if (node_start > 32768)
6355                 node_start -= 32768;
6356
6357         cache = search_cache_extent(nodes, node_start);
6358         if (!cache)
6359                 cache = search_cache_extent(nodes, 0);
6360
6361         if (!cache) {
6362                  cache = search_cache_extent(pending, 0);
6363                  if (!cache)
6364                          return 0;
6365                  ret = 0;
6366                  do {
6367                          bits[ret].start = cache->start;
6368                          bits[ret].size = cache->size;
6369                          cache = next_cache_extent(cache);
6370                          ret++;
6371                  } while (cache && ret < bits_nr);
6372                  return ret;
6373         }
6374
6375         ret = 0;
6376         do {
6377                 bits[ret].start = cache->start;
6378                 bits[ret].size = cache->size;
6379                 cache = next_cache_extent(cache);
6380                 ret++;
6381         } while (cache && ret < bits_nr);
6382
6383         if (bits_nr - ret > 8) {
6384                 u64 lookup = bits[0].start + bits[0].size;
6385                 struct cache_extent *next;
6386                 next = search_cache_extent(pending, lookup);
6387                 while(next) {
6388                         if (next->start - lookup > 32768)
6389                                 break;
6390                         bits[ret].start = next->start;
6391                         bits[ret].size = next->size;
6392                         lookup = next->start + next->size;
6393                         ret++;
6394                         if (ret == bits_nr)
6395                                 break;
6396                         next = next_cache_extent(next);
6397                         if (!next)
6398                                 break;
6399                 }
6400         }
6401         return ret;
6402 }
6403
6404 static void free_chunk_record(struct cache_extent *cache)
6405 {
6406         struct chunk_record *rec;
6407
6408         rec = container_of(cache, struct chunk_record, cache);
6409         list_del_init(&rec->list);
6410         list_del_init(&rec->dextents);
6411         free(rec);
6412 }
6413
6414 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6415 {
6416         cache_tree_free_extents(chunk_cache, free_chunk_record);
6417 }
6418
6419 static void free_device_record(struct rb_node *node)
6420 {
6421         struct device_record *rec;
6422
6423         rec = container_of(node, struct device_record, node);
6424         free(rec);
6425 }
6426
6427 FREE_RB_BASED_TREE(device_cache, free_device_record);
6428
6429 int insert_block_group_record(struct block_group_tree *tree,
6430                               struct block_group_record *bg_rec)
6431 {
6432         int ret;
6433
6434         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6435         if (ret)
6436                 return ret;
6437
6438         list_add_tail(&bg_rec->list, &tree->block_groups);
6439         return 0;
6440 }
6441
6442 static void free_block_group_record(struct cache_extent *cache)
6443 {
6444         struct block_group_record *rec;
6445
6446         rec = container_of(cache, struct block_group_record, cache);
6447         list_del_init(&rec->list);
6448         free(rec);
6449 }
6450
6451 void free_block_group_tree(struct block_group_tree *tree)
6452 {
6453         cache_tree_free_extents(&tree->tree, free_block_group_record);
6454 }
6455
6456 int insert_device_extent_record(struct device_extent_tree *tree,
6457                                 struct device_extent_record *de_rec)
6458 {
6459         int ret;
6460
6461         /*
6462          * Device extent is a bit different from the other extents, because
6463          * the extents which belong to the different devices may have the
6464          * same start and size, so we need use the special extent cache
6465          * search/insert functions.
6466          */
6467         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6468         if (ret)
6469                 return ret;
6470
6471         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6472         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6473         return 0;
6474 }
6475
6476 static void free_device_extent_record(struct cache_extent *cache)
6477 {
6478         struct device_extent_record *rec;
6479
6480         rec = container_of(cache, struct device_extent_record, cache);
6481         if (!list_empty(&rec->chunk_list))
6482                 list_del_init(&rec->chunk_list);
6483         if (!list_empty(&rec->device_list))
6484                 list_del_init(&rec->device_list);
6485         free(rec);
6486 }
6487
6488 void free_device_extent_tree(struct device_extent_tree *tree)
6489 {
6490         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6491 }
6492
6493 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6494 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6495                                  struct extent_buffer *leaf, int slot)
6496 {
6497         struct btrfs_extent_ref_v0 *ref0;
6498         struct btrfs_key key;
6499         int ret;
6500
6501         btrfs_item_key_to_cpu(leaf, &key, slot);
6502         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6503         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6504                 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6505                                 0, 0);
6506         } else {
6507                 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6508                                 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
6509         }
6510         return ret;
6511 }
6512 #endif
6513
6514 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6515                                             struct btrfs_key *key,
6516                                             int slot)
6517 {
6518         struct btrfs_chunk *ptr;
6519         struct chunk_record *rec;
6520         int num_stripes, i;
6521
6522         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6523         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6524
6525         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6526         if (!rec) {
6527                 fprintf(stderr, "memory allocation failed\n");
6528                 exit(-1);
6529         }
6530
6531         INIT_LIST_HEAD(&rec->list);
6532         INIT_LIST_HEAD(&rec->dextents);
6533         rec->bg_rec = NULL;
6534
6535         rec->cache.start = key->offset;
6536         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6537
6538         rec->generation = btrfs_header_generation(leaf);
6539
6540         rec->objectid = key->objectid;
6541         rec->type = key->type;
6542         rec->offset = key->offset;
6543
6544         rec->length = rec->cache.size;
6545         rec->owner = btrfs_chunk_owner(leaf, ptr);
6546         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6547         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6548         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6549         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6550         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6551         rec->num_stripes = num_stripes;
6552         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6553
6554         for (i = 0; i < rec->num_stripes; ++i) {
6555                 rec->stripes[i].devid =
6556                         btrfs_stripe_devid_nr(leaf, ptr, i);
6557                 rec->stripes[i].offset =
6558                         btrfs_stripe_offset_nr(leaf, ptr, i);
6559                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6560                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6561                                 BTRFS_UUID_SIZE);
6562         }
6563
6564         return rec;
6565 }
6566
6567 static int process_chunk_item(struct cache_tree *chunk_cache,
6568                               struct btrfs_key *key, struct extent_buffer *eb,
6569                               int slot)
6570 {
6571         struct chunk_record *rec;
6572         struct btrfs_chunk *chunk;
6573         int ret = 0;
6574
6575         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6576         /*
6577          * Do extra check for this chunk item,
6578          *
6579          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6580          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6581          * and owner<->key_type check.
6582          */
6583         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6584                                       key->offset);
6585         if (ret < 0) {
6586                 error("chunk(%llu, %llu) is not valid, ignore it",
6587                       key->offset, btrfs_chunk_length(eb, chunk));
6588                 return 0;
6589         }
6590         rec = btrfs_new_chunk_record(eb, key, slot);
6591         ret = insert_cache_extent(chunk_cache, &rec->cache);
6592         if (ret) {
6593                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6594                         rec->offset, rec->length);
6595                 free(rec);
6596         }
6597
6598         return ret;
6599 }
6600
6601 static int process_device_item(struct rb_root *dev_cache,
6602                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6603 {
6604         struct btrfs_dev_item *ptr;
6605         struct device_record *rec;
6606         int ret = 0;
6607
6608         ptr = btrfs_item_ptr(eb,
6609                 slot, struct btrfs_dev_item);
6610
6611         rec = malloc(sizeof(*rec));
6612         if (!rec) {
6613                 fprintf(stderr, "memory allocation failed\n");
6614                 return -ENOMEM;
6615         }
6616
6617         rec->devid = key->offset;
6618         rec->generation = btrfs_header_generation(eb);
6619
6620         rec->objectid = key->objectid;
6621         rec->type = key->type;
6622         rec->offset = key->offset;
6623
6624         rec->devid = btrfs_device_id(eb, ptr);
6625         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6626         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6627
6628         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6629         if (ret) {
6630                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6631                 free(rec);
6632         }
6633
6634         return ret;
6635 }
6636
6637 struct block_group_record *
6638 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6639                              int slot)
6640 {
6641         struct btrfs_block_group_item *ptr;
6642         struct block_group_record *rec;
6643
6644         rec = calloc(1, sizeof(*rec));
6645         if (!rec) {
6646                 fprintf(stderr, "memory allocation failed\n");
6647                 exit(-1);
6648         }
6649
6650         rec->cache.start = key->objectid;
6651         rec->cache.size = key->offset;
6652
6653         rec->generation = btrfs_header_generation(leaf);
6654
6655         rec->objectid = key->objectid;
6656         rec->type = key->type;
6657         rec->offset = key->offset;
6658
6659         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6660         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6661
6662         INIT_LIST_HEAD(&rec->list);
6663
6664         return rec;
6665 }
6666
6667 static int process_block_group_item(struct block_group_tree *block_group_cache,
6668                                     struct btrfs_key *key,
6669                                     struct extent_buffer *eb, int slot)
6670 {
6671         struct block_group_record *rec;
6672         int ret = 0;
6673
6674         rec = btrfs_new_block_group_record(eb, key, slot);
6675         ret = insert_block_group_record(block_group_cache, rec);
6676         if (ret) {
6677                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6678                         rec->objectid, rec->offset);
6679                 free(rec);
6680         }
6681
6682         return ret;
6683 }
6684
6685 struct device_extent_record *
6686 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6687                                struct btrfs_key *key, int slot)
6688 {
6689         struct device_extent_record *rec;
6690         struct btrfs_dev_extent *ptr;
6691
6692         rec = calloc(1, sizeof(*rec));
6693         if (!rec) {
6694                 fprintf(stderr, "memory allocation failed\n");
6695                 exit(-1);
6696         }
6697
6698         rec->cache.objectid = key->objectid;
6699         rec->cache.start = key->offset;
6700
6701         rec->generation = btrfs_header_generation(leaf);
6702
6703         rec->objectid = key->objectid;
6704         rec->type = key->type;
6705         rec->offset = key->offset;
6706
6707         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6708         rec->chunk_objecteid =
6709                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6710         rec->chunk_offset =
6711                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6712         rec->length = btrfs_dev_extent_length(leaf, ptr);
6713         rec->cache.size = rec->length;
6714
6715         INIT_LIST_HEAD(&rec->chunk_list);
6716         INIT_LIST_HEAD(&rec->device_list);
6717
6718         return rec;
6719 }
6720
6721 static int
6722 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6723                            struct btrfs_key *key, struct extent_buffer *eb,
6724                            int slot)
6725 {
6726         struct device_extent_record *rec;
6727         int ret;
6728
6729         rec = btrfs_new_device_extent_record(eb, key, slot);
6730         ret = insert_device_extent_record(dev_extent_cache, rec);
6731         if (ret) {
6732                 fprintf(stderr,
6733                         "Device extent[%llu, %llu, %llu] existed.\n",
6734                         rec->objectid, rec->offset, rec->length);
6735                 free(rec);
6736         }
6737
6738         return ret;
6739 }
6740
6741 static int process_extent_item(struct btrfs_root *root,
6742                                struct cache_tree *extent_cache,
6743                                struct extent_buffer *eb, int slot)
6744 {
6745         struct btrfs_extent_item *ei;
6746         struct btrfs_extent_inline_ref *iref;
6747         struct btrfs_extent_data_ref *dref;
6748         struct btrfs_shared_data_ref *sref;
6749         struct btrfs_key key;
6750         struct extent_record tmpl;
6751         unsigned long end;
6752         unsigned long ptr;
6753         int ret;
6754         int type;
6755         u32 item_size = btrfs_item_size_nr(eb, slot);
6756         u64 refs = 0;
6757         u64 offset;
6758         u64 num_bytes;
6759         int metadata = 0;
6760
6761         btrfs_item_key_to_cpu(eb, &key, slot);
6762
6763         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6764                 metadata = 1;
6765                 num_bytes = root->nodesize;
6766         } else {
6767                 num_bytes = key.offset;
6768         }
6769
6770         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6771                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6772                       key.objectid, root->sectorsize);
6773                 return -EIO;
6774         }
6775         if (item_size < sizeof(*ei)) {
6776 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6777                 struct btrfs_extent_item_v0 *ei0;
6778                 BUG_ON(item_size != sizeof(*ei0));
6779                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6780                 refs = btrfs_extent_refs_v0(eb, ei0);
6781 #else
6782                 BUG();
6783 #endif
6784                 memset(&tmpl, 0, sizeof(tmpl));
6785                 tmpl.start = key.objectid;
6786                 tmpl.nr = num_bytes;
6787                 tmpl.extent_item_refs = refs;
6788                 tmpl.metadata = metadata;
6789                 tmpl.found_rec = 1;
6790                 tmpl.max_size = num_bytes;
6791
6792                 return add_extent_rec(extent_cache, &tmpl);
6793         }
6794
6795         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6796         refs = btrfs_extent_refs(eb, ei);
6797         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6798                 metadata = 1;
6799         else
6800                 metadata = 0;
6801         if (metadata && num_bytes != root->nodesize) {
6802                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6803                       num_bytes, root->nodesize);
6804                 return -EIO;
6805         }
6806         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6807                 error("ignore invalid data extent, length %llu is not aligned to %u",
6808                       num_bytes, root->sectorsize);
6809                 return -EIO;
6810         }
6811
6812         memset(&tmpl, 0, sizeof(tmpl));
6813         tmpl.start = key.objectid;
6814         tmpl.nr = num_bytes;
6815         tmpl.extent_item_refs = refs;
6816         tmpl.metadata = metadata;
6817         tmpl.found_rec = 1;
6818         tmpl.max_size = num_bytes;
6819         add_extent_rec(extent_cache, &tmpl);
6820
6821         ptr = (unsigned long)(ei + 1);
6822         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6823             key.type == BTRFS_EXTENT_ITEM_KEY)
6824                 ptr += sizeof(struct btrfs_tree_block_info);
6825
6826         end = (unsigned long)ei + item_size;
6827         while (ptr < end) {
6828                 iref = (struct btrfs_extent_inline_ref *)ptr;
6829                 type = btrfs_extent_inline_ref_type(eb, iref);
6830                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6831                 switch (type) {
6832                 case BTRFS_TREE_BLOCK_REF_KEY:
6833                         ret = add_tree_backref(extent_cache, key.objectid,
6834                                         0, offset, 0);
6835                         if (ret < 0)
6836                                 error(
6837                         "add_tree_backref failed (extent items tree block): %s",
6838                                       strerror(-ret));
6839                         break;
6840                 case BTRFS_SHARED_BLOCK_REF_KEY:
6841                         ret = add_tree_backref(extent_cache, key.objectid,
6842                                         offset, 0, 0);
6843                         if (ret < 0)
6844                                 error(
6845                         "add_tree_backref failed (extent items shared block): %s",
6846                                       strerror(-ret));
6847                         break;
6848                 case BTRFS_EXTENT_DATA_REF_KEY:
6849                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6850                         add_data_backref(extent_cache, key.objectid, 0,
6851                                         btrfs_extent_data_ref_root(eb, dref),
6852                                         btrfs_extent_data_ref_objectid(eb,
6853                                                                        dref),
6854                                         btrfs_extent_data_ref_offset(eb, dref),
6855                                         btrfs_extent_data_ref_count(eb, dref),
6856                                         0, num_bytes);
6857                         break;
6858                 case BTRFS_SHARED_DATA_REF_KEY:
6859                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6860                         add_data_backref(extent_cache, key.objectid, offset,
6861                                         0, 0, 0,
6862                                         btrfs_shared_data_ref_count(eb, sref),
6863                                         0, num_bytes);
6864                         break;
6865                 default:
6866                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6867                                 key.objectid, key.type, num_bytes);
6868                         goto out;
6869                 }
6870                 ptr += btrfs_extent_inline_ref_size(type);
6871         }
6872         WARN_ON(ptr > end);
6873 out:
6874         return 0;
6875 }
6876
/*
 * Check that the range [offset, offset + bytes) of block group @cache is
 * described by exactly one free space entry.
 *
 * Before the lookup, every part of the range that overlaps a stripe returned
 * by btrfs_rmap_block() for one of the superblock mirror offsets is carved
 * out of the range.  The entry that matches the remaining range is unlinked
 * from cache->free_space_ctl and freed.
 *
 * Returns 0 when the range matches or is entirely covered by such a stripe,
 * -EINVAL when the entry is missing or does not match exactly, or the
 * non-zero value returned by btrfs_rmap_block().
 */
6877 static int check_cache_range(struct btrfs_root *root,
6878                              struct btrfs_block_group_cache *cache,
6879                              u64 offset, u64 bytes)
6880 {
6881         struct btrfs_free_space *entry;
6882         u64 *logical;
6883         u64 bytenr;
6884         int stripe_len;
6885         int i, nr, ret;
6886
6887         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6888                 bytenr = btrfs_sb_offset(i);
             /*
              * presumably yields the logical addresses inside this block
              * group that map onto superblock mirror i -- TODO confirm
              * against btrfs_rmap_block().  The logical array is freed by
              * this function on every path after a successful call.
              */
6889                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6890                                        cache->key.objectid, bytenr, 0,
6891                                        &logical, &nr, &stripe_len);
6892                 if (ret)
6893                         return ret;
6894
6895                 while (nr--) {
                     /* Stripe ends at or before the range starts. */
6896                         if (logical[nr] + stripe_len <= offset)
6897                                 continue;
                     /* Stripe starts at or after the range ends. */
6898                         if (offset + bytes <= logical[nr])
6899                                 continue;
6900                         if (logical[nr] == offset) {
                             /* Stripe covers the whole range. */
6901                                 if (stripe_len >= bytes) {
6902                                         free(logical);
6903                                         return 0;
6904                                 }
                             /* Drop the leading stripe from the range. */
6905                                 bytes -= stripe_len;
6906                                 offset += stripe_len;
6907                         } else if (logical[nr] < offset) {
                             /* Stripe starts earlier and reaches the end. */
6908                                 if (logical[nr] + stripe_len >=
6909                                     offset + bytes) {
6910                                         free(logical);
6911                                         return 0;
6912                                 }
                             /* Keep only what lies past the stripe's end. */
6913                                 bytes = (offset + bytes) -
6914                                         (logical[nr] + stripe_len);
6915                                 offset = logical[nr] + stripe_len;
6916                         } else {
6917                                 /*
6918                                  * Could be tricky, the super may land in the
6919                                  * middle of the area we're checking.  First
6920                                  * check the easiest case, it's at the end.
6921                                  */
6922                                 if (logical[nr] + stripe_len >=
6923                                     bytes + offset) {
6924                                         bytes = logical[nr] - offset;
6925                                         continue;
6926                                 }
6927
6928                                 /* Check the left side */
6929                                 ret = check_cache_range(root, cache,
6930                                                         offset,
6931                                                         logical[nr] - offset);
6932                                 if (ret) {
6933                                         free(logical);
6934                                         return ret;
6935                                 }
6936
6937                                 /* Now we continue with the right side */
6938                                 bytes = (offset + bytes) -
6939                                         (logical[nr] + stripe_len);
6940                                 offset = logical[nr] + stripe_len;
6941                         }
6942                 }
6943
6944                 free(logical);
6945         }
6946
     /* The remaining range must match one free space entry exactly. */
6947         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6948         if (!entry) {
6949                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6950                         offset, offset+bytes);
6951                 return -EINVAL;
6952         }
6953
6954         if (entry->offset != offset) {
6955                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6956                         entry->offset);
6957                 return -EINVAL;
6958         }
6959
6960         if (entry->bytes != bytes) {
6961                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6962                         bytes, entry->bytes, offset);
6963                 return -EINVAL;
6964         }
6965
     /* Consume the entry so the caller can detect leftover entries later. */
6966         unlink_free_space(cache->free_space_ctl, entry);
6967         free(entry);
6968         return 0;
6969 }
6970
/*
 * Compare the loaded free space entries of block group @cache against the
 * extent tree.
 *
 * Extent and metadata items inside the block group are walked in key order;
 * every gap between the end of the previous item ("last") and the start of
 * the next one is handed to check_cache_range().  After the walk the gap up
 * to the end of the block group is checked the same way, and finally the
 * free space rb-tree is expected to be empty.
 *
 * Returns 0 on success, a negative value from the tree search, the non-zero
 * result of check_cache_range(), or -EINVAL if entries are left over.
 */
6971 static int verify_space_cache(struct btrfs_root *root,
6972                               struct btrfs_block_group_cache *cache)
6973 {
6974         struct btrfs_path path;
6975         struct extent_buffer *leaf;
6976         struct btrfs_key key;
6977         u64 last;
6978         int ret = 0;
6979
     /* All lookups below are done in the extent tree. */
6980         root = root->fs_info->extent_root;
6981
     /* Never start below BTRFS_SUPER_INFO_OFFSET. */
6982         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6983
6984         btrfs_init_path(&path);
6985         key.objectid = last;
6986         key.offset = 0;
6987         key.type = BTRFS_EXTENT_ITEM_KEY;
6988         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6989         if (ret < 0)
6990                 goto out;
6991         ret = 0;
6992         while (1) {
6993                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6994                         ret = btrfs_next_leaf(root, &path);
6995                         if (ret < 0)
6996                                 goto out;
                     /* No more leaves: fall through to the trailing gap. */
6997                         if (ret > 0) {
6998                                 ret = 0;
6999                                 break;
7000                         }
7001                 }
7002                 leaf = path.nodes[0];
7003                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
             /* Stop at the first key past the end of this block group. */
7004                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7005                         break;
7006                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7007                     key.type != BTRFS_METADATA_ITEM_KEY) {
7008                         path.slots[0]++;
7009                         continue;
7010                 }
7011
             /*
              * The item starts exactly where the previous one ended, so
              * there is no gap to check; just advance "last".  Extent items
              * use key.offset as their length, metadata items use
              * root->nodesize.
              */
7012                 if (last == key.objectid) {
7013                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7014                                 last = key.objectid + key.offset;
7015                         else
7016                                 last = key.objectid + root->nodesize;
7017                         path.slots[0]++;
7018                         continue;
7019                 }
7020
             /* The gap [last, key.objectid) should be recorded as free. */
7021                 ret = check_cache_range(root, cache, last,
7022                                         key.objectid - last);
7023                 if (ret)
7024                         break;
7025                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7026                         last = key.objectid + key.offset;
7027                 else
7028                         last = key.objectid + root->nodesize;
7029                 path.slots[0]++;
7030         }
7031
     /*
      * Check the gap up to the end of the block group.  Note this also
      * runs after a failed check above and overwrites ret in that case.
      */
7032         if (last < cache->key.objectid + cache->key.offset)
7033                 ret = check_cache_range(root, cache, last,
7034                                         cache->key.objectid +
7035                                         cache->key.offset - last);
7036
7037 out:
7038         btrfs_release_path(&path);
7039
     /* Every matched entry was unlinked, so anything left is unexpected. */
7040         if (!ret &&
7041             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7042                 fprintf(stderr, "There are still entries left in the space "
7043                         "cache\n");
7044                 ret = -EINVAL;
7045         }
7046
7047         return ret;
7048 }
7049
/*
 * Load and verify the free space information of every block group.
 *
 * The check is skipped (returning 0) when the superblock's cache generation
 * is set and differs from the super generation.  For each block group the
 * free space is loaded from the free space tree when the FREE_SPACE_TREE
 * compat_ro bit is set, otherwise from the free space cache, and then
 * compared against the extent tree by verify_space_cache().
 *
 * Returns 0 when no problem was found, -EINVAL otherwise.  A failure to
 * set up the free space ctl of a block group stops the scan and is counted
 * as an error too, instead of being silently reported as success.
 */
7050 static int check_space_cache(struct btrfs_root *root)
7051 {
7052         struct btrfs_block_group_cache *cache;
7053         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7054         int ret;
7055         int error = 0;
7056
7057         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7058             btrfs_super_generation(root->fs_info->super_copy) !=
7059             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7060                 printf("cache and super generation don't match, space cache "
7061                        "will be invalidated\n");
7062                 return 0;
7063         }
7064
7065         if (ctx.progress_enabled) {
7066                 ctx.tp = TASK_FREE_SPACE;
7067                 task_start(ctx.info);
7068         }
7069
7070         while (1) {
7071                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7072                 if (!cache)
7073                         break;
7074
             /* Next lookup starts right after this block group. */
7075                 start = cache->key.objectid + cache->key.offset;
7076                 if (!cache->free_space_ctl) {
7077                         if (btrfs_init_free_space_ctl(cache,
7078                                                       root->sectorsize)) {
7079                                 ret = -ENOMEM;
                             /*
                              * Count the failure: the return value is
                              * derived from "error" only, so without
                              * this the aborted scan looked successful.
                              */
                                     fprintf(stderr, "could not init free space ctl: %s\n",
                                             strerror(-ret));
                                     error++;
7080                                 break;
7081                         }
7082                 } else {
7083                         btrfs_remove_free_space_cache(cache);
7084                 }
7085
7086                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7087                         ret = exclude_super_stripes(root, cache);
7088                         if (ret) {
7089                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7090                                         strerror(-ret));
7091                                 error++;
7092                                 continue;
7093                         }
7094                         ret = load_free_space_tree(root->fs_info, cache);
7095                         free_excluded_extents(root, cache);
7096                         if (ret < 0) {
7097                                 fprintf(stderr, "could not load free space tree: %s\n",
7098                                         strerror(-ret));
7099                                 error++;
7100                                 continue;
7101                         }
                     /* A positive return is added to the error count. */
7102                         error += ret;
7103                 } else {
7104                         ret = load_free_space_cache(root->fs_info, cache);
                     /* A zero return means there is nothing to verify here. */
7105                         if (!ret)
7106                                 continue;
7107                 }
7108
7109                 ret = verify_space_cache(root, cache);
7110                 if (ret) {
7111                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7112                                 cache->key.objectid);
7113                         error++;
7114                 }
7115         }
7116
7117         task_stop(ctx.info);
7118
7119         return error ? -EINVAL : 0;
7120 }
7121
/*
 * Read the data range [bytenr, bytenr + num_bytes) and compare the checksum
 * of every root->sectorsize block with the checksums stored in @eb starting
 * at @leaf_offset.
 *
 * On a mismatch a message is printed and, while another mirror is available
 * according to btrfs_num_copies(), the read is retried from the next mirror.
 *
 * Returns 0, -EINVAL if @num_bytes is not a multiple of the sector size,
 * -ENOMEM, or the non-zero value returned by read_extent_data().
 *
 * NOTE(review): a mismatch that is still present on the last mirror is only
 * printed; ret stays 0 in that case -- confirm this is intended.
 */
7122 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7123                         u64 num_bytes, unsigned long leaf_offset,
7124                         struct extent_buffer *eb) {
7125
7126         u64 offset = 0;
7127         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7128         char *data;
7129         unsigned long csum_offset;
7130         u32 csum;
7131         u32 csum_expected;
7132         u64 read_len;
7133         u64 data_checked = 0;
7134         u64 tmp;
7135         int ret = 0;
7136         int mirror;
7137         int num_copies;
7138
7139         if (num_bytes % root->sectorsize)
7140                 return -EINVAL;
7141
7142         data = malloc(num_bytes);
7143         if (!data)
7144                 return -ENOMEM;
7145
7146         while (offset < num_bytes) {
7147                 mirror = 0;
7148 again:
             /* read_len is in/out: requested length in, length read out. */
7149                 read_len = num_bytes - offset;
7150                 /* read as much space once a time */
7151                 ret = read_extent_data(root, data + offset,
7152                                 bytenr + offset, &read_len, mirror);
7153                 if (ret)
7154                         goto out;
7155                 data_checked = 0;
7156                 /* verify every 4k data's checksum */
7157                 while (data_checked < read_len) {
7158                         csum = ~(u32)0;
                     /* tmp is the byte offset of this sector in the extent. */
7159                         tmp = offset + data_checked;
7160
7161                         csum = btrfs_csum_data((char *)data + tmp,
7162                                                csum, root->sectorsize);
7163                         btrfs_csum_final(csum, (u8 *)&csum);
7164
                     /* One csum_size slot per sector, indexed by sector. */
7165                         csum_offset = leaf_offset +
7166                                  tmp / root->sectorsize * csum_size;
                     /*
                      * NOTE(review): csum_size bytes are copied into a u32;
                      * presumably fine only while csum_size <= 4 -- confirm.
                      */
7167                         read_extent_buffer(eb, (char *)&csum_expected,
7168                                            csum_offset, csum_size);
7169                         /* try another mirror */
7170                         if (csum != csum_expected) {
7171                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7172                                                 mirror, bytenr + tmp,
7173                                                 csum, csum_expected);
7174                                 num_copies = btrfs_num_copies(
7175                                                 &root->fs_info->mapping_tree,
7176                                                 bytenr, num_bytes);
                             /* The retry re-reads from "offset" onwards. */
7177                                 if (mirror < num_copies - 1) {
7178                                         mirror += 1;
7179                                         goto again;
7180                                 }
7181                         }
7182                         data_checked += root->sectorsize;
7183                 }
7184                 offset += read_len;
7185         }
7186 out:
7187         free(data);
7188         return ret;
7189 }
7190
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * BTRFS_EXTENT_ITEM_KEY records in the extent tree.
 *
 * The covered parts are subtracted from the range while walking forward; an
 * extent that sits in the middle of the range splits it, with the right part
 * checked by a recursive call and the left part by restarting the search.
 *
 * Returns 0 when the whole range is covered, 1 when part of it has no
 * extent record (a message is printed), or a negative value from the tree
 * search helpers.
 */
7191 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7192                                u64 num_bytes)
7193 {
7194         struct btrfs_path path;
7195         struct extent_buffer *leaf;
7196         struct btrfs_key key;
7197         int ret;
7198
7199         btrfs_init_path(&path);
     /* Largest possible extent item key for this bytenr. */
7200         key.objectid = bytenr;
7201         key.type = BTRFS_EXTENT_ITEM_KEY;
7202         key.offset = (u64)-1;
7203
7204 again:
7205         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7206                                 0, 0);
7207         if (ret < 0) {
7208                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7209                 btrfs_release_path(&path);
7210                 return ret;
7211         } else if (ret) {
             /* No exact match: step back to the preceding item. */
7212                 if (path.slots[0] > 0) {
7213                         path.slots[0]--;
7214                 } else {
7215                         ret = btrfs_prev_leaf(root, &path);
7216                         if (ret < 0) {
7217                                 goto out;
7218                         } else if (ret > 0) {
                             /* No previous leaf; reported at "out". */
7219                                 ret = 0;
7220                                 goto out;
7221                         }
7222                 }
7223         }
7224
7225         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7226
7227         /*
7228          * Block group items come before extent items if they have the same
7229          * bytenr, so walk back one more just in case.  Dear future traveller,
7230          * first congrats on mastering time travel.  Now if it's not too much
7231          * trouble could you go back to 2006 and tell Chris to make the
7232          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7233          * EXTENT_ITEM_KEY please?
7234          */
7235         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7236                 if (path.slots[0] > 0) {
7237                         path.slots[0]--;
7238                 } else {
7239                         ret = btrfs_prev_leaf(root, &path);
7240                         if (ret < 0) {
7241                                 goto out;
7242                         } else if (ret > 0) {
7243                                 ret = 0;
7244                                 goto out;
7245                         }
7246                 }
7247                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7248         }
7249
     /* Walk forward, shrinking the range by every extent that covers it. */
7250         while (num_bytes) {
7251                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7252                         ret = btrfs_next_leaf(root, &path);
7253                         if (ret < 0) {
7254                                 fprintf(stderr, "Error going to next leaf "
7255                                         "%d\n", ret);
7256                                 btrfs_release_path(&path);
7257                                 return ret;
7258                         } else if (ret) {
7259                                 break;
7260                         }
7261                 }
7262                 leaf = path.nodes[0];
7263                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7264                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7265                         path.slots[0]++;
7266                         continue;
7267                 }
             /* Extent ends before the range starts. */
7268                 if (key.objectid + key.offset < bytenr) {
7269                         path.slots[0]++;
7270                         continue;
7271                 }
             /* Extent starts past the end of the range: nothing more to find. */
7272                 if (key.objectid > bytenr + num_bytes)
7273                         break;
7274
7275                 if (key.objectid == bytenr) {
                     /* Extent starts at the range start. */
7276                         if (key.offset >= num_bytes) {
7277                                 num_bytes = 0;
7278                                 break;
7279                         }
7280                         num_bytes -= key.offset;
7281                         bytenr += key.offset;
7282                 } else if (key.objectid < bytenr) {
                     /* Extent starts before the range start. */
7283                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7284                                 num_bytes = 0;
7285                                 break;
7286                         }
7287                         num_bytes = (bytenr + num_bytes) -
7288                                 (key.objectid + key.offset);
7289                         bytenr = key.objectid + key.offset;
7290                 } else {
7291                         if (key.objectid + key.offset < bytenr + num_bytes) {
7292                                 u64 new_start = key.objectid + key.offset;
7293                                 u64 new_bytes = bytenr + num_bytes - new_start;
7294
7295                                 /*
7296                                  * Weird case, the extent is in the middle of
7297                                  * our range, we'll have to search one side
7298                                  * and then the other.  Not sure if this happens
7299                                  * in real life, but no harm in coding it up
7300                                  * anyway just in case.
7301                                  */
7302                                 btrfs_release_path(&path);
7303                                 ret = check_extent_exists(root, new_start,
7304                                                           new_bytes);
7305                                 if (ret) {
7306                                         fprintf(stderr, "Right section didn't "
7307                                                 "have a record\n");
7308                                         break;
7309                                 }
                             /* Restart the search for the left part only. */
7310                                 num_bytes = key.objectid - bytenr;
7311                                 goto again;
7312                         }
                     /* Extent covers the tail; only the head remains. */
7313                         num_bytes = key.objectid - bytenr;
7314                 }
7315                 path.slots[0]++;
7316         }
     /* Leaving the loop is not an error; leftover bytes are handled below. */
7317         ret = 0;
7318
7319 out:
7320         if (num_bytes && !ret) {
7321                 fprintf(stderr, "There are no extents for csum range "
7322                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7323                 ret = 1;
7324         }
7325
7326         btrfs_release_path(&path);
7327         return ret;
7328 }
7329
/*
 * Walk the csum tree and check every checksum item.
 *
 * When check_data_csum is set, the data of each item is read back and
 * verified with check_extent_csums().  Independently of that, consecutive
 * csum items are merged into contiguous ranges and each range is checked
 * with check_extent_exists(), including the last range, which is flushed
 * after the walk has reached the end of the tree.
 *
 * Returns the number of problems found (0 means clean), -ENOENT when the
 * csum tree root is not readable, or the negative value of the initial tree
 * search.  A walk that is aborted by a tree or read error counts as a
 * problem, so it can no longer be mistaken for a clean result.
 */
7330 static int check_csums(struct btrfs_root *root)
7331 {
7332         struct btrfs_path path;
7333         struct extent_buffer *leaf;
7334         struct btrfs_key key;
7335         u64 offset = 0, num_bytes = 0;
7336         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7337         int errors = 0;
     /* Set once the walk reached the end of the tree without failure. */
             int scan_complete = 0;
7338         int ret;
7339         u64 data_len;
7340         unsigned long leaf_offset;
7341
7342         root = root->fs_info->csum_root;
7343         if (!extent_buffer_uptodate(root->node)) {
7344                 fprintf(stderr, "No valid csum tree found\n");
7345                 return -ENOENT;
7346         }
7347
7348         btrfs_init_path(&path);
7349         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7350         key.type = BTRFS_EXTENT_CSUM_KEY;
7351         key.offset = 0;
7352         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7353         if (ret < 0) {
7354                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7355                 btrfs_release_path(&path);
7356                 return ret;
7357         }
7358
7359         if (ret > 0 && path.slots[0])
7360                 path.slots[0]--;
7361         ret = 0;
7362
7363         while (1) {
7364                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7365                         ret = btrfs_next_leaf(root, &path);
7366                         if (ret < 0) {
7367                                 fprintf(stderr, "Error going to next leaf "
7368                                         "%d\n", ret);
                             /* An aborted walk must not look clean. */
                                     errors++;
7369                                 break;
7370                         }
7371                         if (ret) {
                                     scan_complete = 1;
7372                                 break;
                             }
7373                 }
7374                 leaf = path.nodes[0];
7375
7376                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7377                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7378                         path.slots[0]++;
7379                         continue;
7380                 }
7381
             /* One csum_size checksum per sector of data. */
7382                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7383                               csum_size) * root->sectorsize;
7384                 if (!check_data_csum)
7385                         goto skip_csum_check;
7386                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7387                 ret = check_extent_csums(root, key.offset, data_len,
7388                                          leaf_offset, leaf);
7389                 if (ret) {
                     /* Same here: count the failure before giving up. */
                             errors++;
7390                         break;
                     }
7391 skip_csum_check:
7392                 if (!num_bytes) {
7393                         offset = key.offset;
7394                 } else if (key.offset != offset + num_bytes) {
                     /* Discontinuity: check the range gathered so far. */
7395                         ret = check_extent_exists(root, offset, num_bytes);
7396                         if (ret) {
7397                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7398                                         "there is no extent record\n",
7399                                         offset, offset+num_bytes);
7400                                 errors++;
7401                         }
7402                         offset = key.offset;
7403                         num_bytes = 0;
7404                 }
7405                 num_bytes += data_len;
7406                 path.slots[0]++;
7407         }
7408
     /*
      * The loop only checks a range when the next item starts a new one,
      * so the final range is still pending here.  Check it unless the walk
      * was aborted.
      */
             if (scan_complete && num_bytes) {
                     ret = check_extent_exists(root, offset, num_bytes);
                     if (ret) {
                             fprintf(stderr, "Csum exists for %Lu-%Lu but "
                                     "there is no extent record\n",
                                     offset, offset+num_bytes);
                             errors++;
                     }
             }

7409         btrfs_release_path(&path);
7410         return errors;
7411 }
7412
/*
 * Return 1 if @key sorts strictly before @drop_key when comparing objectid,
 * then type, then offset; return 0 otherwise (including when both are equal).
 */
7413 static int is_dropped_key(struct btrfs_key *key,
7414                           struct btrfs_key *drop_key) {
7415         if (key->objectid < drop_key->objectid)
7416                 return 1;
7417         else if (key->objectid == drop_key->objectid) {
7418                 if (key->type < drop_key->type)
7419                         return 1;
7420                 else if (key->type == drop_key->type) {
7421                         if (key->offset < drop_key->offset)
7422                                 return 1;
7423                 }
7424         }
7425         return 0;
7426 }
7427
7428 /*
7429  * Here are the rules for FULL_BACKREF.
7430  *
7431  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7432  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7433  *      FULL_BACKREF set.
7434  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7435  *    if it happened after the relocation occurred since we'll have dropped the
7436  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7437  *    have no real way to know for sure.
7438  *
7439  * We process the blocks one root at a time, and we start from the lowest root
7440  * objectid and go to the highest.  So we can just lookup the owner backref for
7441  * the record and if we don't find it then we know it doesn't exist and we have
7442  * a FULL BACKREF.
7443  *
7444  * FIXME: if we ever start reclaiming root objectids then we need to fix this
7445  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7446  * be set or not and then we can check later once we've gathered all the refs.
      *
      * @extent_cache: cache tree searched for the extent record of @buf
      * @buf:          tree block whose flags are being derived
      * @ri:           record of the root currently being walked
      * @flags:        output; set to 0 in the normal case, or has
      *                BTRFS_BLOCK_FLAG_FULL_BACKREF or-ed in (the caller is
      *                expected to have initialised it)
      *
      * Returns 0 on success, -ENOENT when there is no extent record for
      * buf->start.  If the record already carries a full-backref flag that
      * disagrees with the result, rec->bad_full_backref is set.
7447  */
7448 static int calc_extent_flag(struct cache_tree *extent_cache,
7449                            struct extent_buffer *buf,
7450                            struct root_item_record *ri,
7451                            u64 *flags)
7452 {
7453         struct extent_record *rec;
7454         struct cache_extent *cache;
7455         struct tree_backref *tback;
7456         u64 owner = 0;
7457
7458         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7459         /* the extent record is expected to exist already; fail if not */
7460         if (!cache)
7461                 return -ENOENT;
7462
7463         rec = container_of(cache, struct extent_record, cache);
7464
7465         /*
7466          * Except file/reloc tree, we can not have
7467          * FULL BACKREF MODE
7468          */
7469         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7470                 goto normal;
7471         /*
7472          * root node
7473          */
7474         if (buf->start == ri->bytenr)
7475                 goto normal;
7476
     /* Rule 1 above. */
7477         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7478                 goto full_backref;
7479
7480         owner = btrfs_header_owner(buf);
7481         if (owner == ri->objectid)
7482                 goto normal;
7483
     /*
      * presumably looks up a tree backref with parent 0 for root "owner"
      * -- TODO confirm against find_tree_backref().  No such backref means
      * a full backref is assumed, as described above.
      */
7484         tback = find_tree_backref(rec, 0, owner);
7485         if (!tback)
7486                 goto full_backref;
7487 normal:
7488         *flags = 0;
     /* A previously recorded full-backref state contradicts this result. */
7489         if (rec->flag_block_full_backref != FLAG_UNSET &&
7490             rec->flag_block_full_backref != 0)
7491                 rec->bad_full_backref = 1;
7492         return 0;
7493 full_backref:
7494         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
     /* A previously recorded non-full-backref state contradicts this. */
7495         if (rec->flag_block_full_backref != FLAG_UNSET &&
7496             rec->flag_block_full_backref != 1)
7497                 rec->bad_full_backref = 1;
7498         return 0;
7499 }
7500
/*
 * Print a one-line message to stderr saying that key type @key_type was
 * found in root @rootid, using print_key_type() and print_objectid() to
 * render the two values.
 */
7501 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7502 {
7503         fprintf(stderr, "Invalid key type(");
7504         print_key_type(stderr, 0, key_type);
7505         fprintf(stderr, ") found in root(");
7506         print_objectid(stderr, rootid, 0);
7507         fprintf(stderr, ")\n");
7508 }
7509
7510 /*
7511  * Check if the key type is valid for the tree it was found in.
7512  *
7513  * This is an early check in case an invalid key exists in an extent buffer.
7514  * It is not comprehensive yet, but should prevent a wrong key/item from
7515  * being passed further.
      *
      * @rootid:   objectid of the tree the key was found in
      * @key_type: type of the key
      *
      * Returns 0 when the combination is allowed or the key type is not
      * covered by this check, -EINVAL (after printing a message) when the
      * key type does not belong in that tree.
7516  */
7517 static int check_type_with_root(u64 rootid, u8 key_type)
7518 {
7519         switch (key_type) {
7520         /* Only valid in chunk tree */
7521         case BTRFS_DEV_ITEM_KEY:
7522         case BTRFS_CHUNK_ITEM_KEY:
7523                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7524                         goto err;
7525                 break;
7526         /* valid in csum and log tree */
     /*
      * The label used to be BTRFS_CSUM_TREE_OBJECTID, which is a tree
      * objectid and not a key type, so this case never matched a real key
      * and the condition did not accept the csum tree itself.  The log
      * tree and fs tree owners accepted before are still accepted.
      */
7527         case BTRFS_EXTENT_CSUM_KEY:
7528                 if (!(rootid == BTRFS_CSUM_TREE_OBJECTID ||
                           rootid == BTRFS_TREE_LOG_OBJECTID ||
7529                       is_fstree(rootid)))
7530                         goto err;
7531                 break;
7532         case BTRFS_EXTENT_ITEM_KEY:
7533         case BTRFS_METADATA_ITEM_KEY:
7534         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7535                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7536                         goto err;
7537                 break;
7538         case BTRFS_ROOT_ITEM_KEY:
7539                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7540                         goto err;
7541                 break;
7542         case BTRFS_DEV_EXTENT_KEY:
7543                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7544                         goto err;
7545                 break;
7546         }
7547         return 0;
7548 err:
7549         report_mismatch_key_root(key_type, rootid);
7550         return -EINVAL;
7551 }
7552
7553 static int run_next_block(struct btrfs_root *root,
7554                           struct block_info *bits,
7555                           int bits_nr,
7556                           u64 *last,
7557                           struct cache_tree *pending,
7558                           struct cache_tree *seen,
7559                           struct cache_tree *reada,
7560                           struct cache_tree *nodes,
7561                           struct cache_tree *extent_cache,
7562                           struct cache_tree *chunk_cache,
7563                           struct rb_root *dev_cache,
7564                           struct block_group_tree *block_group_cache,
7565                           struct device_extent_tree *dev_extent_cache,
7566                           struct root_item_record *ri)
7567 {
7568         struct extent_buffer *buf;
7569         struct extent_record *rec = NULL;
7570         u64 bytenr;
7571         u32 size;
7572         u64 parent;
7573         u64 owner;
7574         u64 flags;
7575         u64 ptr;
7576         u64 gen = 0;
7577         int ret = 0;
7578         int i;
7579         int nritems;
7580         struct btrfs_key key;
7581         struct cache_extent *cache;
7582         int reada_bits;
7583
7584         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7585                                     bits_nr, &reada_bits);
7586         if (nritems == 0)
7587                 return 1;
7588
7589         if (!reada_bits) {
7590                 for(i = 0; i < nritems; i++) {
7591                         ret = add_cache_extent(reada, bits[i].start,
7592                                                bits[i].size);
7593                         if (ret == -EEXIST)
7594                                 continue;
7595
7596                         /* fixme, get the parent transid */
7597                         readahead_tree_block(root, bits[i].start,
7598                                              bits[i].size, 0);
7599                 }
7600         }
7601         *last = bits[0].start;
7602         bytenr = bits[0].start;
7603         size = bits[0].size;
7604
7605         cache = lookup_cache_extent(pending, bytenr, size);
7606         if (cache) {
7607                 remove_cache_extent(pending, cache);
7608                 free(cache);
7609         }
7610         cache = lookup_cache_extent(reada, bytenr, size);
7611         if (cache) {
7612                 remove_cache_extent(reada, cache);
7613                 free(cache);
7614         }
7615         cache = lookup_cache_extent(nodes, bytenr, size);
7616         if (cache) {
7617                 remove_cache_extent(nodes, cache);
7618                 free(cache);
7619         }
7620         cache = lookup_cache_extent(extent_cache, bytenr, size);
7621         if (cache) {
7622                 rec = container_of(cache, struct extent_record, cache);
7623                 gen = rec->parent_generation;
7624         }
7625
7626         /* fixme, get the real parent transid */
7627         buf = read_tree_block(root, bytenr, size, gen);
7628         if (!extent_buffer_uptodate(buf)) {
7629                 record_bad_block_io(root->fs_info,
7630                                     extent_cache, bytenr, size);
7631                 goto out;
7632         }
7633
7634         nritems = btrfs_header_nritems(buf);
7635
7636         flags = 0;
7637         if (!init_extent_tree) {
7638                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7639                                        btrfs_header_level(buf), 1, NULL,
7640                                        &flags);
7641                 if (ret < 0) {
7642                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7643                         if (ret < 0) {
7644                                 fprintf(stderr, "Couldn't calc extent flags\n");
7645                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7646                         }
7647                 }
7648         } else {
7649                 flags = 0;
7650                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7651                 if (ret < 0) {
7652                         fprintf(stderr, "Couldn't calc extent flags\n");
7653                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7654                 }
7655         }
7656
7657         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7658                 if (ri != NULL &&
7659                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7660                     ri->objectid == btrfs_header_owner(buf)) {
7661                         /*
7662                          * Ok we got to this block from it's original owner and
7663                          * we have FULL_BACKREF set.  Relocation can leave
7664                          * converted blocks over so this is altogether possible,
7665                          * however it's not possible if the generation > the
7666                          * last snapshot, so check for this case.
7667                          */
7668                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7669                             btrfs_header_generation(buf) > ri->last_snapshot) {
7670                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7671                                 rec->bad_full_backref = 1;
7672                         }
7673                 }
7674         } else {
7675                 if (ri != NULL &&
7676                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7677                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7678                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7679                         rec->bad_full_backref = 1;
7680                 }
7681         }
7682
7683         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7684                 rec->flag_block_full_backref = 1;
7685                 parent = bytenr;
7686                 owner = 0;
7687         } else {
7688                 rec->flag_block_full_backref = 0;
7689                 parent = 0;
7690                 owner = btrfs_header_owner(buf);
7691         }
7692
7693         ret = check_block(root, extent_cache, buf, flags);
7694         if (ret)
7695                 goto out;
7696
7697         if (btrfs_is_leaf(buf)) {
7698                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7699                 for (i = 0; i < nritems; i++) {
7700                         struct btrfs_file_extent_item *fi;
7701                         btrfs_item_key_to_cpu(buf, &key, i);
7702                         /*
7703                          * Check key type against the leaf owner.
7704                          * Could filter quite a lot of early error if
7705                          * owner is correct
7706                          */
7707                         if (check_type_with_root(btrfs_header_owner(buf),
7708                                                  key.type)) {
7709                                 fprintf(stderr, "ignoring invalid key\n");
7710                                 continue;
7711                         }
7712                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7713                                 process_extent_item(root, extent_cache, buf,
7714                                                     i);
7715                                 continue;
7716                         }
7717                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7718                                 process_extent_item(root, extent_cache, buf,
7719                                                     i);
7720                                 continue;
7721                         }
7722                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7723                                 total_csum_bytes +=
7724                                         btrfs_item_size_nr(buf, i);
7725                                 continue;
7726                         }
7727                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7728                                 process_chunk_item(chunk_cache, &key, buf, i);
7729                                 continue;
7730                         }
7731                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7732                                 process_device_item(dev_cache, &key, buf, i);
7733                                 continue;
7734                         }
7735                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7736                                 process_block_group_item(block_group_cache,
7737                                         &key, buf, i);
7738                                 continue;
7739                         }
7740                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7741                                 process_device_extent_item(dev_extent_cache,
7742                                         &key, buf, i);
7743                                 continue;
7744
7745                         }
7746                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7747 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7748                                 process_extent_ref_v0(extent_cache, buf, i);
7749 #else
7750                                 BUG();
7751 #endif
7752                                 continue;
7753                         }
7754
7755                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7756                                 ret = add_tree_backref(extent_cache,
7757                                                 key.objectid, 0, key.offset, 0);
7758                                 if (ret < 0)
7759                                         error(
7760                                 "add_tree_backref failed (leaf tree block): %s",
7761                                               strerror(-ret));
7762                                 continue;
7763                         }
7764                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7765                                 ret = add_tree_backref(extent_cache,
7766                                                 key.objectid, key.offset, 0, 0);
7767                                 if (ret < 0)
7768                                         error(
7769                                 "add_tree_backref failed (leaf shared block): %s",
7770                                               strerror(-ret));
7771                                 continue;
7772                         }
7773                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7774                                 struct btrfs_extent_data_ref *ref;
7775                                 ref = btrfs_item_ptr(buf, i,
7776                                                 struct btrfs_extent_data_ref);
7777                                 add_data_backref(extent_cache,
7778                                         key.objectid, 0,
7779                                         btrfs_extent_data_ref_root(buf, ref),
7780                                         btrfs_extent_data_ref_objectid(buf,
7781                                                                        ref),
7782                                         btrfs_extent_data_ref_offset(buf, ref),
7783                                         btrfs_extent_data_ref_count(buf, ref),
7784                                         0, root->sectorsize);
7785                                 continue;
7786                         }
7787                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7788                                 struct btrfs_shared_data_ref *ref;
7789                                 ref = btrfs_item_ptr(buf, i,
7790                                                 struct btrfs_shared_data_ref);
7791                                 add_data_backref(extent_cache,
7792                                         key.objectid, key.offset, 0, 0, 0,
7793                                         btrfs_shared_data_ref_count(buf, ref),
7794                                         0, root->sectorsize);
7795                                 continue;
7796                         }
7797                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7798                                 struct bad_item *bad;
7799
7800                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7801                                         continue;
7802                                 if (!owner)
7803                                         continue;
7804                                 bad = malloc(sizeof(struct bad_item));
7805                                 if (!bad)
7806                                         continue;
7807                                 INIT_LIST_HEAD(&bad->list);
7808                                 memcpy(&bad->key, &key,
7809                                        sizeof(struct btrfs_key));
7810                                 bad->root_id = owner;
7811                                 list_add_tail(&bad->list, &delete_items);
7812                                 continue;
7813                         }
7814                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7815                                 continue;
7816                         fi = btrfs_item_ptr(buf, i,
7817                                             struct btrfs_file_extent_item);
7818                         if (btrfs_file_extent_type(buf, fi) ==
7819                             BTRFS_FILE_EXTENT_INLINE)
7820                                 continue;
7821                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7822                                 continue;
7823
7824                         data_bytes_allocated +=
7825                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7826                         if (data_bytes_allocated < root->sectorsize) {
7827                                 abort();
7828                         }
7829                         data_bytes_referenced +=
7830                                 btrfs_file_extent_num_bytes(buf, fi);
7831                         add_data_backref(extent_cache,
7832                                 btrfs_file_extent_disk_bytenr(buf, fi),
7833                                 parent, owner, key.objectid, key.offset -
7834                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7835                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7836                 }
7837         } else {
7838                 int level;
7839                 struct btrfs_key first_key;
7840
7841                 first_key.objectid = 0;
7842
7843                 if (nritems > 0)
7844                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7845                 level = btrfs_header_level(buf);
7846                 for (i = 0; i < nritems; i++) {
7847                         struct extent_record tmpl;
7848
7849                         ptr = btrfs_node_blockptr(buf, i);
7850                         size = root->nodesize;
7851                         btrfs_node_key_to_cpu(buf, &key, i);
7852                         if (ri != NULL) {
7853                                 if ((level == ri->drop_level)
7854                                     && is_dropped_key(&key, &ri->drop_key)) {
7855                                         continue;
7856                                 }
7857                         }
7858
7859                         memset(&tmpl, 0, sizeof(tmpl));
7860                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7861                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7862                         tmpl.start = ptr;
7863                         tmpl.nr = size;
7864                         tmpl.refs = 1;
7865                         tmpl.metadata = 1;
7866                         tmpl.max_size = size;
7867                         ret = add_extent_rec(extent_cache, &tmpl);
7868                         if (ret < 0)
7869                                 goto out;
7870
7871                         ret = add_tree_backref(extent_cache, ptr, parent,
7872                                         owner, 1);
7873                         if (ret < 0) {
7874                                 error(
7875                                 "add_tree_backref failed (non-leaf block): %s",
7876                                       strerror(-ret));
7877                                 continue;
7878                         }
7879
7880                         if (level > 1) {
7881                                 add_pending(nodes, seen, ptr, size);
7882                         } else {
7883                                 add_pending(pending, seen, ptr, size);
7884                         }
7885                 }
7886                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7887                                       nritems) * sizeof(struct btrfs_key_ptr);
7888         }
7889         total_btree_bytes += buf->len;
7890         if (fs_root_objectid(btrfs_header_owner(buf)))
7891                 total_fs_tree_bytes += buf->len;
7892         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7893                 total_extent_tree_bytes += buf->len;
7894         if (!found_old_backref &&
7895             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7896             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7897             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7898                 found_old_backref = 1;
7899 out:
7900         free_extent_buffer(buf);
7901         return ret;
7902 }
7903
7904 static int add_root_to_pending(struct extent_buffer *buf,
7905                                struct cache_tree *extent_cache,
7906                                struct cache_tree *pending,
7907                                struct cache_tree *seen,
7908                                struct cache_tree *nodes,
7909                                u64 objectid)
7910 {
7911         struct extent_record tmpl;
7912         int ret;
7913
7914         if (btrfs_header_level(buf) > 0)
7915                 add_pending(nodes, seen, buf->start, buf->len);
7916         else
7917                 add_pending(pending, seen, buf->start, buf->len);
7918
7919         memset(&tmpl, 0, sizeof(tmpl));
7920         tmpl.start = buf->start;
7921         tmpl.nr = buf->len;
7922         tmpl.is_root = 1;
7923         tmpl.refs = 1;
7924         tmpl.metadata = 1;
7925         tmpl.max_size = buf->len;
7926         add_extent_rec(extent_cache, &tmpl);
7927
7928         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7929             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7930                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7931                                 0, 1);
7932         else
7933                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7934                                 1);
7935         return ret;
7936 }
7937
7938 /* as we fix the tree, we might be deleting blocks that
7939  * we're tracking for repair.  This hook makes sure we
7940  * remove any backrefs for blocks as we are fixing them.
7941  */
7942 static int free_extent_hook(struct btrfs_trans_handle *trans,
7943                             struct btrfs_root *root,
7944                             u64 bytenr, u64 num_bytes, u64 parent,
7945                             u64 root_objectid, u64 owner, u64 offset,
7946                             int refs_to_drop)
7947 {
7948         struct extent_record *rec;
7949         struct cache_extent *cache;
7950         int is_data;
7951         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7952
7953         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7954         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7955         if (!cache)
7956                 return 0;
7957
7958         rec = container_of(cache, struct extent_record, cache);
7959         if (is_data) {
7960                 struct data_backref *back;
7961                 back = find_data_backref(rec, parent, root_objectid, owner,
7962                                          offset, 1, bytenr, num_bytes);
7963                 if (!back)
7964                         goto out;
7965                 if (back->node.found_ref) {
7966                         back->found_ref -= refs_to_drop;
7967                         if (rec->refs)
7968                                 rec->refs -= refs_to_drop;
7969                 }
7970                 if (back->node.found_extent_tree) {
7971                         back->num_refs -= refs_to_drop;
7972                         if (rec->extent_item_refs)
7973                                 rec->extent_item_refs -= refs_to_drop;
7974                 }
7975                 if (back->found_ref == 0)
7976                         back->node.found_ref = 0;
7977                 if (back->num_refs == 0)
7978                         back->node.found_extent_tree = 0;
7979
7980                 if (!back->node.found_extent_tree && back->node.found_ref) {
7981                         list_del(&back->node.list);
7982                         free(back);
7983                 }
7984         } else {
7985                 struct tree_backref *back;
7986                 back = find_tree_backref(rec, parent, root_objectid);
7987                 if (!back)
7988                         goto out;
7989                 if (back->node.found_ref) {
7990                         if (rec->refs)
7991                                 rec->refs--;
7992                         back->node.found_ref = 0;
7993                 }
7994                 if (back->node.found_extent_tree) {
7995                         if (rec->extent_item_refs)
7996                                 rec->extent_item_refs--;
7997                         back->node.found_extent_tree = 0;
7998                 }
7999                 if (!back->node.found_extent_tree && back->node.found_ref) {
8000                         list_del(&back->node.list);
8001                         free(back);
8002                 }
8003         }
8004         maybe_free_extent_rec(extent_cache, rec);
8005 out:
8006         return 0;
8007 }
8008
8009 static int delete_extent_records(struct btrfs_trans_handle *trans,
8010                                  struct btrfs_root *root,
8011                                  struct btrfs_path *path,
8012                                  u64 bytenr)
8013 {
8014         struct btrfs_key key;
8015         struct btrfs_key found_key;
8016         struct extent_buffer *leaf;
8017         int ret;
8018         int slot;
8019
8020
8021         key.objectid = bytenr;
8022         key.type = (u8)-1;
8023         key.offset = (u64)-1;
8024
8025         while(1) {
8026                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8027                                         &key, path, 0, 1);
8028                 if (ret < 0)
8029                         break;
8030
8031                 if (ret > 0) {
8032                         ret = 0;
8033                         if (path->slots[0] == 0)
8034                                 break;
8035                         path->slots[0]--;
8036                 }
8037                 ret = 0;
8038
8039                 leaf = path->nodes[0];
8040                 slot = path->slots[0];
8041
8042                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8043                 if (found_key.objectid != bytenr)
8044                         break;
8045
8046                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8047                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8048                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8049                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8050                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8051                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8052                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8053                         btrfs_release_path(path);
8054                         if (found_key.type == 0) {
8055                                 if (found_key.offset == 0)
8056                                         break;
8057                                 key.offset = found_key.offset - 1;
8058                                 key.type = found_key.type;
8059                         }
8060                         key.type = found_key.type - 1;
8061                         key.offset = (u64)-1;
8062                         continue;
8063                 }
8064
8065                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8066                         found_key.objectid, found_key.type, found_key.offset);
8067
8068                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8069                 if (ret)
8070                         break;
8071                 btrfs_release_path(path);
8072
8073                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8074                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8075                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8076                                 found_key.offset : root->nodesize;
8077
8078                         ret = btrfs_update_block_group(trans, root, bytenr,
8079                                                        bytes, 0, 0);
8080                         if (ret)
8081                                 break;
8082                 }
8083         }
8084
8085         btrfs_release_path(path);
8086         return ret;
8087 }
8088
8089 /*
8090  * for a single backref, this will allocate a new extent
8091  * and add the backref to it.
8092  */
8093 static int record_extent(struct btrfs_trans_handle *trans,
8094                          struct btrfs_fs_info *info,
8095                          struct btrfs_path *path,
8096                          struct extent_record *rec,
8097                          struct extent_backref *back,
8098                          int allocated, u64 flags)
8099 {
8100         int ret = 0;
8101         struct btrfs_root *extent_root = info->extent_root;
8102         struct extent_buffer *leaf;
8103         struct btrfs_key ins_key;
8104         struct btrfs_extent_item *ei;
8105         struct data_backref *dback;
8106         struct btrfs_tree_block_info *bi;
8107
8108         if (!back->is_data)
8109                 rec->max_size = max_t(u64, rec->max_size,
8110                                     info->extent_root->nodesize);
8111
8112         if (!allocated) {
8113                 u32 item_size = sizeof(*ei);
8114
8115                 if (!back->is_data)
8116                         item_size += sizeof(*bi);
8117
8118                 ins_key.objectid = rec->start;
8119                 ins_key.offset = rec->max_size;
8120                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8121
8122                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8123                                         &ins_key, item_size);
8124                 if (ret)
8125                         goto fail;
8126
8127                 leaf = path->nodes[0];
8128                 ei = btrfs_item_ptr(leaf, path->slots[0],
8129                                     struct btrfs_extent_item);
8130
8131                 btrfs_set_extent_refs(leaf, ei, 0);
8132                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8133
8134                 if (back->is_data) {
8135                         btrfs_set_extent_flags(leaf, ei,
8136                                                BTRFS_EXTENT_FLAG_DATA);
8137                 } else {
8138                         struct btrfs_disk_key copy_key;;
8139
8140                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8141                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8142                                              sizeof(*bi));
8143
8144                         btrfs_set_disk_key_objectid(&copy_key,
8145                                                     rec->info_objectid);
8146                         btrfs_set_disk_key_type(&copy_key, 0);
8147                         btrfs_set_disk_key_offset(&copy_key, 0);
8148
8149                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8150                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8151
8152                         btrfs_set_extent_flags(leaf, ei,
8153                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8154                 }
8155
8156                 btrfs_mark_buffer_dirty(leaf);
8157                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8158                                                rec->max_size, 1, 0);
8159                 if (ret)
8160                         goto fail;
8161                 btrfs_release_path(path);
8162         }
8163
8164         if (back->is_data) {
8165                 u64 parent;
8166                 int i;
8167
8168                 dback = to_data_backref(back);
8169                 if (back->full_backref)
8170                         parent = dback->parent;
8171                 else
8172                         parent = 0;
8173
8174                 for (i = 0; i < dback->found_ref; i++) {
8175                         /* if parent != 0, we're doing a full backref
8176                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8177                          * just makes the backref allocator create a data
8178                          * backref
8179                          */
8180                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8181                                                    rec->start, rec->max_size,
8182                                                    parent,
8183                                                    dback->root,
8184                                                    parent ?
8185                                                    BTRFS_FIRST_FREE_OBJECTID :
8186                                                    dback->owner,
8187                                                    dback->offset);
8188                         if (ret)
8189                                 break;
8190                 }
8191                 fprintf(stderr, "adding new data backref"
8192                                 " on %llu %s %llu owner %llu"
8193                                 " offset %llu found %d\n",
8194                                 (unsigned long long)rec->start,
8195                                 back->full_backref ?
8196                                 "parent" : "root",
8197                                 back->full_backref ?
8198                                 (unsigned long long)parent :
8199                                 (unsigned long long)dback->root,
8200                                 (unsigned long long)dback->owner,
8201                                 (unsigned long long)dback->offset,
8202                                 dback->found_ref);
8203         } else {
8204                 u64 parent;
8205                 struct tree_backref *tback;
8206
8207                 tback = to_tree_backref(back);
8208                 if (back->full_backref)
8209                         parent = tback->parent;
8210                 else
8211                         parent = 0;
8212
8213                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8214                                            rec->start, rec->max_size,
8215                                            parent, tback->root, 0, 0);
8216                 fprintf(stderr, "adding new tree backref on "
8217                         "start %llu len %llu parent %llu root %llu\n",
8218                         rec->start, rec->max_size, parent, tback->root);
8219         }
8220 fail:
8221         btrfs_release_path(path);
8222         return ret;
8223 }
8224
8225 static struct extent_entry *find_entry(struct list_head *entries,
8226                                        u64 bytenr, u64 bytes)
8227 {
8228         struct extent_entry *entry = NULL;
8229
8230         list_for_each_entry(entry, entries, list) {
8231                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8232                         return entry;
8233         }
8234
8235         return NULL;
8236 }
8237
8238 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8239 {
8240         struct extent_entry *entry, *best = NULL, *prev = NULL;
8241
8242         list_for_each_entry(entry, entries, list) {
8243                 /*
8244                  * If there are as many broken entries as entries then we know
8245                  * not to trust this particular entry.
8246                  */
8247                 if (entry->broken == entry->count)
8248                         continue;
8249
8250                 /*
8251                  * Special case, when there are only two entries and 'best' is
8252                  * the first one
8253                  */
8254                 if (!prev) {
8255                         best = entry;
8256                         prev = entry;
8257                         continue;
8258                 }
8259
8260                 /*
8261                  * If our current entry == best then we can't be sure our best
8262                  * is really the best, so we need to keep searching.
8263                  */
8264                 if (best && best->count == entry->count) {
8265                         prev = entry;
8266                         best = NULL;
8267                         continue;
8268                 }
8269
8270                 /* Prev == entry, not good enough, have to keep searching */
8271                 if (!prev->broken && prev->count == entry->count)
8272                         continue;
8273
8274                 if (!best)
8275                         best = (prev->count > entry->count) ? prev : entry;
8276                 else if (best->count < entry->count)
8277                         best = entry;
8278                 prev = entry;
8279         }
8280
8281         return best;
8282 }
8283
8284 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8285                       struct data_backref *dback, struct extent_entry *entry)
8286 {
8287         struct btrfs_trans_handle *trans;
8288         struct btrfs_root *root;
8289         struct btrfs_file_extent_item *fi;
8290         struct extent_buffer *leaf;
8291         struct btrfs_key key;
8292         u64 bytenr, bytes;
8293         int ret, err;
8294
8295         key.objectid = dback->root;
8296         key.type = BTRFS_ROOT_ITEM_KEY;
8297         key.offset = (u64)-1;
8298         root = btrfs_read_fs_root(info, &key);
8299         if (IS_ERR(root)) {
8300                 fprintf(stderr, "Couldn't find root for our ref\n");
8301                 return -EINVAL;
8302         }
8303
8304         /*
8305          * The backref points to the original offset of the extent if it was
8306          * split, so we need to search down to the offset we have and then walk
8307          * forward until we find the backref we're looking for.
8308          */
8309         key.objectid = dback->owner;
8310         key.type = BTRFS_EXTENT_DATA_KEY;
8311         key.offset = dback->offset;
8312         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8313         if (ret < 0) {
8314                 fprintf(stderr, "Error looking up ref %d\n", ret);
8315                 return ret;
8316         }
8317
8318         while (1) {
8319                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8320                         ret = btrfs_next_leaf(root, path);
8321                         if (ret) {
8322                                 fprintf(stderr, "Couldn't find our ref, next\n");
8323                                 return -EINVAL;
8324                         }
8325                 }
8326                 leaf = path->nodes[0];
8327                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8328                 if (key.objectid != dback->owner ||
8329                     key.type != BTRFS_EXTENT_DATA_KEY) {
8330                         fprintf(stderr, "Couldn't find our ref, search\n");
8331                         return -EINVAL;
8332                 }
8333                 fi = btrfs_item_ptr(leaf, path->slots[0],
8334                                     struct btrfs_file_extent_item);
8335                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8336                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8337
8338                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8339                         break;
8340                 path->slots[0]++;
8341         }
8342
8343         btrfs_release_path(path);
8344
8345         trans = btrfs_start_transaction(root, 1);
8346         if (IS_ERR(trans))
8347                 return PTR_ERR(trans);
8348
8349         /*
8350          * Ok we have the key of the file extent we want to fix, now we can cow
8351          * down to the thing and fix it.
8352          */
8353         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8354         if (ret < 0) {
8355                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8356                         key.objectid, key.type, key.offset, ret);
8357                 goto out;
8358         }
8359         if (ret > 0) {
8360                 fprintf(stderr, "Well that's odd, we just found this key "
8361                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8362                         key.offset);
8363                 ret = -EINVAL;
8364                 goto out;
8365         }
8366         leaf = path->nodes[0];
8367         fi = btrfs_item_ptr(leaf, path->slots[0],
8368                             struct btrfs_file_extent_item);
8369
8370         if (btrfs_file_extent_compression(leaf, fi) &&
8371             dback->disk_bytenr != entry->bytenr) {
8372                 fprintf(stderr, "Ref doesn't match the record start and is "
8373                         "compressed, please take a btrfs-image of this file "
8374                         "system and send it to a btrfs developer so they can "
8375                         "complete this functionality for bytenr %Lu\n",
8376                         dback->disk_bytenr);
8377                 ret = -EINVAL;
8378                 goto out;
8379         }
8380
8381         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8382                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8383         } else if (dback->disk_bytenr > entry->bytenr) {
8384                 u64 off_diff, offset;
8385
8386                 off_diff = dback->disk_bytenr - entry->bytenr;
8387                 offset = btrfs_file_extent_offset(leaf, fi);
8388                 if (dback->disk_bytenr + offset +
8389                     btrfs_file_extent_num_bytes(leaf, fi) >
8390                     entry->bytenr + entry->bytes) {
8391                         fprintf(stderr, "Ref is past the entry end, please "
8392                                 "take a btrfs-image of this file system and "
8393                                 "send it to a btrfs developer, ref %Lu\n",
8394                                 dback->disk_bytenr);
8395                         ret = -EINVAL;
8396                         goto out;
8397                 }
8398                 offset += off_diff;
8399                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8400                 btrfs_set_file_extent_offset(leaf, fi, offset);
8401         } else if (dback->disk_bytenr < entry->bytenr) {
8402                 u64 offset;
8403
8404                 offset = btrfs_file_extent_offset(leaf, fi);
8405                 if (dback->disk_bytenr + offset < entry->bytenr) {
8406                         fprintf(stderr, "Ref is before the entry start, please"
8407                                 " take a btrfs-image of this file system and "
8408                                 "send it to a btrfs developer, ref %Lu\n",
8409                                 dback->disk_bytenr);
8410                         ret = -EINVAL;
8411                         goto out;
8412                 }
8413
8414                 offset += dback->disk_bytenr;
8415                 offset -= entry->bytenr;
8416                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8417                 btrfs_set_file_extent_offset(leaf, fi, offset);
8418         }
8419
8420         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8421
8422         /*
8423          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8424          * only do this if we aren't using compression, otherwise it's a
8425          * trickier case.
8426          */
8427         if (!btrfs_file_extent_compression(leaf, fi))
8428                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8429         else
8430                 printf("ram bytes may be wrong?\n");
8431         btrfs_mark_buffer_dirty(leaf);
8432 out:
8433         err = btrfs_commit_transaction(trans, root);
8434         btrfs_release_path(path);
8435         return ret ? ret : err;
8436 }
8437
8438 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8439                            struct extent_record *rec)
8440 {
8441         struct extent_backref *back;
8442         struct data_backref *dback;
8443         struct extent_entry *entry, *best = NULL;
8444         LIST_HEAD(entries);
8445         int nr_entries = 0;
8446         int broken_entries = 0;
8447         int ret = 0;
8448         short mismatch = 0;
8449
8450         /*
8451          * Metadata is easy and the backrefs should always agree on bytenr and
8452          * size, if not we've got bigger issues.
8453          */
8454         if (rec->metadata)
8455                 return 0;
8456
8457         list_for_each_entry(back, &rec->backrefs, list) {
8458                 if (back->full_backref || !back->is_data)
8459                         continue;
8460
8461                 dback = to_data_backref(back);
8462
8463                 /*
8464                  * We only pay attention to backrefs that we found a real
8465                  * backref for.
8466                  */
8467                 if (dback->found_ref == 0)
8468                         continue;
8469
8470                 /*
8471                  * For now we only catch when the bytes don't match, not the
8472                  * bytenr.  We can easily do this at the same time, but I want
8473                  * to have a fs image to test on before we just add repair
8474                  * functionality willy-nilly so we know we won't screw up the
8475                  * repair.
8476                  */
8477
8478                 entry = find_entry(&entries, dback->disk_bytenr,
8479                                    dback->bytes);
8480                 if (!entry) {
8481                         entry = malloc(sizeof(struct extent_entry));
8482                         if (!entry) {
8483                                 ret = -ENOMEM;
8484                                 goto out;
8485                         }
8486                         memset(entry, 0, sizeof(*entry));
8487                         entry->bytenr = dback->disk_bytenr;
8488                         entry->bytes = dback->bytes;
8489                         list_add_tail(&entry->list, &entries);
8490                         nr_entries++;
8491                 }
8492
8493                 /*
8494                  * If we only have on entry we may think the entries agree when
8495                  * in reality they don't so we have to do some extra checking.
8496                  */
8497                 if (dback->disk_bytenr != rec->start ||
8498                     dback->bytes != rec->nr || back->broken)
8499                         mismatch = 1;
8500
8501                 if (back->broken) {
8502                         entry->broken++;
8503                         broken_entries++;
8504                 }
8505
8506                 entry->count++;
8507         }
8508
8509         /* Yay all the backrefs agree, carry on good sir */
8510         if (nr_entries <= 1 && !mismatch)
8511                 goto out;
8512
8513         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8514                 "%Lu\n", rec->start);
8515
8516         /*
8517          * First we want to see if the backrefs can agree amongst themselves who
8518          * is right, so figure out which one of the entries has the highest
8519          * count.
8520          */
8521         best = find_most_right_entry(&entries);
8522
8523         /*
8524          * Ok so we may have an even split between what the backrefs think, so
8525          * this is where we use the extent ref to see what it thinks.
8526          */
8527         if (!best) {
8528                 entry = find_entry(&entries, rec->start, rec->nr);
8529                 if (!entry && (!broken_entries || !rec->found_rec)) {
8530                         fprintf(stderr, "Backrefs don't agree with each other "
8531                                 "and extent record doesn't agree with anybody,"
8532                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8533                                 rec->start, rec->nr);
8534                         ret = -EINVAL;
8535                         goto out;
8536                 } else if (!entry) {
8537                         /*
8538                          * Ok our backrefs were broken, we'll assume this is the
8539                          * correct value and add an entry for this range.
8540                          */
8541                         entry = malloc(sizeof(struct extent_entry));
8542                         if (!entry) {
8543                                 ret = -ENOMEM;
8544                                 goto out;
8545                         }
8546                         memset(entry, 0, sizeof(*entry));
8547                         entry->bytenr = rec->start;
8548                         entry->bytes = rec->nr;
8549                         list_add_tail(&entry->list, &entries);
8550                         nr_entries++;
8551                 }
8552                 entry->count++;
8553                 best = find_most_right_entry(&entries);
8554                 if (!best) {
8555                         fprintf(stderr, "Backrefs and extent record evenly "
8556                                 "split on who is right, this is going to "
8557                                 "require user input to fix bytenr %Lu bytes "
8558                                 "%Lu\n", rec->start, rec->nr);
8559                         ret = -EINVAL;
8560                         goto out;
8561                 }
8562         }
8563
8564         /*
8565          * I don't think this can happen currently as we'll abort() if we catch
8566          * this case higher up, but in case somebody removes that we still can't
8567          * deal with it properly here yet, so just bail out of that's the case.
8568          */
8569         if (best->bytenr != rec->start) {
8570                 fprintf(stderr, "Extent start and backref starts don't match, "
8571                         "please use btrfs-image on this file system and send "
8572                         "it to a btrfs developer so they can make fsck fix "
8573                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8574                         rec->start, rec->nr);
8575                 ret = -EINVAL;
8576                 goto out;
8577         }
8578
8579         /*
8580          * Ok great we all agreed on an extent record, let's go find the real
8581          * references and fix up the ones that don't match.
8582          */
8583         list_for_each_entry(back, &rec->backrefs, list) {
8584                 if (back->full_backref || !back->is_data)
8585                         continue;
8586
8587                 dback = to_data_backref(back);
8588
8589                 /*
8590                  * Still ignoring backrefs that don't have a real ref attached
8591                  * to them.
8592                  */
8593                 if (dback->found_ref == 0)
8594                         continue;
8595
8596                 if (dback->bytes == best->bytes &&
8597                     dback->disk_bytenr == best->bytenr)
8598                         continue;
8599
8600                 ret = repair_ref(info, path, dback, best);
8601                 if (ret)
8602                         goto out;
8603         }
8604
8605         /*
8606          * Ok we messed with the actual refs, which means we need to drop our
8607          * entire cache and go back and rescan.  I know this is a huge pain and
8608          * adds a lot of extra work, but it's the only way to be safe.  Once all
8609          * the backrefs agree we may not need to do anything to the extent
8610          * record itself.
8611          */
8612         ret = -EAGAIN;
8613 out:
8614         while (!list_empty(&entries)) {
8615                 entry = list_entry(entries.next, struct extent_entry, list);
8616                 list_del_init(&entry->list);
8617                 free(entry);
8618         }
8619         return ret;
8620 }
8621
8622 static int process_duplicates(struct cache_tree *extent_cache,
8623                               struct extent_record *rec)
8624 {
8625         struct extent_record *good, *tmp;
8626         struct cache_extent *cache;
8627         int ret;
8628
8629         /*
8630          * If we found a extent record for this extent then return, or if we
8631          * have more than one duplicate we are likely going to need to delete
8632          * something.
8633          */
8634         if (rec->found_rec || rec->num_duplicates > 1)
8635                 return 0;
8636
8637         /* Shouldn't happen but just in case */
8638         BUG_ON(!rec->num_duplicates);
8639
8640         /*
8641          * So this happens if we end up with a backref that doesn't match the
8642          * actual extent entry.  So either the backref is bad or the extent
8643          * entry is bad.  Either way we want to have the extent_record actually
8644          * reflect what we found in the extent_tree, so we need to take the
8645          * duplicate out and use that as the extent_record since the only way we
8646          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8647          */
8648         remove_cache_extent(extent_cache, &rec->cache);
8649
8650         good = to_extent_record(rec->dups.next);
8651         list_del_init(&good->list);
8652         INIT_LIST_HEAD(&good->backrefs);
8653         INIT_LIST_HEAD(&good->dups);
8654         good->cache.start = good->start;
8655         good->cache.size = good->nr;
8656         good->content_checked = 0;
8657         good->owner_ref_checked = 0;
8658         good->num_duplicates = 0;
8659         good->refs = rec->refs;
8660         list_splice_init(&rec->backrefs, &good->backrefs);
8661         while (1) {
8662                 cache = lookup_cache_extent(extent_cache, good->start,
8663                                             good->nr);
8664                 if (!cache)
8665                         break;
8666                 tmp = container_of(cache, struct extent_record, cache);
8667
8668                 /*
8669                  * If we find another overlapping extent and it's found_rec is
8670                  * set then it's a duplicate and we need to try and delete
8671                  * something.
8672                  */
8673                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8674                         if (list_empty(&good->list))
8675                                 list_add_tail(&good->list,
8676                                               &duplicate_extents);
8677                         good->num_duplicates += tmp->num_duplicates + 1;
8678                         list_splice_init(&tmp->dups, &good->dups);
8679                         list_del_init(&tmp->list);
8680                         list_add_tail(&tmp->list, &good->dups);
8681                         remove_cache_extent(extent_cache, &tmp->cache);
8682                         continue;
8683                 }
8684
8685                 /*
8686                  * Ok we have another non extent item backed extent rec, so lets
8687                  * just add it to this extent and carry on like we did above.
8688                  */
8689                 good->refs += tmp->refs;
8690                 list_splice_init(&tmp->backrefs, &good->backrefs);
8691                 remove_cache_extent(extent_cache, &tmp->cache);
8692                 free(tmp);
8693         }
8694         ret = insert_cache_extent(extent_cache, &good->cache);
8695         BUG_ON(ret);
8696         free(rec);
8697         return good->num_duplicates ? 0 : 1;
8698 }
8699
8700 static int delete_duplicate_records(struct btrfs_root *root,
8701                                     struct extent_record *rec)
8702 {
8703         struct btrfs_trans_handle *trans;
8704         LIST_HEAD(delete_list);
8705         struct btrfs_path path;
8706         struct extent_record *tmp, *good, *n;
8707         int nr_del = 0;
8708         int ret = 0, err;
8709         struct btrfs_key key;
8710
8711         btrfs_init_path(&path);
8712
8713         good = rec;
8714         /* Find the record that covers all of the duplicates. */
8715         list_for_each_entry(tmp, &rec->dups, list) {
8716                 if (good->start < tmp->start)
8717                         continue;
8718                 if (good->nr > tmp->nr)
8719                         continue;
8720
8721                 if (tmp->start + tmp->nr < good->start + good->nr) {
8722                         fprintf(stderr, "Ok we have overlapping extents that "
8723                                 "aren't completely covered by each other, this "
8724                                 "is going to require more careful thought.  "
8725                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8726                                 tmp->start, tmp->nr, good->start, good->nr);
8727                         abort();
8728                 }
8729                 good = tmp;
8730         }
8731
8732         if (good != rec)
8733                 list_add_tail(&rec->list, &delete_list);
8734
8735         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8736                 if (tmp == good)
8737                         continue;
8738                 list_move_tail(&tmp->list, &delete_list);
8739         }
8740
8741         root = root->fs_info->extent_root;
8742         trans = btrfs_start_transaction(root, 1);
8743         if (IS_ERR(trans)) {
8744                 ret = PTR_ERR(trans);
8745                 goto out;
8746         }
8747
8748         list_for_each_entry(tmp, &delete_list, list) {
8749                 if (tmp->found_rec == 0)
8750                         continue;
8751                 key.objectid = tmp->start;
8752                 key.type = BTRFS_EXTENT_ITEM_KEY;
8753                 key.offset = tmp->nr;
8754
8755                 /* Shouldn't happen but just in case */
8756                 if (tmp->metadata) {
8757                         fprintf(stderr, "Well this shouldn't happen, extent "
8758                                 "record overlaps but is metadata? "
8759                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8760                         abort();
8761                 }
8762
8763                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8764                 if (ret) {
8765                         if (ret > 0)
8766                                 ret = -EINVAL;
8767                         break;
8768                 }
8769                 ret = btrfs_del_item(trans, root, &path);
8770                 if (ret)
8771                         break;
8772                 btrfs_release_path(&path);
8773                 nr_del++;
8774         }
8775         err = btrfs_commit_transaction(trans, root);
8776         if (err && !ret)
8777                 ret = err;
8778 out:
8779         while (!list_empty(&delete_list)) {
8780                 tmp = to_extent_record(delete_list.next);
8781                 list_del_init(&tmp->list);
8782                 if (tmp == rec)
8783                         continue;
8784                 free(tmp);
8785         }
8786
8787         while (!list_empty(&rec->dups)) {
8788                 tmp = to_extent_record(rec->dups.next);
8789                 list_del_init(&tmp->list);
8790                 free(tmp);
8791         }
8792
8793         btrfs_release_path(&path);
8794
8795         if (!ret && !nr_del)
8796                 rec->num_duplicates = 0;
8797
8798         return ret ? ret : nr_del;
8799 }
8800
8801 static int find_possible_backrefs(struct btrfs_fs_info *info,
8802                                   struct btrfs_path *path,
8803                                   struct cache_tree *extent_cache,
8804                                   struct extent_record *rec)
8805 {
8806         struct btrfs_root *root;
8807         struct extent_backref *back;
8808         struct data_backref *dback;
8809         struct cache_extent *cache;
8810         struct btrfs_file_extent_item *fi;
8811         struct btrfs_key key;
8812         u64 bytenr, bytes;
8813         int ret;
8814
8815         list_for_each_entry(back, &rec->backrefs, list) {
8816                 /* Don't care about full backrefs (poor unloved backrefs) */
8817                 if (back->full_backref || !back->is_data)
8818                         continue;
8819
8820                 dback = to_data_backref(back);
8821
8822                 /* We found this one, we don't need to do a lookup */
8823                 if (dback->found_ref)
8824                         continue;
8825
8826                 key.objectid = dback->root;
8827                 key.type = BTRFS_ROOT_ITEM_KEY;
8828                 key.offset = (u64)-1;
8829
8830                 root = btrfs_read_fs_root(info, &key);
8831
8832                 /* No root, definitely a bad ref, skip */
8833                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8834                         continue;
8835                 /* Other err, exit */
8836                 if (IS_ERR(root))
8837                         return PTR_ERR(root);
8838
8839                 key.objectid = dback->owner;
8840                 key.type = BTRFS_EXTENT_DATA_KEY;
8841                 key.offset = dback->offset;
8842                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8843                 if (ret) {
8844                         btrfs_release_path(path);
8845                         if (ret < 0)
8846                                 return ret;
8847                         /* Didn't find it, we can carry on */
8848                         ret = 0;
8849                         continue;
8850                 }
8851
8852                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8853                                     struct btrfs_file_extent_item);
8854                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8855                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8856                 btrfs_release_path(path);
8857                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8858                 if (cache) {
8859                         struct extent_record *tmp;
8860                         tmp = container_of(cache, struct extent_record, cache);
8861
8862                         /*
8863                          * If we found an extent record for the bytenr for this
8864                          * particular backref then we can't add it to our
8865                          * current extent record.  We only want to add backrefs
8866                          * that don't have a corresponding extent item in the
8867                          * extent tree since they likely belong to this record
8868                          * and we need to fix it if it doesn't match bytenrs.
8869                          */
8870                         if  (tmp->found_rec)
8871                                 continue;
8872                 }
8873
8874                 dback->found_ref += 1;
8875                 dback->disk_bytenr = bytenr;
8876                 dback->bytes = bytes;
8877
8878                 /*
8879                  * Set this so the verify backref code knows not to trust the
8880                  * values in this backref.
8881                  */
8882                 back->broken = 1;
8883         }
8884
8885         return 0;
8886 }
8887
8888 /*
8889  * Record orphan data ref into corresponding root.
8890  *
8891  * Return 0 if the extent item contains data ref and recorded.
8892  * Return 1 if the extent item contains no useful data ref
8893  *   On that case, it may contains only shared_dataref or metadata backref
8894  *   or the file extent exists(this should be handled by the extent bytenr
8895  *   recovery routine)
8896  * Return <0 if something goes wrong.
8897  */
8898 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8899                                       struct extent_record *rec)
8900 {
8901         struct btrfs_key key;
8902         struct btrfs_root *dest_root;
8903         struct extent_backref *back;
8904         struct data_backref *dback;
8905         struct orphan_data_extent *orphan;
8906         struct btrfs_path path;
8907         int recorded_data_ref = 0;
8908         int ret = 0;
8909
8910         if (rec->metadata)
8911                 return 1;
8912         btrfs_init_path(&path);
8913         list_for_each_entry(back, &rec->backrefs, list) {
8914                 if (back->full_backref || !back->is_data ||
8915                     !back->found_extent_tree)
8916                         continue;
8917                 dback = to_data_backref(back);
8918                 if (dback->found_ref)
8919                         continue;
8920                 key.objectid = dback->root;
8921                 key.type = BTRFS_ROOT_ITEM_KEY;
8922                 key.offset = (u64)-1;
8923
8924                 dest_root = btrfs_read_fs_root(fs_info, &key);
8925
8926                 /* For non-exist root we just skip it */
8927                 if (IS_ERR(dest_root) || !dest_root)
8928                         continue;
8929
8930                 key.objectid = dback->owner;
8931                 key.type = BTRFS_EXTENT_DATA_KEY;
8932                 key.offset = dback->offset;
8933
8934                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8935                 btrfs_release_path(&path);
8936                 /*
8937                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8938                  * we need to record it for inode/file extent rebuild.
8939                  * For ret > 0, we record it only for file extent rebuild.
8940                  * For ret == 0, the file extent exists but only bytenr
8941                  * mismatch, let the original bytenr fix routine to handle,
8942                  * don't record it.
8943                  */
8944                 if (ret == 0)
8945                         continue;
8946                 ret = 0;
8947                 orphan = malloc(sizeof(*orphan));
8948                 if (!orphan) {
8949                         ret = -ENOMEM;
8950                         goto out;
8951                 }
8952                 INIT_LIST_HEAD(&orphan->list);
8953                 orphan->root = dback->root;
8954                 orphan->objectid = dback->owner;
8955                 orphan->offset = dback->offset;
8956                 orphan->disk_bytenr = rec->cache.start;
8957                 orphan->disk_len = rec->cache.size;
8958                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8959                 recorded_data_ref = 1;
8960         }
8961 out:
8962         btrfs_release_path(&path);
8963         if (!ret)
8964                 return !recorded_data_ref;
8965         else
8966                 return ret;
8967 }
8968
8969 /*
8970  * when an incorrect extent item is found, this will delete
8971  * all of the existing entries for it and recreate them
8972  * based on what the tree scan found.
8973  */
8974 static int fixup_extent_refs(struct btrfs_fs_info *info,
8975                              struct cache_tree *extent_cache,
8976                              struct extent_record *rec)
8977 {
8978         struct btrfs_trans_handle *trans = NULL;
8979         int ret;
8980         struct btrfs_path path;
8981         struct list_head *cur = rec->backrefs.next;
8982         struct cache_extent *cache;
8983         struct extent_backref *back;
8984         int allocated = 0;
8985         u64 flags = 0;
8986
8987         if (rec->flag_block_full_backref)
8988                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8989
8990         btrfs_init_path(&path);
8991         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8992                 /*
8993                  * Sometimes the backrefs themselves are so broken they don't
8994                  * get attached to any meaningful rec, so first go back and
8995                  * check any of our backrefs that we couldn't find and throw
8996                  * them into the list if we find the backref so that
8997                  * verify_backrefs can figure out what to do.
8998                  */
8999                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9000                 if (ret < 0)
9001                         goto out;
9002         }
9003
9004         /* step one, make sure all of the backrefs agree */
9005         ret = verify_backrefs(info, &path, rec);
9006         if (ret < 0)
9007                 goto out;
9008
9009         trans = btrfs_start_transaction(info->extent_root, 1);
9010         if (IS_ERR(trans)) {
9011                 ret = PTR_ERR(trans);
9012                 goto out;
9013         }
9014
9015         /* step two, delete all the existing records */
9016         ret = delete_extent_records(trans, info->extent_root, &path,
9017                                     rec->start);
9018
9019         if (ret < 0)
9020                 goto out;
9021
9022         /* was this block corrupt?  If so, don't add references to it */
9023         cache = lookup_cache_extent(info->corrupt_blocks,
9024                                     rec->start, rec->max_size);
9025         if (cache) {
9026                 ret = 0;
9027                 goto out;
9028         }
9029
9030         /* step three, recreate all the refs we did find */
9031         while(cur != &rec->backrefs) {
9032                 back = to_extent_backref(cur);
9033                 cur = cur->next;
9034
9035                 /*
9036                  * if we didn't find any references, don't create a
9037                  * new extent record
9038                  */
9039                 if (!back->found_ref)
9040                         continue;
9041
9042                 rec->bad_full_backref = 0;
9043                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9044                 allocated = 1;
9045
9046                 if (ret)
9047                         goto out;
9048         }
9049 out:
9050         if (trans) {
9051                 int err = btrfs_commit_transaction(trans, info->extent_root);
9052                 if (!ret)
9053                         ret = err;
9054         }
9055
9056         if (!ret)
9057                 fprintf(stderr, "Repaired extent references for %llu\n",
9058                                 (unsigned long long)rec->start);
9059
9060         btrfs_release_path(&path);
9061         return ret;
9062 }
9063
9064 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9065                               struct extent_record *rec)
9066 {
9067         struct btrfs_trans_handle *trans;
9068         struct btrfs_root *root = fs_info->extent_root;
9069         struct btrfs_path path;
9070         struct btrfs_extent_item *ei;
9071         struct btrfs_key key;
9072         u64 flags;
9073         int ret = 0;
9074
9075         key.objectid = rec->start;
9076         if (rec->metadata) {
9077                 key.type = BTRFS_METADATA_ITEM_KEY;
9078                 key.offset = rec->info_level;
9079         } else {
9080                 key.type = BTRFS_EXTENT_ITEM_KEY;
9081                 key.offset = rec->max_size;
9082         }
9083
9084         trans = btrfs_start_transaction(root, 0);
9085         if (IS_ERR(trans))
9086                 return PTR_ERR(trans);
9087
9088         btrfs_init_path(&path);
9089         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9090         if (ret < 0) {
9091                 btrfs_release_path(&path);
9092                 btrfs_commit_transaction(trans, root);
9093                 return ret;
9094         } else if (ret) {
9095                 fprintf(stderr, "Didn't find extent for %llu\n",
9096                         (unsigned long long)rec->start);
9097                 btrfs_release_path(&path);
9098                 btrfs_commit_transaction(trans, root);
9099                 return -ENOENT;
9100         }
9101
9102         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9103                             struct btrfs_extent_item);
9104         flags = btrfs_extent_flags(path.nodes[0], ei);
9105         if (rec->flag_block_full_backref) {
9106                 fprintf(stderr, "setting full backref on %llu\n",
9107                         (unsigned long long)key.objectid);
9108                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9109         } else {
9110                 fprintf(stderr, "clearing full backref on %llu\n",
9111                         (unsigned long long)key.objectid);
9112                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9113         }
9114         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9115         btrfs_mark_buffer_dirty(path.nodes[0]);
9116         btrfs_release_path(&path);
9117         ret = btrfs_commit_transaction(trans, root);
9118         if (!ret)
9119                 fprintf(stderr, "Repaired extent flags for %llu\n",
9120                                 (unsigned long long)rec->start);
9121
9122         return ret;
9123 }
9124
9125 /* right now we only prune from the extent allocation tree */
9126 static int prune_one_block(struct btrfs_trans_handle *trans,
9127                            struct btrfs_fs_info *info,
9128                            struct btrfs_corrupt_block *corrupt)
9129 {
9130         int ret;
9131         struct btrfs_path path;
9132         struct extent_buffer *eb;
9133         u64 found;
9134         int slot;
9135         int nritems;
9136         int level = corrupt->level + 1;
9137
9138         btrfs_init_path(&path);
9139 again:
9140         /* we want to stop at the parent to our busted block */
9141         path.lowest_level = level;
9142
9143         ret = btrfs_search_slot(trans, info->extent_root,
9144                                 &corrupt->key, &path, -1, 1);
9145
9146         if (ret < 0)
9147                 goto out;
9148
9149         eb = path.nodes[level];
9150         if (!eb) {
9151                 ret = -ENOENT;
9152                 goto out;
9153         }
9154
9155         /*
9156          * hopefully the search gave us the block we want to prune,
9157          * lets try that first
9158          */
9159         slot = path.slots[level];
9160         found =  btrfs_node_blockptr(eb, slot);
9161         if (found == corrupt->cache.start)
9162                 goto del_ptr;
9163
9164         nritems = btrfs_header_nritems(eb);
9165
9166         /* the search failed, lets scan this node and hope we find it */
9167         for (slot = 0; slot < nritems; slot++) {
9168                 found =  btrfs_node_blockptr(eb, slot);
9169                 if (found == corrupt->cache.start)
9170                         goto del_ptr;
9171         }
9172         /*
9173          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9174          * to this block
9175          */
9176         if (eb == info->extent_root->node) {
9177                 ret = -ENOENT;
9178                 goto out;
9179         } else {
9180                 level++;
9181                 btrfs_release_path(&path);
9182                 goto again;
9183         }
9184
9185 del_ptr:
9186         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9187         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9188
9189 out:
9190         btrfs_release_path(&path);
9191         return ret;
9192 }
9193
9194 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9195 {
9196         struct btrfs_trans_handle *trans = NULL;
9197         struct cache_extent *cache;
9198         struct btrfs_corrupt_block *corrupt;
9199
9200         while (1) {
9201                 cache = search_cache_extent(info->corrupt_blocks, 0);
9202                 if (!cache)
9203                         break;
9204                 if (!trans) {
9205                         trans = btrfs_start_transaction(info->extent_root, 1);
9206                         if (IS_ERR(trans))
9207                                 return PTR_ERR(trans);
9208                 }
9209                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9210                 prune_one_block(trans, info, corrupt);
9211                 remove_cache_extent(info->corrupt_blocks, cache);
9212         }
9213         if (trans)
9214                 return btrfs_commit_transaction(trans, info->extent_root);
9215         return 0;
9216 }
9217
9218 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9219 {
9220         struct btrfs_block_group_cache *cache;
9221         u64 start, end;
9222         int ret;
9223
9224         while (1) {
9225                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9226                                             &start, &end, EXTENT_DIRTY);
9227                 if (ret)
9228                         break;
9229                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9230         }
9231
9232         start = 0;
9233         while (1) {
9234                 cache = btrfs_lookup_first_block_group(fs_info, start);
9235                 if (!cache)
9236                         break;
9237                 if (cache->cached)
9238                         cache->cached = 0;
9239                 start = cache->key.objectid + cache->key.offset;
9240         }
9241 }
9242
9243 static int check_extent_refs(struct btrfs_root *root,
9244                              struct cache_tree *extent_cache)
9245 {
9246         struct extent_record *rec;
9247         struct cache_extent *cache;
9248         int ret = 0;
9249         int had_dups = 0;
9250
9251         if (repair) {
9252                 /*
9253                  * if we're doing a repair, we have to make sure
9254                  * we don't allocate from the problem extents.
9255                  * In the worst case, this will be all the
9256                  * extents in the FS
9257                  */
9258                 cache = search_cache_extent(extent_cache, 0);
9259                 while(cache) {
9260                         rec = container_of(cache, struct extent_record, cache);
9261                         set_extent_dirty(root->fs_info->excluded_extents,
9262                                          rec->start,
9263                                          rec->start + rec->max_size - 1);
9264                         cache = next_cache_extent(cache);
9265                 }
9266
9267                 /* pin down all the corrupted blocks too */
9268                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9269                 while(cache) {
9270                         set_extent_dirty(root->fs_info->excluded_extents,
9271                                          cache->start,
9272                                          cache->start + cache->size - 1);
9273                         cache = next_cache_extent(cache);
9274                 }
9275                 prune_corrupt_blocks(root->fs_info);
9276                 reset_cached_block_groups(root->fs_info);
9277         }
9278
9279         reset_cached_block_groups(root->fs_info);
9280
9281         /*
9282          * We need to delete any duplicate entries we find first otherwise we
9283          * could mess up the extent tree when we have backrefs that actually
9284          * belong to a different extent item and not the weird duplicate one.
9285          */
9286         while (repair && !list_empty(&duplicate_extents)) {
9287                 rec = to_extent_record(duplicate_extents.next);
9288                 list_del_init(&rec->list);
9289
9290                 /* Sometimes we can find a backref before we find an actual
9291                  * extent, so we need to process it a little bit to see if there
9292                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9293                  * if this is a backref screwup.  If we need to delete stuff
9294                  * process_duplicates() will return 0, otherwise it will return
9295                  * 1 and we
9296                  */
9297                 if (process_duplicates(extent_cache, rec))
9298                         continue;
9299                 ret = delete_duplicate_records(root, rec);
9300                 if (ret < 0)
9301                         return ret;
9302                 /*
9303                  * delete_duplicate_records will return the number of entries
9304                  * deleted, so if it's greater than 0 then we know we actually
9305                  * did something and we need to remove.
9306                  */
9307                 if (ret)
9308                         had_dups = 1;
9309         }
9310
9311         if (had_dups)
9312                 return -EAGAIN;
9313
9314         while(1) {
9315                 int cur_err = 0;
9316                 int fix = 0;
9317
9318                 cache = search_cache_extent(extent_cache, 0);
9319                 if (!cache)
9320                         break;
9321                 rec = container_of(cache, struct extent_record, cache);
9322                 if (rec->num_duplicates) {
9323                         fprintf(stderr, "extent item %llu has multiple extent "
9324                                 "items\n", (unsigned long long)rec->start);
9325                         cur_err = 1;
9326                 }
9327
9328                 if (rec->refs != rec->extent_item_refs) {
9329                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9330                                 (unsigned long long)rec->start,
9331                                 (unsigned long long)rec->nr);
9332                         fprintf(stderr, "extent item %llu, found %llu\n",
9333                                 (unsigned long long)rec->extent_item_refs,
9334                                 (unsigned long long)rec->refs);
9335                         ret = record_orphan_data_extents(root->fs_info, rec);
9336                         if (ret < 0)
9337                                 goto repair_abort;
9338                         fix = ret;
9339                         cur_err = 1;
9340                 }
9341                 if (all_backpointers_checked(rec, 1)) {
9342                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9343                                 (unsigned long long)rec->start,
9344                                 (unsigned long long)rec->nr);
9345                         fix = 1;
9346                         cur_err = 1;
9347                 }
9348                 if (!rec->owner_ref_checked) {
9349                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9350                                 (unsigned long long)rec->start,
9351                                 (unsigned long long)rec->nr);
9352                         fix = 1;
9353                         cur_err = 1;
9354                 }
9355
9356                 if (repair && fix) {
9357                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9358                         if (ret)
9359                                 goto repair_abort;
9360                 }
9361
9362
9363                 if (rec->bad_full_backref) {
9364                         fprintf(stderr, "bad full backref, on [%llu]\n",
9365                                 (unsigned long long)rec->start);
9366                         if (repair) {
9367                                 ret = fixup_extent_flags(root->fs_info, rec);
9368                                 if (ret)
9369                                         goto repair_abort;
9370                                 fix = 1;
9371                         }
9372                         cur_err = 1;
9373                 }
9374                 /*
9375                  * Although it's not a extent ref's problem, we reuse this
9376                  * routine for error reporting.
9377                  * No repair function yet.
9378                  */
9379                 if (rec->crossing_stripes) {
9380                         fprintf(stderr,
9381                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9382                                 rec->start, rec->start + rec->max_size);
9383                         cur_err = 1;
9384                 }
9385
9386                 if (rec->wrong_chunk_type) {
9387                         fprintf(stderr,
9388                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9389                                 rec->start, rec->start + rec->max_size);
9390                         cur_err = 1;
9391                 }
9392
9393                 remove_cache_extent(extent_cache, cache);
9394                 free_all_extent_backrefs(rec);
9395                 if (!init_extent_tree && repair && (!cur_err || fix))
9396                         clear_extent_dirty(root->fs_info->excluded_extents,
9397                                            rec->start,
9398                                            rec->start + rec->max_size - 1);
9399                 free(rec);
9400         }
9401 repair_abort:
9402         if (repair) {
9403                 if (ret && ret != -EAGAIN) {
9404                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9405                         exit(1);
9406                 } else if (!ret) {
9407                         struct btrfs_trans_handle *trans;
9408
9409                         root = root->fs_info->extent_root;
9410                         trans = btrfs_start_transaction(root, 1);
9411                         if (IS_ERR(trans)) {
9412                                 ret = PTR_ERR(trans);
9413                                 goto repair_abort;
9414                         }
9415
9416                         btrfs_fix_block_accounting(trans, root);
9417                         ret = btrfs_commit_transaction(trans, root);
9418                         if (ret)
9419                                 goto repair_abort;
9420                 }
9421                 return ret;
9422         }
9423         return 0;
9424 }
9425
9426 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9427 {
9428         u64 stripe_size;
9429
9430         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9431                 stripe_size = length;
9432                 stripe_size /= num_stripes;
9433         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9434                 stripe_size = length * 2;
9435                 stripe_size /= num_stripes;
9436         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9437                 stripe_size = length;
9438                 stripe_size /= (num_stripes - 1);
9439         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9440                 stripe_size = length;
9441                 stripe_size /= (num_stripes - 2);
9442         } else {
9443                 stripe_size = length;
9444         }
9445         return stripe_size;
9446 }
9447
9448 /*
9449  * Check the chunk with its block group/dev list ref:
9450  * Return 0 if all refs seems valid.
9451  * Return 1 if part of refs seems valid, need later check for rebuild ref
9452  * like missing block group and needs to search extent tree to rebuild them.
9453  * Return -1 if essential refs are missing and unable to rebuild.
9454  */
9455 static int check_chunk_refs(struct chunk_record *chunk_rec,
9456                             struct block_group_tree *block_group_cache,
9457                             struct device_extent_tree *dev_extent_cache,
9458                             int silent)
9459 {
9460         struct cache_extent *block_group_item;
9461         struct block_group_record *block_group_rec;
9462         struct cache_extent *dev_extent_item;
9463         struct device_extent_record *dev_extent_rec;
9464         u64 devid;
9465         u64 offset;
9466         u64 length;
9467         int metadump_v2 = 0;
9468         int i;
9469         int ret = 0;
9470
9471         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9472                                                chunk_rec->offset,
9473                                                chunk_rec->length);
9474         if (block_group_item) {
9475                 block_group_rec = container_of(block_group_item,
9476                                                struct block_group_record,
9477                                                cache);
9478                 if (chunk_rec->length != block_group_rec->offset ||
9479                     chunk_rec->offset != block_group_rec->objectid ||
9480                     (!metadump_v2 &&
9481                      chunk_rec->type_flags != block_group_rec->flags)) {
9482                         if (!silent)
9483                                 fprintf(stderr,
9484                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9485                                         chunk_rec->objectid,
9486                                         chunk_rec->type,
9487                                         chunk_rec->offset,
9488                                         chunk_rec->length,
9489                                         chunk_rec->offset,
9490                                         chunk_rec->type_flags,
9491                                         block_group_rec->objectid,
9492                                         block_group_rec->type,
9493                                         block_group_rec->offset,
9494                                         block_group_rec->offset,
9495                                         block_group_rec->objectid,
9496                                         block_group_rec->flags);
9497                         ret = -1;
9498                 } else {
9499                         list_del_init(&block_group_rec->list);
9500                         chunk_rec->bg_rec = block_group_rec;
9501                 }
9502         } else {
9503                 if (!silent)
9504                         fprintf(stderr,
9505                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9506                                 chunk_rec->objectid,
9507                                 chunk_rec->type,
9508                                 chunk_rec->offset,
9509                                 chunk_rec->length,
9510                                 chunk_rec->offset,
9511                                 chunk_rec->type_flags);
9512                 ret = 1;
9513         }
9514
9515         if (metadump_v2)
9516                 return ret;
9517
9518         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9519                                     chunk_rec->num_stripes);
9520         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9521                 devid = chunk_rec->stripes[i].devid;
9522                 offset = chunk_rec->stripes[i].offset;
9523                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9524                                                        devid, offset, length);
9525                 if (dev_extent_item) {
9526                         dev_extent_rec = container_of(dev_extent_item,
9527                                                 struct device_extent_record,
9528                                                 cache);
9529                         if (dev_extent_rec->objectid != devid ||
9530                             dev_extent_rec->offset != offset ||
9531                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9532                             dev_extent_rec->length != length) {
9533                                 if (!silent)
9534                                         fprintf(stderr,
9535                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9536                                                 chunk_rec->objectid,
9537                                                 chunk_rec->type,
9538                                                 chunk_rec->offset,
9539                                                 chunk_rec->stripes[i].devid,
9540                                                 chunk_rec->stripes[i].offset,
9541                                                 dev_extent_rec->objectid,
9542                                                 dev_extent_rec->offset,
9543                                                 dev_extent_rec->length);
9544                                 ret = -1;
9545                         } else {
9546                                 list_move(&dev_extent_rec->chunk_list,
9547                                           &chunk_rec->dextents);
9548                         }
9549                 } else {
9550                         if (!silent)
9551                                 fprintf(stderr,
9552                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9553                                         chunk_rec->objectid,
9554                                         chunk_rec->type,
9555                                         chunk_rec->offset,
9556                                         chunk_rec->stripes[i].devid,
9557                                         chunk_rec->stripes[i].offset);
9558                         ret = -1;
9559                 }
9560         }
9561         return ret;
9562 }
9563
9564 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9565 int check_chunks(struct cache_tree *chunk_cache,
9566                  struct block_group_tree *block_group_cache,
9567                  struct device_extent_tree *dev_extent_cache,
9568                  struct list_head *good, struct list_head *bad,
9569                  struct list_head *rebuild, int silent)
9570 {
9571         struct cache_extent *chunk_item;
9572         struct chunk_record *chunk_rec;
9573         struct block_group_record *bg_rec;
9574         struct device_extent_record *dext_rec;
9575         int err;
9576         int ret = 0;
9577
9578         chunk_item = first_cache_extent(chunk_cache);
9579         while (chunk_item) {
9580                 chunk_rec = container_of(chunk_item, struct chunk_record,
9581                                          cache);
9582                 err = check_chunk_refs(chunk_rec, block_group_cache,
9583                                        dev_extent_cache, silent);
9584                 if (err < 0)
9585                         ret = err;
9586                 if (err == 0 && good)
9587                         list_add_tail(&chunk_rec->list, good);
9588                 if (err > 0 && rebuild)
9589                         list_add_tail(&chunk_rec->list, rebuild);
9590                 if (err < 0 && bad)
9591                         list_add_tail(&chunk_rec->list, bad);
9592                 chunk_item = next_cache_extent(chunk_item);
9593         }
9594
9595         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9596                 if (!silent)
9597                         fprintf(stderr,
9598                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9599                                 bg_rec->objectid,
9600                                 bg_rec->offset,
9601                                 bg_rec->flags);
9602                 if (!ret)
9603                         ret = 1;
9604         }
9605
9606         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9607                             chunk_list) {
9608                 if (!silent)
9609                         fprintf(stderr,
9610                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9611                                 dext_rec->objectid,
9612                                 dext_rec->offset,
9613                                 dext_rec->length);
9614                 if (!ret)
9615                         ret = 1;
9616         }
9617         return ret;
9618 }
9619
9620
9621 static int check_device_used(struct device_record *dev_rec,
9622                              struct device_extent_tree *dext_cache)
9623 {
9624         struct cache_extent *cache;
9625         struct device_extent_record *dev_extent_rec;
9626         u64 total_byte = 0;
9627
9628         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9629         while (cache) {
9630                 dev_extent_rec = container_of(cache,
9631                                               struct device_extent_record,
9632                                               cache);
9633                 if (dev_extent_rec->objectid != dev_rec->devid)
9634                         break;
9635
9636                 list_del_init(&dev_extent_rec->device_list);
9637                 total_byte += dev_extent_rec->length;
9638                 cache = next_cache_extent(cache);
9639         }
9640
9641         if (total_byte != dev_rec->byte_used) {
9642                 fprintf(stderr,
9643                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9644                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9645                         dev_rec->type, dev_rec->offset);
9646                 return -1;
9647         } else {
9648                 return 0;
9649         }
9650 }
9651
9652 /* check btrfs_dev_item -> btrfs_dev_extent */
9653 static int check_devices(struct rb_root *dev_cache,
9654                          struct device_extent_tree *dev_extent_cache)
9655 {
9656         struct rb_node *dev_node;
9657         struct device_record *dev_rec;
9658         struct device_extent_record *dext_rec;
9659         int err;
9660         int ret = 0;
9661
9662         dev_node = rb_first(dev_cache);
9663         while (dev_node) {
9664                 dev_rec = container_of(dev_node, struct device_record, node);
9665                 err = check_device_used(dev_rec, dev_extent_cache);
9666                 if (err)
9667                         ret = err;
9668
9669                 dev_node = rb_next(dev_node);
9670         }
9671         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9672                             device_list) {
9673                 fprintf(stderr,
9674                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9675                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9676                 if (!ret)
9677                         ret = 1;
9678         }
9679         return ret;
9680 }
9681
9682 static int add_root_item_to_list(struct list_head *head,
9683                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9684                                   u8 level, u8 drop_level,
9685                                   int level_size, struct btrfs_key *drop_key)
9686 {
9687
9688         struct root_item_record *ri_rec;
9689         ri_rec = malloc(sizeof(*ri_rec));
9690         if (!ri_rec)
9691                 return -ENOMEM;
9692         ri_rec->bytenr = bytenr;
9693         ri_rec->objectid = objectid;
9694         ri_rec->level = level;
9695         ri_rec->level_size = level_size;
9696         ri_rec->drop_level = drop_level;
9697         ri_rec->last_snapshot = last_snapshot;
9698         if (drop_key)
9699                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9700         list_add_tail(&ri_rec->list, head);
9701
9702         return 0;
9703 }
9704
9705 static void free_root_item_list(struct list_head *list)
9706 {
9707         struct root_item_record *ri_rec;
9708
9709         while (!list_empty(list)) {
9710                 ri_rec = list_first_entry(list, struct root_item_record,
9711                                           list);
9712                 list_del_init(&ri_rec->list);
9713                 free(ri_rec);
9714         }
9715 }
9716
9717 static int deal_root_from_list(struct list_head *list,
9718                                struct btrfs_root *root,
9719                                struct block_info *bits,
9720                                int bits_nr,
9721                                struct cache_tree *pending,
9722                                struct cache_tree *seen,
9723                                struct cache_tree *reada,
9724                                struct cache_tree *nodes,
9725                                struct cache_tree *extent_cache,
9726                                struct cache_tree *chunk_cache,
9727                                struct rb_root *dev_cache,
9728                                struct block_group_tree *block_group_cache,
9729                                struct device_extent_tree *dev_extent_cache)
9730 {
9731         int ret = 0;
9732         u64 last;
9733
9734         while (!list_empty(list)) {
9735                 struct root_item_record *rec;
9736                 struct extent_buffer *buf;
9737                 rec = list_entry(list->next,
9738                                  struct root_item_record, list);
9739                 last = 0;
9740                 buf = read_tree_block(root->fs_info->tree_root,
9741                                       rec->bytenr, rec->level_size, 0);
9742                 if (!extent_buffer_uptodate(buf)) {
9743                         free_extent_buffer(buf);
9744                         ret = -EIO;
9745                         break;
9746                 }
9747                 ret = add_root_to_pending(buf, extent_cache, pending,
9748                                     seen, nodes, rec->objectid);
9749                 if (ret < 0)
9750                         break;
9751                 /*
9752                  * To rebuild extent tree, we need deal with snapshot
9753                  * one by one, otherwise we deal with node firstly which
9754                  * can maximize readahead.
9755                  */
9756                 while (1) {
9757                         ret = run_next_block(root, bits, bits_nr, &last,
9758                                              pending, seen, reada, nodes,
9759                                              extent_cache, chunk_cache,
9760                                              dev_cache, block_group_cache,
9761                                              dev_extent_cache, rec);
9762                         if (ret != 0)
9763                                 break;
9764                 }
9765                 free_extent_buffer(buf);
9766                 list_del(&rec->list);
9767                 free(rec);
9768                 if (ret < 0)
9769                         break;
9770         }
9771         while (ret >= 0) {
9772                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9773                                      reada, nodes, extent_cache, chunk_cache,
9774                                      dev_cache, block_group_cache,
9775                                      dev_extent_cache, NULL);
9776                 if (ret != 0) {
9777                         if (ret > 0)
9778                                 ret = 0;
9779                         break;
9780                 }
9781         }
9782         return ret;
9783 }
9784
9785 static int check_chunks_and_extents(struct btrfs_root *root)
9786 {
9787         struct rb_root dev_cache;
9788         struct cache_tree chunk_cache;
9789         struct block_group_tree block_group_cache;
9790         struct device_extent_tree dev_extent_cache;
9791         struct cache_tree extent_cache;
9792         struct cache_tree seen;
9793         struct cache_tree pending;
9794         struct cache_tree reada;
9795         struct cache_tree nodes;
9796         struct extent_io_tree excluded_extents;
9797         struct cache_tree corrupt_blocks;
9798         struct btrfs_path path;
9799         struct btrfs_key key;
9800         struct btrfs_key found_key;
9801         int ret, err = 0;
9802         struct block_info *bits;
9803         int bits_nr;
9804         struct extent_buffer *leaf;
9805         int slot;
9806         struct btrfs_root_item ri;
9807         struct list_head dropping_trees;
9808         struct list_head normal_trees;
9809         struct btrfs_root *root1;
9810         u64 objectid;
9811         u32 level_size;
9812         u8 level;
9813
9814         dev_cache = RB_ROOT;
9815         cache_tree_init(&chunk_cache);
9816         block_group_tree_init(&block_group_cache);
9817         device_extent_tree_init(&dev_extent_cache);
9818
9819         cache_tree_init(&extent_cache);
9820         cache_tree_init(&seen);
9821         cache_tree_init(&pending);
9822         cache_tree_init(&nodes);
9823         cache_tree_init(&reada);
9824         cache_tree_init(&corrupt_blocks);
9825         extent_io_tree_init(&excluded_extents);
9826         INIT_LIST_HEAD(&dropping_trees);
9827         INIT_LIST_HEAD(&normal_trees);
9828
9829         if (repair) {
9830                 root->fs_info->excluded_extents = &excluded_extents;
9831                 root->fs_info->fsck_extent_cache = &extent_cache;
9832                 root->fs_info->free_extent_hook = free_extent_hook;
9833                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9834         }
9835
9836         bits_nr = 1024;
9837         bits = malloc(bits_nr * sizeof(struct block_info));
9838         if (!bits) {
9839                 perror("malloc");
9840                 exit(1);
9841         }
9842
9843         if (ctx.progress_enabled) {
9844                 ctx.tp = TASK_EXTENTS;
9845                 task_start(ctx.info);
9846         }
9847
9848 again:
9849         root1 = root->fs_info->tree_root;
9850         level = btrfs_header_level(root1->node);
9851         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9852                                     root1->node->start, 0, level, 0,
9853                                     root1->nodesize, NULL);
9854         if (ret < 0)
9855                 goto out;
9856         root1 = root->fs_info->chunk_root;
9857         level = btrfs_header_level(root1->node);
9858         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9859                                     root1->node->start, 0, level, 0,
9860                                     root1->nodesize, NULL);
9861         if (ret < 0)
9862                 goto out;
9863         btrfs_init_path(&path);
9864         key.offset = 0;
9865         key.objectid = 0;
9866         key.type = BTRFS_ROOT_ITEM_KEY;
9867         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9868                                         &key, &path, 0, 0);
9869         if (ret < 0)
9870                 goto out;
9871         while(1) {
9872                 leaf = path.nodes[0];
9873                 slot = path.slots[0];
9874                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9875                         ret = btrfs_next_leaf(root, &path);
9876                         if (ret != 0)
9877                                 break;
9878                         leaf = path.nodes[0];
9879                         slot = path.slots[0];
9880                 }
9881                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9882                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9883                         unsigned long offset;
9884                         u64 last_snapshot;
9885
9886                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9887                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9888                         last_snapshot = btrfs_root_last_snapshot(&ri);
9889                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9890                                 level = btrfs_root_level(&ri);
9891                                 level_size = root->nodesize;
9892                                 ret = add_root_item_to_list(&normal_trees,
9893                                                 found_key.objectid,
9894                                                 btrfs_root_bytenr(&ri),
9895                                                 last_snapshot, level,
9896                                                 0, level_size, NULL);
9897                                 if (ret < 0)
9898                                         goto out;
9899                         } else {
9900                                 level = btrfs_root_level(&ri);
9901                                 level_size = root->nodesize;
9902                                 objectid = found_key.objectid;
9903                                 btrfs_disk_key_to_cpu(&found_key,
9904                                                       &ri.drop_progress);
9905                                 ret = add_root_item_to_list(&dropping_trees,
9906                                                 objectid,
9907                                                 btrfs_root_bytenr(&ri),
9908                                                 last_snapshot, level,
9909                                                 ri.drop_level,
9910                                                 level_size, &found_key);
9911                                 if (ret < 0)
9912                                         goto out;
9913                         }
9914                 }
9915                 path.slots[0]++;
9916         }
9917         btrfs_release_path(&path);
9918
9919         /*
9920          * check_block can return -EAGAIN if it fixes something, please keep
9921          * this in mind when dealing with return values from these functions, if
9922          * we get -EAGAIN we want to fall through and restart the loop.
9923          */
9924         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9925                                   &seen, &reada, &nodes, &extent_cache,
9926                                   &chunk_cache, &dev_cache, &block_group_cache,
9927                                   &dev_extent_cache);
9928         if (ret < 0) {
9929                 if (ret == -EAGAIN)
9930                         goto loop;
9931                 goto out;
9932         }
9933         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9934                                   &pending, &seen, &reada, &nodes,
9935                                   &extent_cache, &chunk_cache, &dev_cache,
9936                                   &block_group_cache, &dev_extent_cache);
9937         if (ret < 0) {
9938                 if (ret == -EAGAIN)
9939                         goto loop;
9940                 goto out;
9941         }
9942
9943         ret = check_chunks(&chunk_cache, &block_group_cache,
9944                            &dev_extent_cache, NULL, NULL, NULL, 0);
9945         if (ret) {
9946                 if (ret == -EAGAIN)
9947                         goto loop;
9948                 err = ret;
9949         }
9950
9951         ret = check_extent_refs(root, &extent_cache);
9952         if (ret < 0) {
9953                 if (ret == -EAGAIN)
9954                         goto loop;
9955                 goto out;
9956         }
9957
9958         ret = check_devices(&dev_cache, &dev_extent_cache);
9959         if (ret && err)
9960                 ret = err;
9961
9962 out:
9963         task_stop(ctx.info);
9964         if (repair) {
9965                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9966                 extent_io_tree_cleanup(&excluded_extents);
9967                 root->fs_info->fsck_extent_cache = NULL;
9968                 root->fs_info->free_extent_hook = NULL;
9969                 root->fs_info->corrupt_blocks = NULL;
9970                 root->fs_info->excluded_extents = NULL;
9971         }
9972         free(bits);
9973         free_chunk_cache_tree(&chunk_cache);
9974         free_device_cache_tree(&dev_cache);
9975         free_block_group_tree(&block_group_cache);
9976         free_device_extent_tree(&dev_extent_cache);
9977         free_extent_cache_tree(&seen);
9978         free_extent_cache_tree(&pending);
9979         free_extent_cache_tree(&reada);
9980         free_extent_cache_tree(&nodes);
9981         free_root_item_list(&normal_trees);
9982         free_root_item_list(&dropping_trees);
9983         return ret;
9984 loop:
9985         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9986         free_extent_cache_tree(&seen);
9987         free_extent_cache_tree(&pending);
9988         free_extent_cache_tree(&reada);
9989         free_extent_cache_tree(&nodes);
9990         free_chunk_cache_tree(&chunk_cache);
9991         free_block_group_tree(&block_group_cache);
9992         free_device_cache_tree(&dev_cache);
9993         free_device_extent_tree(&dev_extent_cache);
9994         free_extent_record_cache(&extent_cache);
9995         free_root_item_list(&normal_trees);
9996         free_root_item_list(&dropping_trees);
9997         extent_io_tree_cleanup(&excluded_extents);
9998         goto again;
9999 }
10000
10001 /*
10002  * Check backrefs of a tree block given by @bytenr or @eb.
10003  *
10004  * @root:       the root containing the @bytenr or @eb
10005  * @eb:         tree block extent buffer, can be NULL
10006  * @bytenr:     bytenr of the tree block to search
10007  * @level:      tree level of the tree block
10008  * @owner:      owner of the tree block
10009  *
10010  * Return >0 for any error found and output error message
10011  * Return 0 for no error found
10012  */
10013 static int check_tree_block_ref(struct btrfs_root *root,
10014                                 struct extent_buffer *eb, u64 bytenr,
10015                                 int level, u64 owner)
10016 {
10017         struct btrfs_key key;
10018         struct btrfs_root *extent_root = root->fs_info->extent_root;
10019         struct btrfs_path path;
10020         struct btrfs_extent_item *ei;
10021         struct btrfs_extent_inline_ref *iref;
10022         struct extent_buffer *leaf;
10023         unsigned long end;
10024         unsigned long ptr;
10025         int slot;
10026         int skinny_level;
10027         int type;
10028         u32 nodesize = root->nodesize;
10029         u32 item_size;
10030         u64 offset;
10031         int tree_reloc_root = 0;
10032         int found_ref = 0;
10033         int err = 0;
10034         int ret;
10035
10036         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10037             btrfs_header_bytenr(root->node) == bytenr)
10038                 tree_reloc_root = 1;
10039
10040         btrfs_init_path(&path);
10041         key.objectid = bytenr;
10042         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10043                 key.type = BTRFS_METADATA_ITEM_KEY;
10044         else
10045                 key.type = BTRFS_EXTENT_ITEM_KEY;
10046         key.offset = (u64)-1;
10047
10048         /* Search for the backref in extent tree */
10049         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10050         if (ret < 0) {
10051                 err |= BACKREF_MISSING;
10052                 goto out;
10053         }
10054         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10055         if (ret) {
10056                 err |= BACKREF_MISSING;
10057                 goto out;
10058         }
10059
10060         leaf = path.nodes[0];
10061         slot = path.slots[0];
10062         btrfs_item_key_to_cpu(leaf, &key, slot);
10063
10064         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10065
10066         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10067                 skinny_level = (int)key.offset;
10068                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10069         } else {
10070                 struct btrfs_tree_block_info *info;
10071
10072                 info = (struct btrfs_tree_block_info *)(ei + 1);
10073                 skinny_level = btrfs_tree_block_level(leaf, info);
10074                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10075         }
10076
10077         if (eb) {
10078                 u64 header_gen;
10079                 u64 extent_gen;
10080
10081                 if (!(btrfs_extent_flags(leaf, ei) &
10082                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10083                         error(
10084                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10085                                 key.objectid, nodesize,
10086                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10087                         err = BACKREF_MISMATCH;
10088                 }
10089                 header_gen = btrfs_header_generation(eb);
10090                 extent_gen = btrfs_extent_generation(leaf, ei);
10091                 if (header_gen != extent_gen) {
10092                         error(
10093         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10094                                 key.objectid, nodesize, header_gen,
10095                                 extent_gen);
10096                         err = BACKREF_MISMATCH;
10097                 }
10098                 if (level != skinny_level) {
10099                         error(
10100                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10101                                 key.objectid, nodesize, level, skinny_level);
10102                         err = BACKREF_MISMATCH;
10103                 }
10104                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10105                         error(
10106                         "extent[%llu %u] is referred by other roots than %llu",
10107                                 key.objectid, nodesize, root->objectid);
10108                         err = BACKREF_MISMATCH;
10109                 }
10110         }
10111
10112         /*
10113          * Iterate the extent/metadata item to find the exact backref
10114          */
10115         item_size = btrfs_item_size_nr(leaf, slot);
10116         ptr = (unsigned long)iref;
10117         end = (unsigned long)ei + item_size;
10118         while (ptr < end) {
10119                 iref = (struct btrfs_extent_inline_ref *)ptr;
10120                 type = btrfs_extent_inline_ref_type(leaf, iref);
10121                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10122
10123                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10124                         (offset == root->objectid || offset == owner)) {
10125                         found_ref = 1;
10126                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10127                         /*
10128                          * Backref of tree reloc root points to itself, no need
10129                          * to check backref any more.
10130                          */
10131                         if (tree_reloc_root)
10132                                 found_ref = 1;
10133                         else
10134                         /* Check if the backref points to valid referencer */
10135                                 found_ref = !check_tree_block_ref(root, NULL,
10136                                                 offset, level + 1, owner);
10137                 }
10138
10139                 if (found_ref)
10140                         break;
10141                 ptr += btrfs_extent_inline_ref_size(type);
10142         }
10143
10144         /*
10145          * Inlined extent item doesn't have what we need, check
10146          * TREE_BLOCK_REF_KEY
10147          */
10148         if (!found_ref) {
10149                 btrfs_release_path(&path);
10150                 key.objectid = bytenr;
10151                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10152                 key.offset = root->objectid;
10153
10154                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10155                 if (!ret)
10156                         found_ref = 1;
10157         }
10158         if (!found_ref)
10159                 err |= BACKREF_MISSING;
10160 out:
10161         btrfs_release_path(&path);
10162         if (eb && (err & BACKREF_MISSING))
10163                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10164                         bytenr, nodesize, owner, level);
10165         return err;
10166 }
10167
10168 /*
10169  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10170  *
10171  * Return >0 any error found and output error message
10172  * Return 0 for no error found
10173  */
10174 static int check_extent_data_item(struct btrfs_root *root,
10175                                   struct extent_buffer *eb, int slot)
10176 {
10177         struct btrfs_file_extent_item *fi;
10178         struct btrfs_path path;
10179         struct btrfs_root *extent_root = root->fs_info->extent_root;
10180         struct btrfs_key fi_key;
10181         struct btrfs_key dbref_key;
10182         struct extent_buffer *leaf;
10183         struct btrfs_extent_item *ei;
10184         struct btrfs_extent_inline_ref *iref;
10185         struct btrfs_extent_data_ref *dref;
10186         u64 owner;
10187         u64 disk_bytenr;
10188         u64 disk_num_bytes;
10189         u64 extent_num_bytes;
10190         u64 extent_flags;
10191         u32 item_size;
10192         unsigned long end;
10193         unsigned long ptr;
10194         int type;
10195         u64 ref_root;
10196         int found_dbackref = 0;
10197         int err = 0;
10198         int ret;
10199
10200         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10201         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10202
10203         /* Nothing to check for hole and inline data extents */
10204         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10205             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10206                 return 0;
10207
10208         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10209         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10210         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10211
10212         /* Check unaligned disk_num_bytes and num_bytes */
10213         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10214                 error(
10215 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10216                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10217                         root->sectorsize);
10218                 err |= BYTES_UNALIGNED;
10219         } else {
10220                 data_bytes_allocated += disk_num_bytes;
10221         }
10222         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10223                 error(
10224 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10225                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10226                         root->sectorsize);
10227                 err |= BYTES_UNALIGNED;
10228         } else {
10229                 data_bytes_referenced += extent_num_bytes;
10230         }
10231         owner = btrfs_header_owner(eb);
10232
10233         /* Check the extent item of the file extent in extent tree */
10234         btrfs_init_path(&path);
10235         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10236         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10237         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10238
10239         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10240         if (ret)
10241                 goto out;
10242
10243         leaf = path.nodes[0];
10244         slot = path.slots[0];
10245         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10246
10247         extent_flags = btrfs_extent_flags(leaf, ei);
10248
10249         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10250                 error(
10251                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10252                     disk_bytenr, disk_num_bytes,
10253                     BTRFS_EXTENT_FLAG_DATA);
10254                 err |= BACKREF_MISMATCH;
10255         }
10256
10257         /* Check data backref inside that extent item */
10258         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10259         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10260         ptr = (unsigned long)iref;
10261         end = (unsigned long)ei + item_size;
10262         while (ptr < end) {
10263                 iref = (struct btrfs_extent_inline_ref *)ptr;
10264                 type = btrfs_extent_inline_ref_type(leaf, iref);
10265                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10266
10267                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10268                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10269                         if (ref_root == owner || ref_root == root->objectid)
10270                                 found_dbackref = 1;
10271                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10272                         found_dbackref = !check_tree_block_ref(root, NULL,
10273                                 btrfs_extent_inline_ref_offset(leaf, iref),
10274                                 0, owner);
10275                 }
10276
10277                 if (found_dbackref)
10278                         break;
10279                 ptr += btrfs_extent_inline_ref_size(type);
10280         }
10281
10282         if (!found_dbackref) {
10283                 btrfs_release_path(&path);
10284
10285                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10286                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10287                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10288                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10289                                 fi_key.objectid, fi_key.offset);
10290
10291                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10292                                         &dbref_key, &path, 0, 0);
10293                 if (!ret) {
10294                         found_dbackref = 1;
10295                         goto out;
10296                 }
10297
10298                 btrfs_release_path(&path);
10299
10300                 /*
10301                  * Neither inlined nor EXTENT_DATA_REF found, try
10302                  * SHARED_DATA_REF as last chance.
10303                  */
10304                 dbref_key.objectid = disk_bytenr;
10305                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10306                 dbref_key.offset = eb->start;
10307
10308                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10309                                         &dbref_key, &path, 0, 0);
10310                 if (!ret) {
10311                         found_dbackref = 1;
10312                         goto out;
10313                 }
10314         }
10315
10316 out:
10317         if (!found_dbackref)
10318                 err |= BACKREF_MISSING;
10319         btrfs_release_path(&path);
10320         if (err & BACKREF_MISSING) {
10321                 error("data extent[%llu %llu] backref lost",
10322                       disk_bytenr, disk_num_bytes);
10323         }
10324         return err;
10325 }
10326
10327 /*
10328  * Get real tree block level for the case like shared block
10329  * Return >= 0 as tree level
10330  * Return <0 for error
10331  */
10332 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10333 {
10334         struct extent_buffer *eb;
10335         struct btrfs_path path;
10336         struct btrfs_key key;
10337         struct btrfs_extent_item *ei;
10338         u64 flags;
10339         u64 transid;
10340         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10341         u8 backref_level;
10342         u8 header_level;
10343         int ret;
10344
10345         /* Search extent tree for extent generation and level */
10346         key.objectid = bytenr;
10347         key.type = BTRFS_METADATA_ITEM_KEY;
10348         key.offset = (u64)-1;
10349
10350         btrfs_init_path(&path);
10351         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10352         if (ret < 0)
10353                 goto release_out;
10354         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10355         if (ret < 0)
10356                 goto release_out;
10357         if (ret > 0) {
10358                 ret = -ENOENT;
10359                 goto release_out;
10360         }
10361
10362         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10363         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10364                             struct btrfs_extent_item);
10365         flags = btrfs_extent_flags(path.nodes[0], ei);
10366         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10367                 ret = -ENOENT;
10368                 goto release_out;
10369         }
10370
10371         /* Get transid for later read_tree_block() check */
10372         transid = btrfs_extent_generation(path.nodes[0], ei);
10373
10374         /* Get backref level as one source */
10375         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10376                 backref_level = key.offset;
10377         } else {
10378                 struct btrfs_tree_block_info *info;
10379
10380                 info = (struct btrfs_tree_block_info *)(ei + 1);
10381                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10382         }
10383         btrfs_release_path(&path);
10384
10385         /* Get level from tree block as an alternative source */
10386         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10387         if (!extent_buffer_uptodate(eb)) {
10388                 free_extent_buffer(eb);
10389                 return -EIO;
10390         }
10391         header_level = btrfs_header_level(eb);
10392         free_extent_buffer(eb);
10393
10394         if (header_level != backref_level)
10395                 return -EIO;
10396         return header_level;
10397
10398 release_out:
10399         btrfs_release_path(&path);
10400         return ret;
10401 }
10402
10403 /*
10404  * Check if a tree block backref is valid (points to a valid tree block)
10405  * if level == -1, level will be resolved
10406  * Return >0 for any error found and print error message
10407  */
10408 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10409                                     u64 bytenr, int level)
10410 {
10411         struct btrfs_root *root;
10412         struct btrfs_key key;
10413         struct btrfs_path path;
10414         struct extent_buffer *eb;
10415         struct extent_buffer *node;
10416         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10417         int err = 0;
10418         int ret;
10419
10420         /* Query level for level == -1 special case */
10421         if (level == -1)
10422                 level = query_tree_block_level(fs_info, bytenr);
10423         if (level < 0) {
10424                 err |= REFERENCER_MISSING;
10425                 goto out;
10426         }
10427
10428         key.objectid = root_id;
10429         key.type = BTRFS_ROOT_ITEM_KEY;
10430         key.offset = (u64)-1;
10431
10432         root = btrfs_read_fs_root(fs_info, &key);
10433         if (IS_ERR(root)) {
10434                 err |= REFERENCER_MISSING;
10435                 goto out;
10436         }
10437
10438         /* Read out the tree block to get item/node key */
10439         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10440         if (!extent_buffer_uptodate(eb)) {
10441                 err |= REFERENCER_MISSING;
10442                 free_extent_buffer(eb);
10443                 goto out;
10444         }
10445
10446         /* Empty tree, no need to check key */
10447         if (!btrfs_header_nritems(eb) && !level) {
10448                 free_extent_buffer(eb);
10449                 goto out;
10450         }
10451
10452         if (level)
10453                 btrfs_node_key_to_cpu(eb, &key, 0);
10454         else
10455                 btrfs_item_key_to_cpu(eb, &key, 0);
10456
10457         free_extent_buffer(eb);
10458
10459         btrfs_init_path(&path);
10460         path.lowest_level = level;
10461         /* Search with the first key, to ensure we can reach it */
10462         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10463         if (ret < 0) {
10464                 err |= REFERENCER_MISSING;
10465                 goto release_out;
10466         }
10467
10468         node = path.nodes[level];
10469         if (btrfs_header_bytenr(node) != bytenr) {
10470                 error(
10471         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10472                         bytenr, nodesize, bytenr,
10473                         btrfs_header_bytenr(node));
10474                 err |= REFERENCER_MISMATCH;
10475         }
10476         if (btrfs_header_level(node) != level) {
10477                 error(
10478         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10479                         bytenr, nodesize, level,
10480                         btrfs_header_level(node));
10481                 err |= REFERENCER_MISMATCH;
10482         }
10483
10484 release_out:
10485         btrfs_release_path(&path);
10486 out:
10487         if (err & REFERENCER_MISSING) {
10488                 if (level < 0)
10489                         error("extent [%llu %d] lost referencer (owner: %llu)",
10490                                 bytenr, nodesize, root_id);
10491                 else
10492                         error(
10493                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10494                                 bytenr, nodesize, root_id, level);
10495         }
10496
10497         return err;
10498 }
10499
10500 /*
10501  * Check if tree block @eb is tree reloc root.
10502  * Return 0 if it's not or any problem happens
10503  * Return 1 if it's a tree reloc root
10504  */
10505 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10506                                  struct extent_buffer *eb)
10507 {
10508         struct btrfs_root *tree_reloc_root;
10509         struct btrfs_key key;
10510         u64 bytenr = btrfs_header_bytenr(eb);
10511         u64 owner = btrfs_header_owner(eb);
10512         int ret = 0;
10513
10514         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10515         key.offset = owner;
10516         key.type = BTRFS_ROOT_ITEM_KEY;
10517
10518         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10519         if (IS_ERR(tree_reloc_root))
10520                 return 0;
10521
10522         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10523                 ret = 1;
10524         btrfs_free_fs_root(tree_reloc_root);
10525         return ret;
10526 }
10527
10528 /*
10529  * Check referencer for shared block backref
10530  * If level == -1, this function will resolve the level.
10531  */
10532 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10533                                      u64 parent, u64 bytenr, int level)
10534 {
10535         struct extent_buffer *eb;
10536         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10537         u32 nr;
10538         int found_parent = 0;
10539         int i;
10540
10541         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10542         if (!extent_buffer_uptodate(eb))
10543                 goto out;
10544
10545         if (level == -1)
10546                 level = query_tree_block_level(fs_info, bytenr);
10547         if (level < 0)
10548                 goto out;
10549
10550         /* It's possible it's a tree reloc root */
10551         if (parent == bytenr) {
10552                 if (is_tree_reloc_root(fs_info, eb))
10553                         found_parent = 1;
10554                 goto out;
10555         }
10556
10557         if (level + 1 != btrfs_header_level(eb))
10558                 goto out;
10559
10560         nr = btrfs_header_nritems(eb);
10561         for (i = 0; i < nr; i++) {
10562                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10563                         found_parent = 1;
10564                         break;
10565                 }
10566         }
10567 out:
10568         free_extent_buffer(eb);
10569         if (!found_parent) {
10570                 error(
10571         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10572                         bytenr, nodesize, parent, level);
10573                 return REFERENCER_MISSING;
10574         }
10575         return 0;
10576 }
10577
10578 /*
10579  * Check referencer for normal (inlined) data ref
10580  * If len == 0, it will be resolved by searching in extent tree
10581  */
10582 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10583                                      u64 root_id, u64 objectid, u64 offset,
10584                                      u64 bytenr, u64 len, u32 count)
10585 {
10586         struct btrfs_root *root;
10587         struct btrfs_root *extent_root = fs_info->extent_root;
10588         struct btrfs_key key;
10589         struct btrfs_path path;
10590         struct extent_buffer *leaf;
10591         struct btrfs_file_extent_item *fi;
10592         u32 found_count = 0;
10593         int slot;
10594         int ret = 0;
10595
10596         if (!len) {
10597                 key.objectid = bytenr;
10598                 key.type = BTRFS_EXTENT_ITEM_KEY;
10599                 key.offset = (u64)-1;
10600
10601                 btrfs_init_path(&path);
10602                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10603                 if (ret < 0)
10604                         goto out;
10605                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10606                 if (ret)
10607                         goto out;
10608                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10609                 if (key.objectid != bytenr ||
10610                     key.type != BTRFS_EXTENT_ITEM_KEY)
10611                         goto out;
10612                 len = key.offset;
10613                 btrfs_release_path(&path);
10614         }
10615         key.objectid = root_id;
10616         key.type = BTRFS_ROOT_ITEM_KEY;
10617         key.offset = (u64)-1;
10618         btrfs_init_path(&path);
10619
10620         root = btrfs_read_fs_root(fs_info, &key);
10621         if (IS_ERR(root))
10622                 goto out;
10623
10624         key.objectid = objectid;
10625         key.type = BTRFS_EXTENT_DATA_KEY;
10626         /*
10627          * It can be nasty as data backref offset is
10628          * file offset - file extent offset, which is smaller or
10629          * equal to original backref offset.  The only special case is
10630          * overflow.  So we need to special check and do further search.
10631          */
10632         key.offset = offset & (1ULL << 63) ? 0 : offset;
10633
10634         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10635         if (ret < 0)
10636                 goto out;
10637
10638         /*
10639          * Search afterwards to get correct one
10640          * NOTE: As we must do a comprehensive check on the data backref to
10641          * make sure the dref count also matches, we must iterate all file
10642          * extents for that inode.
10643          */
10644         while (1) {
10645                 leaf = path.nodes[0];
10646                 slot = path.slots[0];
10647
10648                 if (slot >= btrfs_header_nritems(leaf))
10649                         goto next;
10650                 btrfs_item_key_to_cpu(leaf, &key, slot);
10651                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10652                         break;
10653                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10654                 /*
10655                  * Except normal disk bytenr and disk num bytes, we still
10656                  * need to do extra check on dbackref offset as
10657                  * dbackref offset = file_offset - file_extent_offset
10658                  */
10659                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10660                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10661                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10662                     offset)
10663                         found_count++;
10664
10665 next:
10666                 ret = btrfs_next_item(root, &path);
10667                 if (ret)
10668                         break;
10669         }
10670 out:
10671         btrfs_release_path(&path);
10672         if (found_count != count) {
10673                 error(
10674 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10675                         bytenr, len, root_id, objectid, offset, count, found_count);
10676                 return REFERENCER_MISSING;
10677         }
10678         return 0;
10679 }
10680
10681 /*
10682  * Check if the referencer of a shared data backref exists
10683  */
10684 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10685                                      u64 parent, u64 bytenr)
10686 {
10687         struct extent_buffer *eb;
10688         struct btrfs_key key;
10689         struct btrfs_file_extent_item *fi;
10690         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10691         u32 nr;
10692         int found_parent = 0;
10693         int i;
10694
10695         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10696         if (!extent_buffer_uptodate(eb))
10697                 goto out;
10698
10699         nr = btrfs_header_nritems(eb);
10700         for (i = 0; i < nr; i++) {
10701                 btrfs_item_key_to_cpu(eb, &key, i);
10702                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10703                         continue;
10704
10705                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10706                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10707                         continue;
10708
10709                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10710                         found_parent = 1;
10711                         break;
10712                 }
10713         }
10714
10715 out:
10716         free_extent_buffer(eb);
10717         if (!found_parent) {
10718                 error("shared extent %llu referencer lost (parent: %llu)",
10719                         bytenr, parent);
10720                 return REFERENCER_MISSING;
10721         }
10722         return 0;
10723 }
10724
10725 /*
10726  * This function will check a given extent item, including its backref and
10727  * itself (like crossing stripe boundary and type)
10728  *
10729  * Since we don't use extent_record anymore, introduce new error bit
10730  */
10731 static int check_extent_item(struct btrfs_fs_info *fs_info,
10732                              struct extent_buffer *eb, int slot)
10733 {
10734         struct btrfs_extent_item *ei;
10735         struct btrfs_extent_inline_ref *iref;
10736         struct btrfs_extent_data_ref *dref;
10737         unsigned long end;
10738         unsigned long ptr;
10739         int type;
10740         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10741         u32 item_size = btrfs_item_size_nr(eb, slot);
10742         u64 flags;
10743         u64 offset;
10744         int metadata = 0;
10745         int level;
10746         struct btrfs_key key;
10747         int ret;
10748         int err = 0;
10749
10750         btrfs_item_key_to_cpu(eb, &key, slot);
10751         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10752                 bytes_used += key.offset;
10753         else
10754                 bytes_used += nodesize;
10755
10756         if (item_size < sizeof(*ei)) {
10757                 /*
10758                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10759                  * old thing when on disk format is still un-determined.
10760                  * No need to care about it anymore
10761                  */
10762                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10763                 return -ENOTTY;
10764         }
10765
10766         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10767         flags = btrfs_extent_flags(eb, ei);
10768
10769         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10770                 metadata = 1;
10771         if (metadata && check_crossing_stripes(global_info, key.objectid,
10772                                                eb->len)) {
10773                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10774                       key.objectid, key.objectid + nodesize);
10775                 err |= CROSSING_STRIPE_BOUNDARY;
10776         }
10777
10778         ptr = (unsigned long)(ei + 1);
10779
10780         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10781                 /* Old EXTENT_ITEM metadata */
10782                 struct btrfs_tree_block_info *info;
10783
10784                 info = (struct btrfs_tree_block_info *)ptr;
10785                 level = btrfs_tree_block_level(eb, info);
10786                 ptr += sizeof(struct btrfs_tree_block_info);
10787         } else {
10788                 /* New METADATA_ITEM */
10789                 level = key.offset;
10790         }
10791         end = (unsigned long)ei + item_size;
10792
10793 next:
10794         /* Reached extent item end normally */
10795         if (ptr == end)
10796                 goto out;
10797
10798         /* Beyond extent item end, wrong item size */
10799         if (ptr > end) {
10800                 err |= ITEM_SIZE_MISMATCH;
10801                 error("extent item at bytenr %llu slot %d has wrong size",
10802                         eb->start, slot);
10803                 goto out;
10804         }
10805
10806         /* Now check every backref in this extent item */
10807         iref = (struct btrfs_extent_inline_ref *)ptr;
10808         type = btrfs_extent_inline_ref_type(eb, iref);
10809         offset = btrfs_extent_inline_ref_offset(eb, iref);
10810         switch (type) {
10811         case BTRFS_TREE_BLOCK_REF_KEY:
10812                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10813                                                level);
10814                 err |= ret;
10815                 break;
10816         case BTRFS_SHARED_BLOCK_REF_KEY:
10817                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10818                                                  level);
10819                 err |= ret;
10820                 break;
10821         case BTRFS_EXTENT_DATA_REF_KEY:
10822                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10823                 ret = check_extent_data_backref(fs_info,
10824                                 btrfs_extent_data_ref_root(eb, dref),
10825                                 btrfs_extent_data_ref_objectid(eb, dref),
10826                                 btrfs_extent_data_ref_offset(eb, dref),
10827                                 key.objectid, key.offset,
10828                                 btrfs_extent_data_ref_count(eb, dref));
10829                 err |= ret;
10830                 break;
10831         case BTRFS_SHARED_DATA_REF_KEY:
10832                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10833                 err |= ret;
10834                 break;
10835         default:
10836                 error("extent[%llu %d %llu] has unknown ref type: %d",
10837                         key.objectid, key.type, key.offset, type);
10838                 err |= UNKNOWN_TYPE;
10839                 goto out;
10840         }
10841
10842         ptr += btrfs_extent_inline_ref_size(type);
10843         goto next;
10844
10845 out:
10846         return err;
10847 }
10848
10849 /*
10850  * Check if a dev extent item is referred correctly by its chunk
10851  */
10852 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10853                                  struct extent_buffer *eb, int slot)
10854 {
10855         struct btrfs_root *chunk_root = fs_info->chunk_root;
10856         struct btrfs_dev_extent *ptr;
10857         struct btrfs_path path;
10858         struct btrfs_key chunk_key;
10859         struct btrfs_key devext_key;
10860         struct btrfs_chunk *chunk;
10861         struct extent_buffer *l;
10862         int num_stripes;
10863         u64 length;
10864         int i;
10865         int found_chunk = 0;
10866         int ret;
10867
10868         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10869         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10870         length = btrfs_dev_extent_length(eb, ptr);
10871
10872         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10873         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10874         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10875
10876         btrfs_init_path(&path);
10877         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10878         if (ret)
10879                 goto out;
10880
10881         l = path.nodes[0];
10882         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10883         if (btrfs_chunk_length(l, chunk) != length)
10884                 goto out;
10885
10886         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10887         for (i = 0; i < num_stripes; i++) {
10888                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10889                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10890
10891                 if (devid == devext_key.objectid &&
10892                     offset == devext_key.offset) {
10893                         found_chunk = 1;
10894                         break;
10895                 }
10896         }
10897 out:
10898         btrfs_release_path(&path);
10899         if (!found_chunk) {
10900                 error(
10901                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10902                         devext_key.objectid, devext_key.offset, length);
10903                 return REFERENCER_MISSING;
10904         }
10905         return 0;
10906 }
10907
10908 /*
10909  * Check if the used space is correct with the dev item
10910  */
10911 static int check_dev_item(struct btrfs_fs_info *fs_info,
10912                           struct extent_buffer *eb, int slot)
10913 {
10914         struct btrfs_root *dev_root = fs_info->dev_root;
10915         struct btrfs_dev_item *dev_item;
10916         struct btrfs_path path;
10917         struct btrfs_key key;
10918         struct btrfs_dev_extent *ptr;
10919         u64 dev_id;
10920         u64 used;
10921         u64 total = 0;
10922         int ret;
10923
10924         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10925         dev_id = btrfs_device_id(eb, dev_item);
10926         used = btrfs_device_bytes_used(eb, dev_item);
10927
10928         key.objectid = dev_id;
10929         key.type = BTRFS_DEV_EXTENT_KEY;
10930         key.offset = 0;
10931
10932         btrfs_init_path(&path);
10933         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10934         if (ret < 0) {
10935                 btrfs_item_key_to_cpu(eb, &key, slot);
10936                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10937                         key.objectid, key.type, key.offset);
10938                 btrfs_release_path(&path);
10939                 return REFERENCER_MISSING;
10940         }
10941
10942         /* Iterate dev_extents to calculate the used space of a device */
10943         while (1) {
10944                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10945                         goto next;
10946
10947                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10948                 if (key.objectid > dev_id)
10949                         break;
10950                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10951                         goto next;
10952
10953                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10954                                      struct btrfs_dev_extent);
10955                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10956 next:
10957                 ret = btrfs_next_item(dev_root, &path);
10958                 if (ret)
10959                         break;
10960         }
10961         btrfs_release_path(&path);
10962
10963         if (used != total) {
10964                 btrfs_item_key_to_cpu(eb, &key, slot);
10965                 error(
10966 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10967                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10968                         BTRFS_DEV_EXTENT_KEY, dev_id);
10969                 return ACCOUNTING_MISMATCH;
10970         }
10971         return 0;
10972 }
10973
10974 /*
10975  * Check a block group item with its referener (chunk) and its used space
10976  * with extent/metadata item
10977  */
10978 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10979                                   struct extent_buffer *eb, int slot)
10980 {
10981         struct btrfs_root *extent_root = fs_info->extent_root;
10982         struct btrfs_root *chunk_root = fs_info->chunk_root;
10983         struct btrfs_block_group_item *bi;
10984         struct btrfs_block_group_item bg_item;
10985         struct btrfs_path path;
10986         struct btrfs_key bg_key;
10987         struct btrfs_key chunk_key;
10988         struct btrfs_key extent_key;
10989         struct btrfs_chunk *chunk;
10990         struct extent_buffer *leaf;
10991         struct btrfs_extent_item *ei;
10992         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10993         u64 flags;
10994         u64 bg_flags;
10995         u64 used;
10996         u64 total = 0;
10997         int ret;
10998         int err = 0;
10999
11000         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11001         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11002         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11003         used = btrfs_block_group_used(&bg_item);
11004         bg_flags = btrfs_block_group_flags(&bg_item);
11005
11006         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11007         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11008         chunk_key.offset = bg_key.objectid;
11009
11010         btrfs_init_path(&path);
11011         /* Search for the referencer chunk */
11012         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11013         if (ret) {
11014                 error(
11015                 "block group[%llu %llu] did not find the related chunk item",
11016                         bg_key.objectid, bg_key.offset);
11017                 err |= REFERENCER_MISSING;
11018         } else {
11019                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11020                                         struct btrfs_chunk);
11021                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11022                                                 bg_key.offset) {
11023                         error(
11024         "block group[%llu %llu] related chunk item length does not match",
11025                                 bg_key.objectid, bg_key.offset);
11026                         err |= REFERENCER_MISMATCH;
11027                 }
11028         }
11029         btrfs_release_path(&path);
11030
11031         /* Search from the block group bytenr */
11032         extent_key.objectid = bg_key.objectid;
11033         extent_key.type = 0;
11034         extent_key.offset = 0;
11035
11036         btrfs_init_path(&path);
11037         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11038         if (ret < 0)
11039                 goto out;
11040
11041         /* Iterate extent tree to account used space */
11042         while (1) {
11043                 leaf = path.nodes[0];
11044
11045                 /* Search slot can point to the last item beyond leaf nritems */
11046                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11047                         goto next;
11048
11049                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11050                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11051                         break;
11052
11053                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11054                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11055                         goto next;
11056                 if (extent_key.objectid < bg_key.objectid)
11057                         goto next;
11058
11059                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11060                         total += nodesize;
11061                 else
11062                         total += extent_key.offset;
11063
11064                 ei = btrfs_item_ptr(leaf, path.slots[0],
11065                                     struct btrfs_extent_item);
11066                 flags = btrfs_extent_flags(leaf, ei);
11067                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11068                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11069                                 error(
11070                         "bad extent[%llu, %llu) type mismatch with chunk",
11071                                         extent_key.objectid,
11072                                         extent_key.objectid + extent_key.offset);
11073                                 err |= CHUNK_TYPE_MISMATCH;
11074                         }
11075                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11076                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11077                                     BTRFS_BLOCK_GROUP_METADATA))) {
11078                                 error(
11079                         "bad extent[%llu, %llu) type mismatch with chunk",
11080                                         extent_key.objectid,
11081                                         extent_key.objectid + nodesize);
11082                                 err |= CHUNK_TYPE_MISMATCH;
11083                         }
11084                 }
11085 next:
11086                 ret = btrfs_next_item(extent_root, &path);
11087                 if (ret)
11088                         break;
11089         }
11090
11091 out:
11092         btrfs_release_path(&path);
11093
11094         if (total != used) {
11095                 error(
11096                 "block group[%llu %llu] used %llu but extent items used %llu",
11097                         bg_key.objectid, bg_key.offset, used, total);
11098                 err |= ACCOUNTING_MISMATCH;
11099         }
11100         return err;
11101 }
11102
11103 /*
11104  * Check a chunk item.
11105  * Including checking all referred dev_extents and block group
11106  */
11107 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11108                             struct extent_buffer *eb, int slot)
11109 {
11110         struct btrfs_root *extent_root = fs_info->extent_root;
11111         struct btrfs_root *dev_root = fs_info->dev_root;
11112         struct btrfs_path path;
11113         struct btrfs_key chunk_key;
11114         struct btrfs_key bg_key;
11115         struct btrfs_key devext_key;
11116         struct btrfs_chunk *chunk;
11117         struct extent_buffer *leaf;
11118         struct btrfs_block_group_item *bi;
11119         struct btrfs_block_group_item bg_item;
11120         struct btrfs_dev_extent *ptr;
11121         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11122         u64 length;
11123         u64 chunk_end;
11124         u64 type;
11125         u64 profile;
11126         int num_stripes;
11127         u64 offset;
11128         u64 objectid;
11129         int i;
11130         int ret;
11131         int err = 0;
11132
11133         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11134         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11135         length = btrfs_chunk_length(eb, chunk);
11136         chunk_end = chunk_key.offset + length;
11137         if (!IS_ALIGNED(length, sectorsize)) {
11138                 error("chunk[%llu %llu) not aligned to %u",
11139                         chunk_key.offset, chunk_end, sectorsize);
11140                 err |= BYTES_UNALIGNED;
11141                 goto out;
11142         }
11143
11144         type = btrfs_chunk_type(eb, chunk);
11145         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11146         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11147                 error("chunk[%llu %llu) has no chunk type",
11148                         chunk_key.offset, chunk_end);
11149                 err |= UNKNOWN_TYPE;
11150         }
11151         if (profile && (profile & (profile - 1))) {
11152                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11153                         chunk_key.offset, chunk_end, profile);
11154                 err |= UNKNOWN_TYPE;
11155         }
11156
11157         bg_key.objectid = chunk_key.offset;
11158         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11159         bg_key.offset = length;
11160
11161         btrfs_init_path(&path);
11162         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11163         if (ret) {
11164                 error(
11165                 "chunk[%llu %llu) did not find the related block group item",
11166                         chunk_key.offset, chunk_end);
11167                 err |= REFERENCER_MISSING;
11168         } else{
11169                 leaf = path.nodes[0];
11170                 bi = btrfs_item_ptr(leaf, path.slots[0],
11171                                     struct btrfs_block_group_item);
11172                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11173                                    sizeof(bg_item));
11174                 if (btrfs_block_group_flags(&bg_item) != type) {
11175                         error(
11176 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11177                                 chunk_key.offset, chunk_end, type,
11178                                 btrfs_block_group_flags(&bg_item));
11179                         err |= REFERENCER_MISSING;
11180                 }
11181         }
11182
11183         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11184         for (i = 0; i < num_stripes; i++) {
11185                 btrfs_release_path(&path);
11186                 btrfs_init_path(&path);
11187                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11188                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11189                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11190
11191                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11192                                         0, 0);
11193                 if (ret)
11194                         goto not_match_dev;
11195
11196                 leaf = path.nodes[0];
11197                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11198                                      struct btrfs_dev_extent);
11199                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11200                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11201                 if (objectid != chunk_key.objectid ||
11202                     offset != chunk_key.offset ||
11203                     btrfs_dev_extent_length(leaf, ptr) != length)
11204                         goto not_match_dev;
11205                 continue;
11206 not_match_dev:
11207                 err |= BACKREF_MISSING;
11208                 error(
11209                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11210                         chunk_key.objectid, chunk_end, i);
11211                 continue;
11212         }
11213         btrfs_release_path(&path);
11214 out:
11215         return err;
11216 }
11217
11218 /*
11219  * Main entry function to check known items and update related accounting info
11220  */
11221 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11222 {
11223         struct btrfs_fs_info *fs_info = root->fs_info;
11224         struct btrfs_key key;
11225         int slot = 0;
11226         int type;
11227         struct btrfs_extent_data_ref *dref;
11228         int ret;
11229         int err = 0;
11230
11231 next:
11232         btrfs_item_key_to_cpu(eb, &key, slot);
11233         type = key.type;
11234
11235         switch (type) {
11236         case BTRFS_EXTENT_DATA_KEY:
11237                 ret = check_extent_data_item(root, eb, slot);
11238                 err |= ret;
11239                 break;
11240         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11241                 ret = check_block_group_item(fs_info, eb, slot);
11242                 err |= ret;
11243                 break;
11244         case BTRFS_DEV_ITEM_KEY:
11245                 ret = check_dev_item(fs_info, eb, slot);
11246                 err |= ret;
11247                 break;
11248         case BTRFS_CHUNK_ITEM_KEY:
11249                 ret = check_chunk_item(fs_info, eb, slot);
11250                 err |= ret;
11251                 break;
11252         case BTRFS_DEV_EXTENT_KEY:
11253                 ret = check_dev_extent_item(fs_info, eb, slot);
11254                 err |= ret;
11255                 break;
11256         case BTRFS_EXTENT_ITEM_KEY:
11257         case BTRFS_METADATA_ITEM_KEY:
11258                 ret = check_extent_item(fs_info, eb, slot);
11259                 err |= ret;
11260                 break;
11261         case BTRFS_EXTENT_CSUM_KEY:
11262                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11263                 break;
11264         case BTRFS_TREE_BLOCK_REF_KEY:
11265                 ret = check_tree_block_backref(fs_info, key.offset,
11266                                                key.objectid, -1);
11267                 err |= ret;
11268                 break;
11269         case BTRFS_EXTENT_DATA_REF_KEY:
11270                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11271                 ret = check_extent_data_backref(fs_info,
11272                                 btrfs_extent_data_ref_root(eb, dref),
11273                                 btrfs_extent_data_ref_objectid(eb, dref),
11274                                 btrfs_extent_data_ref_offset(eb, dref),
11275                                 key.objectid, 0,
11276                                 btrfs_extent_data_ref_count(eb, dref));
11277                 err |= ret;
11278                 break;
11279         case BTRFS_SHARED_BLOCK_REF_KEY:
11280                 ret = check_shared_block_backref(fs_info, key.offset,
11281                                                  key.objectid, -1);
11282                 err |= ret;
11283                 break;
11284         case BTRFS_SHARED_DATA_REF_KEY:
11285                 ret = check_shared_data_backref(fs_info, key.offset,
11286                                                 key.objectid);
11287                 err |= ret;
11288                 break;
11289         default:
11290                 break;
11291         }
11292
11293         if (++slot < btrfs_header_nritems(eb))
11294                 goto next;
11295
11296         return err;
11297 }
11298
11299 /*
11300  * Helper function for later fs/subvol tree check.  To determine if a tree
11301  * block should be checked.
11302  * This function will ensure only the direct referencer with lowest rootid to
11303  * check a fs/subvolume tree block.
11304  *
11305  * Backref check at extent tree would detect errors like missing subvolume
11306  * tree, so we can do aggressive check to reduce duplicated checks.
11307  */
11308 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11309 {
11310         struct btrfs_root *extent_root = root->fs_info->extent_root;
11311         struct btrfs_key key;
11312         struct btrfs_path path;
11313         struct extent_buffer *leaf;
11314         int slot;
11315         struct btrfs_extent_item *ei;
11316         unsigned long ptr;
11317         unsigned long end;
11318         int type;
11319         u32 item_size;
11320         u64 offset;
11321         struct btrfs_extent_inline_ref *iref;
11322         int ret;
11323
11324         btrfs_init_path(&path);
11325         key.objectid = btrfs_header_bytenr(eb);
11326         key.type = BTRFS_METADATA_ITEM_KEY;
11327         key.offset = (u64)-1;
11328
11329         /*
11330          * Any failure in backref resolving means we can't determine
11331          * whom the tree block belongs to.
11332          * So in that case, we need to check that tree block
11333          */
11334         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11335         if (ret < 0)
11336                 goto need_check;
11337
11338         ret = btrfs_previous_extent_item(extent_root, &path,
11339                                          btrfs_header_bytenr(eb));
11340         if (ret)
11341                 goto need_check;
11342
11343         leaf = path.nodes[0];
11344         slot = path.slots[0];
11345         btrfs_item_key_to_cpu(leaf, &key, slot);
11346         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11347
11348         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11349                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11350         } else {
11351                 struct btrfs_tree_block_info *info;
11352
11353                 info = (struct btrfs_tree_block_info *)(ei + 1);
11354                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11355         }
11356
11357         item_size = btrfs_item_size_nr(leaf, slot);
11358         ptr = (unsigned long)iref;
11359         end = (unsigned long)ei + item_size;
11360         while (ptr < end) {
11361                 iref = (struct btrfs_extent_inline_ref *)ptr;
11362                 type = btrfs_extent_inline_ref_type(leaf, iref);
11363                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11364
11365                 /*
11366                  * We only check the tree block if current root is
11367                  * the lowest referencer of it.
11368                  */
11369                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11370                     offset < root->objectid) {
11371                         btrfs_release_path(&path);
11372                         return 0;
11373                 }
11374
11375                 ptr += btrfs_extent_inline_ref_size(type);
11376         }
11377         /*
11378          * Normally we should also check keyed tree block ref, but that may be
11379          * very time consuming.  Inlined ref should already make us skip a lot
11380          * of refs now.  So skip search keyed tree block ref.
11381          */
11382
11383 need_check:
11384         btrfs_release_path(&path);
11385         return 1;
11386 }
11387
11388 /*
11389  * Traversal function for tree block. We will do:
11390  * 1) Skip shared fs/subvolume tree blocks
11391  * 2) Update related bytes accounting
11392  * 3) Pre-order traversal
11393  */
11394 static int traverse_tree_block(struct btrfs_root *root,
11395                                 struct extent_buffer *node)
11396 {
11397         struct extent_buffer *eb;
11398         struct btrfs_key key;
11399         struct btrfs_key drop_key;
11400         int level;
11401         u64 nr;
11402         int i;
11403         int err = 0;
11404         int ret;
11405
11406         /*
11407          * Skip shared fs/subvolume tree block, in that case they will
11408          * be checked by referencer with lowest rootid
11409          */
11410         if (is_fstree(root->objectid) && !should_check(root, node))
11411                 return 0;
11412
11413         /* Update bytes accounting */
11414         total_btree_bytes += node->len;
11415         if (fs_root_objectid(btrfs_header_owner(node)))
11416                 total_fs_tree_bytes += node->len;
11417         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11418                 total_extent_tree_bytes += node->len;
11419         if (!found_old_backref &&
11420             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11421             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11422             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11423                 found_old_backref = 1;
11424
11425         /* pre-order tranversal, check itself first */
11426         level = btrfs_header_level(node);
11427         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11428                                    btrfs_header_level(node),
11429                                    btrfs_header_owner(node));
11430         err |= ret;
11431         if (err)
11432                 error(
11433         "check %s failed root %llu bytenr %llu level %d, force continue check",
11434                         level ? "node":"leaf", root->objectid,
11435                         btrfs_header_bytenr(node), btrfs_header_level(node));
11436
11437         if (!level) {
11438                 btree_space_waste += btrfs_leaf_free_space(root, node);
11439                 ret = check_leaf_items(root, node);
11440                 err |= ret;
11441                 return err;
11442         }
11443
11444         nr = btrfs_header_nritems(node);
11445         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11446         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11447                 sizeof(struct btrfs_key_ptr);
11448
11449         /* Then check all its children */
11450         for (i = 0; i < nr; i++) {
11451                 u64 blocknr = btrfs_node_blockptr(node, i);
11452
11453                 btrfs_node_key_to_cpu(node, &key, i);
11454                 if (level == root->root_item.drop_level &&
11455                     is_dropped_key(&key, &drop_key))
11456                         continue;
11457
11458                 /*
11459                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11460                  * to call the function itself.
11461                  */
11462                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11463                 if (extent_buffer_uptodate(eb)) {
11464                         ret = traverse_tree_block(root, eb);
11465                         err |= ret;
11466                 }
11467                 free_extent_buffer(eb);
11468         }
11469
11470         return err;
11471 }
11472
11473 /*
11474  * Low memory usage version check_chunks_and_extents.
11475  */
11476 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11477 {
11478         struct btrfs_path path;
11479         struct btrfs_key key;
11480         struct btrfs_root *root1;
11481         struct btrfs_root *cur_root;
11482         int err = 0;
11483         int ret;
11484
11485         root1 = root->fs_info->chunk_root;
11486         ret = traverse_tree_block(root1, root1->node);
11487         err |= ret;
11488
11489         root1 = root->fs_info->tree_root;
11490         ret = traverse_tree_block(root1, root1->node);
11491         err |= ret;
11492
11493         btrfs_init_path(&path);
11494         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11495         key.offset = 0;
11496         key.type = BTRFS_ROOT_ITEM_KEY;
11497
11498         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11499         if (ret) {
11500                 error("cannot find extent treet in tree_root");
11501                 goto out;
11502         }
11503
11504         while (1) {
11505                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11506                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11507                         goto next;
11508                 key.offset = (u64)-1;
11509
11510                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11511                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11512                                         &key);
11513                 else
11514                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11515                 if (IS_ERR(cur_root) || !cur_root) {
11516                         error("failed to read tree: %lld", key.objectid);
11517                         goto next;
11518                 }
11519
11520                 ret = traverse_tree_block(cur_root, cur_root->node);
11521                 err |= ret;
11522
11523                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11524                         btrfs_free_fs_root(cur_root);
11525 next:
11526                 ret = btrfs_next_item(root1, &path);
11527                 if (ret)
11528                         goto out;
11529         }
11530
11531 out:
11532         btrfs_release_path(&path);
11533         return err;
11534 }
11535
11536 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11537                            struct btrfs_root *root, int overwrite)
11538 {
11539         struct extent_buffer *c;
11540         struct extent_buffer *old = root->node;
11541         int level;
11542         int ret;
11543         struct btrfs_disk_key disk_key = {0,0,0};
11544
11545         level = 0;
11546
11547         if (overwrite) {
11548                 c = old;
11549                 extent_buffer_get(c);
11550                 goto init;
11551         }
11552         c = btrfs_alloc_free_block(trans, root,
11553                                    root->nodesize,
11554                                    root->root_key.objectid,
11555                                    &disk_key, level, 0, 0);
11556         if (IS_ERR(c)) {
11557                 c = old;
11558                 extent_buffer_get(c);
11559                 overwrite = 1;
11560         }
11561 init:
11562         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11563         btrfs_set_header_level(c, level);
11564         btrfs_set_header_bytenr(c, c->start);
11565         btrfs_set_header_generation(c, trans->transid);
11566         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11567         btrfs_set_header_owner(c, root->root_key.objectid);
11568
11569         write_extent_buffer(c, root->fs_info->fsid,
11570                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11571
11572         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11573                             btrfs_header_chunk_tree_uuid(c),
11574                             BTRFS_UUID_SIZE);
11575
11576         btrfs_mark_buffer_dirty(c);
11577         /*
11578          * this case can happen in the following case:
11579          *
11580          * 1.overwrite previous root.
11581          *
11582          * 2.reinit reloc data root, this is because we skip pin
11583          * down reloc data tree before which means we can allocate
11584          * same block bytenr here.
11585          */
11586         if (old->start == c->start) {
11587                 btrfs_set_root_generation(&root->root_item,
11588                                           trans->transid);
11589                 root->root_item.level = btrfs_header_level(root->node);
11590                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11591                                         &root->root_key, &root->root_item);
11592                 if (ret) {
11593                         free_extent_buffer(c);
11594                         return ret;
11595                 }
11596         }
11597         free_extent_buffer(old);
11598         root->node = c;
11599         add_root_to_dirty_list(root);
11600         return 0;
11601 }
11602
11603 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11604                                 struct extent_buffer *eb, int tree_root)
11605 {
11606         struct extent_buffer *tmp;
11607         struct btrfs_root_item *ri;
11608         struct btrfs_key key;
11609         u64 bytenr;
11610         u32 nodesize;
11611         int level = btrfs_header_level(eb);
11612         int nritems;
11613         int ret;
11614         int i;
11615
11616         /*
11617          * If we have pinned this block before, don't pin it again.
11618          * This can not only avoid forever loop with broken filesystem
11619          * but also give us some speedups.
11620          */
11621         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11622                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11623                 return 0;
11624
11625         btrfs_pin_extent(fs_info, eb->start, eb->len);
11626
11627         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11628         nritems = btrfs_header_nritems(eb);
11629         for (i = 0; i < nritems; i++) {
11630                 if (level == 0) {
11631                         btrfs_item_key_to_cpu(eb, &key, i);
11632                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11633                                 continue;
11634                         /* Skip the extent root and reloc roots */
11635                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11636                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11637                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11638                                 continue;
11639                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11640                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11641
11642                         /*
11643                          * If at any point we start needing the real root we
11644                          * will have to build a stump root for the root we are
11645                          * in, but for now this doesn't actually use the root so
11646                          * just pass in extent_root.
11647                          */
11648                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11649                                               nodesize, 0);
11650                         if (!extent_buffer_uptodate(tmp)) {
11651                                 fprintf(stderr, "Error reading root block\n");
11652                                 return -EIO;
11653                         }
11654                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11655                         free_extent_buffer(tmp);
11656                         if (ret)
11657                                 return ret;
11658                 } else {
11659                         bytenr = btrfs_node_blockptr(eb, i);
11660
11661                         /* If we aren't the tree root don't read the block */
11662                         if (level == 1 && !tree_root) {
11663                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11664                                 continue;
11665                         }
11666
11667                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11668                                               nodesize, 0);
11669                         if (!extent_buffer_uptodate(tmp)) {
11670                                 fprintf(stderr, "Error reading tree block\n");
11671                                 return -EIO;
11672                         }
11673                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11674                         free_extent_buffer(tmp);
11675                         if (ret)
11676                                 return ret;
11677                 }
11678         }
11679
11680         return 0;
11681 }
11682
11683 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11684 {
11685         int ret;
11686
11687         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11688         if (ret)
11689                 return ret;
11690
11691         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11692 }
11693
11694 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11695 {
11696         struct btrfs_block_group_cache *cache;
11697         struct btrfs_path path;
11698         struct extent_buffer *leaf;
11699         struct btrfs_chunk *chunk;
11700         struct btrfs_key key;
11701         int ret;
11702         u64 start;
11703
11704         btrfs_init_path(&path);
11705         key.objectid = 0;
11706         key.type = BTRFS_CHUNK_ITEM_KEY;
11707         key.offset = 0;
11708         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11709         if (ret < 0) {
11710                 btrfs_release_path(&path);
11711                 return ret;
11712         }
11713
11714         /*
11715          * We do this in case the block groups were screwed up and had alloc
11716          * bits that aren't actually set on the chunks.  This happens with
11717          * restored images every time and could happen in real life I guess.
11718          */
11719         fs_info->avail_data_alloc_bits = 0;
11720         fs_info->avail_metadata_alloc_bits = 0;
11721         fs_info->avail_system_alloc_bits = 0;
11722
11723         /* First we need to create the in-memory block groups */
11724         while (1) {
11725                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11726                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11727                         if (ret < 0) {
11728                                 btrfs_release_path(&path);
11729                                 return ret;
11730                         }
11731                         if (ret) {
11732                                 ret = 0;
11733                                 break;
11734                         }
11735                 }
11736                 leaf = path.nodes[0];
11737                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11738                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11739                         path.slots[0]++;
11740                         continue;
11741                 }
11742
11743                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11744                 btrfs_add_block_group(fs_info, 0,
11745                                       btrfs_chunk_type(leaf, chunk),
11746                                       key.objectid, key.offset,
11747                                       btrfs_chunk_length(leaf, chunk));
11748                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11749                                  key.offset + btrfs_chunk_length(leaf, chunk));
11750                 path.slots[0]++;
11751         }
11752         start = 0;
11753         while (1) {
11754                 cache = btrfs_lookup_first_block_group(fs_info, start);
11755                 if (!cache)
11756                         break;
11757                 cache->cached = 1;
11758                 start = cache->key.objectid + cache->key.offset;
11759         }
11760
11761         btrfs_release_path(&path);
11762         return 0;
11763 }
11764
11765 static int reset_balance(struct btrfs_trans_handle *trans,
11766                          struct btrfs_fs_info *fs_info)
11767 {
11768         struct btrfs_root *root = fs_info->tree_root;
11769         struct btrfs_path path;
11770         struct extent_buffer *leaf;
11771         struct btrfs_key key;
11772         int del_slot, del_nr = 0;
11773         int ret;
11774         int found = 0;
11775
11776         btrfs_init_path(&path);
11777         key.objectid = BTRFS_BALANCE_OBJECTID;
11778         key.type = BTRFS_BALANCE_ITEM_KEY;
11779         key.offset = 0;
11780         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11781         if (ret) {
11782                 if (ret > 0)
11783                         ret = 0;
11784                 if (!ret)
11785                         goto reinit_data_reloc;
11786                 else
11787                         goto out;
11788         }
11789
11790         ret = btrfs_del_item(trans, root, &path);
11791         if (ret)
11792                 goto out;
11793         btrfs_release_path(&path);
11794
11795         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11796         key.type = BTRFS_ROOT_ITEM_KEY;
11797         key.offset = 0;
11798         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11799         if (ret < 0)
11800                 goto out;
11801         while (1) {
11802                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11803                         if (!found)
11804                                 break;
11805
11806                         if (del_nr) {
11807                                 ret = btrfs_del_items(trans, root, &path,
11808                                                       del_slot, del_nr);
11809                                 del_nr = 0;
11810                                 if (ret)
11811                                         goto out;
11812                         }
11813                         key.offset++;
11814                         btrfs_release_path(&path);
11815
11816                         found = 0;
11817                         ret = btrfs_search_slot(trans, root, &key, &path,
11818                                                 -1, 1);
11819                         if (ret < 0)
11820                                 goto out;
11821                         continue;
11822                 }
11823                 found = 1;
11824                 leaf = path.nodes[0];
11825                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11826                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11827                         break;
11828                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11829                         path.slots[0]++;
11830                         continue;
11831                 }
11832                 if (!del_nr) {
11833                         del_slot = path.slots[0];
11834                         del_nr = 1;
11835                 } else {
11836                         del_nr++;
11837                 }
11838                 path.slots[0]++;
11839         }
11840
11841         if (del_nr) {
11842                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11843                 if (ret)
11844                         goto out;
11845         }
11846         btrfs_release_path(&path);
11847
11848 reinit_data_reloc:
11849         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11850         key.type = BTRFS_ROOT_ITEM_KEY;
11851         key.offset = (u64)-1;
11852         root = btrfs_read_fs_root(fs_info, &key);
11853         if (IS_ERR(root)) {
11854                 fprintf(stderr, "Error reading data reloc tree\n");
11855                 ret = PTR_ERR(root);
11856                 goto out;
11857         }
11858         record_root_in_trans(trans, root);
11859         ret = btrfs_fsck_reinit_root(trans, root, 0);
11860         if (ret)
11861                 goto out;
11862         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11863 out:
11864         btrfs_release_path(&path);
11865         return ret;
11866 }
11867
11868 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11869                               struct btrfs_fs_info *fs_info)
11870 {
11871         u64 start = 0;
11872         int ret;
11873
11874         /*
11875          * The only reason we don't do this is because right now we're just
11876          * walking the trees we find and pinning down their bytes, we don't look
11877          * at any of the leaves.  In order to do mixed groups we'd have to check
11878          * the leaves of any fs roots and pin down the bytes for any file
11879          * extents we find.  Not hard but why do it if we don't have to?
11880          */
11881         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11882                 fprintf(stderr, "We don't support re-initing the extent tree "
11883                         "for mixed block groups yet, please notify a btrfs "
11884                         "developer you want to do this so they can add this "
11885                         "functionality.\n");
11886                 return -EINVAL;
11887         }
11888
11889         /*
11890          * first we need to walk all of the trees except the extent tree and pin
11891          * down the bytes that are in use so we don't overwrite any existing
11892          * metadata.
11893          */
11894         ret = pin_metadata_blocks(fs_info);
11895         if (ret) {
11896                 fprintf(stderr, "error pinning down used bytes\n");
11897                 return ret;
11898         }
11899
11900         /*
11901          * Need to drop all the block groups since we're going to recreate all
11902          * of them again.
11903          */
11904         btrfs_free_block_groups(fs_info);
11905         ret = reset_block_groups(fs_info);
11906         if (ret) {
11907                 fprintf(stderr, "error resetting the block groups\n");
11908                 return ret;
11909         }
11910
11911         /* Ok we can allocate now, reinit the extent root */
11912         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11913         if (ret) {
11914                 fprintf(stderr, "extent root initialization failed\n");
11915                 /*
11916                  * When the transaction code is updated we should end the
11917                  * transaction, but for now progs only knows about commit so
11918                  * just return an error.
11919                  */
11920                 return ret;
11921         }
11922
11923         /*
11924          * Now we have all the in-memory block groups setup so we can make
11925          * allocations properly, and the metadata we care about is safe since we
11926          * pinned all of it above.
11927          */
11928         while (1) {
11929                 struct btrfs_block_group_cache *cache;
11930
11931                 cache = btrfs_lookup_first_block_group(fs_info, start);
11932                 if (!cache)
11933                         break;
11934                 start = cache->key.objectid + cache->key.offset;
11935                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11936                                         &cache->key, &cache->item,
11937                                         sizeof(cache->item));
11938                 if (ret) {
11939                         fprintf(stderr, "Error adding block group\n");
11940                         return ret;
11941                 }
11942                 btrfs_extent_post_op(trans, fs_info->extent_root);
11943         }
11944
11945         ret = reset_balance(trans, fs_info);
11946         if (ret)
11947                 fprintf(stderr, "error resetting the pending balance\n");
11948
11949         return ret;
11950 }
11951
11952 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11953 {
11954         struct btrfs_path path;
11955         struct btrfs_trans_handle *trans;
11956         struct btrfs_key key;
11957         int ret;
11958
11959         printf("Recowing metadata block %llu\n", eb->start);
11960         key.objectid = btrfs_header_owner(eb);
11961         key.type = BTRFS_ROOT_ITEM_KEY;
11962         key.offset = (u64)-1;
11963
11964         root = btrfs_read_fs_root(root->fs_info, &key);
11965         if (IS_ERR(root)) {
11966                 fprintf(stderr, "Couldn't find owner root %llu\n",
11967                         key.objectid);
11968                 return PTR_ERR(root);
11969         }
11970
11971         trans = btrfs_start_transaction(root, 1);
11972         if (IS_ERR(trans))
11973                 return PTR_ERR(trans);
11974
11975         btrfs_init_path(&path);
11976         path.lowest_level = btrfs_header_level(eb);
11977         if (path.lowest_level)
11978                 btrfs_node_key_to_cpu(eb, &key, 0);
11979         else
11980                 btrfs_item_key_to_cpu(eb, &key, 0);
11981
11982         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11983         btrfs_commit_transaction(trans, root);
11984         btrfs_release_path(&path);
11985         return ret;
11986 }
11987
11988 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11989 {
11990         struct btrfs_path path;
11991         struct btrfs_trans_handle *trans;
11992         struct btrfs_key key;
11993         int ret;
11994
11995         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11996                bad->key.type, bad->key.offset);
11997         key.objectid = bad->root_id;
11998         key.type = BTRFS_ROOT_ITEM_KEY;
11999         key.offset = (u64)-1;
12000
12001         root = btrfs_read_fs_root(root->fs_info, &key);
12002         if (IS_ERR(root)) {
12003                 fprintf(stderr, "Couldn't find owner root %llu\n",
12004                         key.objectid);
12005                 return PTR_ERR(root);
12006         }
12007
12008         trans = btrfs_start_transaction(root, 1);
12009         if (IS_ERR(trans))
12010                 return PTR_ERR(trans);
12011
12012         btrfs_init_path(&path);
12013         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12014         if (ret) {
12015                 if (ret > 0)
12016                         ret = 0;
12017                 goto out;
12018         }
12019         ret = btrfs_del_item(trans, root, &path);
12020 out:
12021         btrfs_commit_transaction(trans, root);
12022         btrfs_release_path(&path);
12023         return ret;
12024 }
12025
12026 static int zero_log_tree(struct btrfs_root *root)
12027 {
12028         struct btrfs_trans_handle *trans;
12029         int ret;
12030
12031         trans = btrfs_start_transaction(root, 1);
12032         if (IS_ERR(trans)) {
12033                 ret = PTR_ERR(trans);
12034                 return ret;
12035         }
12036         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12037         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12038         ret = btrfs_commit_transaction(trans, root);
12039         return ret;
12040 }
12041
12042 static int populate_csum(struct btrfs_trans_handle *trans,
12043                          struct btrfs_root *csum_root, char *buf, u64 start,
12044                          u64 len)
12045 {
12046         u64 offset = 0;
12047         u64 sectorsize;
12048         int ret = 0;
12049
12050         while (offset < len) {
12051                 sectorsize = csum_root->sectorsize;
12052                 ret = read_extent_data(csum_root, buf, start + offset,
12053                                        &sectorsize, 0);
12054                 if (ret)
12055                         break;
12056                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12057                                             start + offset, buf, sectorsize);
12058                 if (ret)
12059                         break;
12060                 offset += sectorsize;
12061         }
12062         return ret;
12063 }
12064
12065 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12066                                       struct btrfs_root *csum_root,
12067                                       struct btrfs_root *cur_root)
12068 {
12069         struct btrfs_path path;
12070         struct btrfs_key key;
12071         struct extent_buffer *node;
12072         struct btrfs_file_extent_item *fi;
12073         char *buf = NULL;
12074         u64 start = 0;
12075         u64 len = 0;
12076         int slot = 0;
12077         int ret = 0;
12078
12079         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12080         if (!buf)
12081                 return -ENOMEM;
12082
12083         btrfs_init_path(&path);
12084         key.objectid = 0;
12085         key.offset = 0;
12086         key.type = 0;
12087         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12088         if (ret < 0)
12089                 goto out;
12090         /* Iterate all regular file extents and fill its csum */
12091         while (1) {
12092                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12093
12094                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12095                         goto next;
12096                 node = path.nodes[0];
12097                 slot = path.slots[0];
12098                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12099                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12100                         goto next;
12101                 start = btrfs_file_extent_disk_bytenr(node, fi);
12102                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12103
12104                 ret = populate_csum(trans, csum_root, buf, start, len);
12105                 if (ret == -EEXIST)
12106                         ret = 0;
12107                 if (ret < 0)
12108                         goto out;
12109 next:
12110                 /*
12111                  * TODO: if next leaf is corrupted, jump to nearest next valid
12112                  * leaf.
12113                  */
12114                 ret = btrfs_next_item(cur_root, &path);
12115                 if (ret < 0)
12116                         goto out;
12117                 if (ret > 0) {
12118                         ret = 0;
12119                         goto out;
12120                 }
12121         }
12122
12123 out:
12124         btrfs_release_path(&path);
12125         free(buf);
12126         return ret;
12127 }
12128
12129 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12130                                   struct btrfs_root *csum_root)
12131 {
12132         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12133         struct btrfs_path path;
12134         struct btrfs_root *tree_root = fs_info->tree_root;
12135         struct btrfs_root *cur_root;
12136         struct extent_buffer *node;
12137         struct btrfs_key key;
12138         int slot = 0;
12139         int ret = 0;
12140
12141         btrfs_init_path(&path);
12142         key.objectid = BTRFS_FS_TREE_OBJECTID;
12143         key.offset = 0;
12144         key.type = BTRFS_ROOT_ITEM_KEY;
12145         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12146         if (ret < 0)
12147                 goto out;
12148         if (ret > 0) {
12149                 ret = -ENOENT;
12150                 goto out;
12151         }
12152
12153         while (1) {
12154                 node = path.nodes[0];
12155                 slot = path.slots[0];
12156                 btrfs_item_key_to_cpu(node, &key, slot);
12157                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12158                         goto out;
12159                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12160                         goto next;
12161                 if (!is_fstree(key.objectid))
12162                         goto next;
12163                 key.offset = (u64)-1;
12164
12165                 cur_root = btrfs_read_fs_root(fs_info, &key);
12166                 if (IS_ERR(cur_root) || !cur_root) {
12167                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12168                                 key.objectid);
12169                         goto out;
12170                 }
12171                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12172                                 cur_root);
12173                 if (ret < 0)
12174                         goto out;
12175 next:
12176                 ret = btrfs_next_item(tree_root, &path);
12177                 if (ret > 0) {
12178                         ret = 0;
12179                         goto out;
12180                 }
12181                 if (ret < 0)
12182                         goto out;
12183         }
12184
12185 out:
12186         btrfs_release_path(&path);
12187         return ret;
12188 }
12189
12190 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12191                                       struct btrfs_root *csum_root)
12192 {
12193         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12194         struct btrfs_path path;
12195         struct btrfs_extent_item *ei;
12196         struct extent_buffer *leaf;
12197         char *buf;
12198         struct btrfs_key key;
12199         int ret;
12200
12201         btrfs_init_path(&path);
12202         key.objectid = 0;
12203         key.type = BTRFS_EXTENT_ITEM_KEY;
12204         key.offset = 0;
12205         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12206         if (ret < 0) {
12207                 btrfs_release_path(&path);
12208                 return ret;
12209         }
12210
12211         buf = malloc(csum_root->sectorsize);
12212         if (!buf) {
12213                 btrfs_release_path(&path);
12214                 return -ENOMEM;
12215         }
12216
12217         while (1) {
12218                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12219                         ret = btrfs_next_leaf(extent_root, &path);
12220                         if (ret < 0)
12221                                 break;
12222                         if (ret) {
12223                                 ret = 0;
12224                                 break;
12225                         }
12226                 }
12227                 leaf = path.nodes[0];
12228
12229                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12230                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12231                         path.slots[0]++;
12232                         continue;
12233                 }
12234
12235                 ei = btrfs_item_ptr(leaf, path.slots[0],
12236                                     struct btrfs_extent_item);
12237                 if (!(btrfs_extent_flags(leaf, ei) &
12238                       BTRFS_EXTENT_FLAG_DATA)) {
12239                         path.slots[0]++;
12240                         continue;
12241                 }
12242
12243                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12244                                     key.offset);
12245                 if (ret)
12246                         break;
12247                 path.slots[0]++;
12248         }
12249
12250         btrfs_release_path(&path);
12251         free(buf);
12252         return ret;
12253 }
12254
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so the extent tree
 * cannot be used as the source in that case.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 *
 * Returns whatever the selected walker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12271
12272 static void free_roots_info_cache(void)
12273 {
12274         if (!roots_info_cache)
12275                 return;
12276
12277         while (!cache_tree_empty(roots_info_cache)) {
12278                 struct cache_extent *entry;
12279                 struct root_item_info *rii;
12280
12281                 entry = first_cache_extent(roots_info_cache);
12282                 if (!entry)
12283                         break;
12284                 remove_cache_extent(roots_info_cache, entry);
12285                 rii = container_of(entry, struct root_item_info, cache_extent);
12286                 free(rii);
12287         }
12288
12289         free(roots_info_cache);
12290         roots_info_cache = NULL;
12291 }
12292
12293 static int build_roots_info_cache(struct btrfs_fs_info *info)
12294 {
12295         int ret = 0;
12296         struct btrfs_key key;
12297         struct extent_buffer *leaf;
12298         struct btrfs_path path;
12299
12300         if (!roots_info_cache) {
12301                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12302                 if (!roots_info_cache)
12303                         return -ENOMEM;
12304                 cache_tree_init(roots_info_cache);
12305         }
12306
12307         btrfs_init_path(&path);
12308         key.objectid = 0;
12309         key.type = BTRFS_EXTENT_ITEM_KEY;
12310         key.offset = 0;
12311         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12312         if (ret < 0)
12313                 goto out;
12314         leaf = path.nodes[0];
12315
12316         while (1) {
12317                 struct btrfs_key found_key;
12318                 struct btrfs_extent_item *ei;
12319                 struct btrfs_extent_inline_ref *iref;
12320                 int slot = path.slots[0];
12321                 int type;
12322                 u64 flags;
12323                 u64 root_id;
12324                 u8 level;
12325                 struct cache_extent *entry;
12326                 struct root_item_info *rii;
12327
12328                 if (slot >= btrfs_header_nritems(leaf)) {
12329                         ret = btrfs_next_leaf(info->extent_root, &path);
12330                         if (ret < 0) {
12331                                 break;
12332                         } else if (ret) {
12333                                 ret = 0;
12334                                 break;
12335                         }
12336                         leaf = path.nodes[0];
12337                         slot = path.slots[0];
12338                 }
12339
12340                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12341
12342                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12343                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12344                         goto next;
12345
12346                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12347                 flags = btrfs_extent_flags(leaf, ei);
12348
12349                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12350                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12351                         goto next;
12352
12353                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12354                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12355                         level = found_key.offset;
12356                 } else {
12357                         struct btrfs_tree_block_info *binfo;
12358
12359                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12360                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12361                         level = btrfs_tree_block_level(leaf, binfo);
12362                 }
12363
12364                 /*
12365                  * For a root extent, it must be of the following type and the
12366                  * first (and only one) iref in the item.
12367                  */
12368                 type = btrfs_extent_inline_ref_type(leaf, iref);
12369                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12370                         goto next;
12371
12372                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12373                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12374                 if (!entry) {
12375                         rii = malloc(sizeof(struct root_item_info));
12376                         if (!rii) {
12377                                 ret = -ENOMEM;
12378                                 goto out;
12379                         }
12380                         rii->cache_extent.start = root_id;
12381                         rii->cache_extent.size = 1;
12382                         rii->level = (u8)-1;
12383                         entry = &rii->cache_extent;
12384                         ret = insert_cache_extent(roots_info_cache, entry);
12385                         ASSERT(ret == 0);
12386                 } else {
12387                         rii = container_of(entry, struct root_item_info,
12388                                            cache_extent);
12389                 }
12390
12391                 ASSERT(rii->cache_extent.start == root_id);
12392                 ASSERT(rii->cache_extent.size == 1);
12393
12394                 if (level > rii->level || rii->level == (u8)-1) {
12395                         rii->level = level;
12396                         rii->bytenr = found_key.objectid;
12397                         rii->gen = btrfs_extent_generation(leaf, ei);
12398                         rii->node_count = 1;
12399                 } else if (level == rii->level) {
12400                         rii->node_count++;
12401                 }
12402 next:
12403                 path.slots[0]++;
12404         }
12405
12406 out:
12407         btrfs_release_path(&path);
12408
12409         return ret;
12410 }
12411
12412 static int maybe_repair_root_item(struct btrfs_path *path,
12413                                   const struct btrfs_key *root_key,
12414                                   const int read_only_mode)
12415 {
12416         const u64 root_id = root_key->objectid;
12417         struct cache_extent *entry;
12418         struct root_item_info *rii;
12419         struct btrfs_root_item ri;
12420         unsigned long offset;
12421
12422         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12423         if (!entry) {
12424                 fprintf(stderr,
12425                         "Error: could not find extent items for root %llu\n",
12426                         root_key->objectid);
12427                 return -ENOENT;
12428         }
12429
12430         rii = container_of(entry, struct root_item_info, cache_extent);
12431         ASSERT(rii->cache_extent.start == root_id);
12432         ASSERT(rii->cache_extent.size == 1);
12433
12434         if (rii->node_count != 1) {
12435                 fprintf(stderr,
12436                         "Error: could not find btree root extent for root %llu\n",
12437                         root_id);
12438                 return -ENOENT;
12439         }
12440
12441         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12442         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12443
12444         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12445             btrfs_root_level(&ri) != rii->level ||
12446             btrfs_root_generation(&ri) != rii->gen) {
12447
12448                 /*
12449                  * If we're in repair mode but our caller told us to not update
12450                  * the root item, i.e. just check if it needs to be updated, don't
12451                  * print this message, since the caller will call us again shortly
12452                  * for the same root item without read only mode (the caller will
12453                  * open a transaction first).
12454                  */
12455                 if (!(read_only_mode && repair))
12456                         fprintf(stderr,
12457                                 "%sroot item for root %llu,"
12458                                 " current bytenr %llu, current gen %llu, current level %u,"
12459                                 " new bytenr %llu, new gen %llu, new level %u\n",
12460                                 (read_only_mode ? "" : "fixing "),
12461                                 root_id,
12462                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12463                                 btrfs_root_level(&ri),
12464                                 rii->bytenr, rii->gen, rii->level);
12465
12466                 if (btrfs_root_generation(&ri) > rii->gen) {
12467                         fprintf(stderr,
12468                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12469                                 root_id, btrfs_root_generation(&ri), rii->gen);
12470                         return -EINVAL;
12471                 }
12472
12473                 if (!read_only_mode) {
12474                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12475                         btrfs_set_root_level(&ri, rii->level);
12476                         btrfs_set_root_generation(&ri, rii->gen);
12477                         write_extent_buffer(path->nodes[0], &ri,
12478                                             offset, sizeof(ri));
12479                 }
12480
12481                 return 1;
12482         }
12483
12484         return 0;
12485 }
12486
12487 /*
12488  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12489  * caused read-only snapshots to be corrupted if they were created at a moment
12490  * when the source subvolume/snapshot had orphan items. The issue was that the
12491  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12492  * node instead of the post orphan cleanup root node.
12493  * So this function, and its callees, just detects and fixes those cases. Even
12494  * though the regression was for read-only snapshots, this function applies to
12495  * any snapshot/subvolume root.
12496  * This must be run before any other repair code - not doing it so, makes other
12497  * repair code delete or modify backrefs in the extent tree for example, which
12498  * will result in an inconsistent fs after repairing the root items.
12499  */
12500 static int repair_root_items(struct btrfs_fs_info *info)
12501 {
12502         struct btrfs_path path;
12503         struct btrfs_key key;
12504         struct extent_buffer *leaf;
12505         struct btrfs_trans_handle *trans = NULL;
12506         int ret = 0;
12507         int bad_roots = 0;
12508         int need_trans = 0;
12509
12510         btrfs_init_path(&path);
12511
12512         ret = build_roots_info_cache(info);
12513         if (ret)
12514                 goto out;
12515
12516         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12517         key.type = BTRFS_ROOT_ITEM_KEY;
12518         key.offset = 0;
12519
12520 again:
12521         /*
12522          * Avoid opening and committing transactions if a leaf doesn't have
12523          * any root items that need to be fixed, so that we avoid rotating
12524          * backup roots unnecessarily.
12525          */
12526         if (need_trans) {
12527                 trans = btrfs_start_transaction(info->tree_root, 1);
12528                 if (IS_ERR(trans)) {
12529                         ret = PTR_ERR(trans);
12530                         goto out;
12531                 }
12532         }
12533
12534         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12535                                 0, trans ? 1 : 0);
12536         if (ret < 0)
12537                 goto out;
12538         leaf = path.nodes[0];
12539
12540         while (1) {
12541                 struct btrfs_key found_key;
12542
12543                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12544                         int no_more_keys = find_next_key(&path, &key);
12545
12546                         btrfs_release_path(&path);
12547                         if (trans) {
12548                                 ret = btrfs_commit_transaction(trans,
12549                                                                info->tree_root);
12550                                 trans = NULL;
12551                                 if (ret < 0)
12552                                         goto out;
12553                         }
12554                         need_trans = 0;
12555                         if (no_more_keys)
12556                                 break;
12557                         goto again;
12558                 }
12559
12560                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12561
12562                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12563                         goto next;
12564                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12565                         goto next;
12566
12567                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12568                 if (ret < 0)
12569                         goto out;
12570                 if (ret) {
12571                         if (!trans && repair) {
12572                                 need_trans = 1;
12573                                 key = found_key;
12574                                 btrfs_release_path(&path);
12575                                 goto again;
12576                         }
12577                         bad_roots++;
12578                 }
12579 next:
12580                 path.slots[0]++;
12581         }
12582         ret = 0;
12583 out:
12584         free_roots_info_cache();
12585         btrfs_release_path(&path);
12586         if (trans)
12587                 btrfs_commit_transaction(trans, info->tree_root);
12588         if (ret < 0)
12589                 return ret;
12590
12591         return bad_roots;
12592 }
12593
12594 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12595 {
12596         struct btrfs_trans_handle *trans;
12597         struct btrfs_block_group_cache *bg_cache;
12598         u64 current = 0;
12599         int ret = 0;
12600
12601         /* Clear all free space cache inodes and its extent data */
12602         while (1) {
12603                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12604                 if (!bg_cache)
12605                         break;
12606                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12607                 if (ret < 0)
12608                         return ret;
12609                 current = bg_cache->key.objectid + bg_cache->key.offset;
12610         }
12611
12612         /* Don't forget to set cache_generation to -1 */
12613         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12614         if (IS_ERR(trans)) {
12615                 error("failed to update super block cache generation");
12616                 return PTR_ERR(trans);
12617         }
12618         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12619         btrfs_commit_transaction(trans, fs_info->tree_root);
12620
12621         return ret;
12622 }
12623
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 * NULL-terminated array of lines.
 */
const char *const cmd_check_usage[] = {
	/* Synopsis and one-line summary. */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description. */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Super block / root selection and repair switches. */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",

	/* Checker implementation selection. */
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",

	/* Extra checks, reports and miscellaneous options. */
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12654
12655 int cmd_check(int argc, char **argv)
12656 {
12657         struct cache_tree root_cache;
12658         struct btrfs_root *root;
12659         struct btrfs_fs_info *info;
12660         u64 bytenr = 0;
12661         u64 subvolid = 0;
12662         u64 tree_root_bytenr = 0;
12663         u64 chunk_root_bytenr = 0;
12664         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12665         int ret;
12666         int err = 0;
12667         u64 num;
12668         int init_csum_tree = 0;
12669         int readonly = 0;
12670         int clear_space_cache = 0;
12671         int qgroup_report = 0;
12672         int qgroups_repaired = 0;
12673         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12674
12675         while(1) {
12676                 int c;
12677                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12678                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12679                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12680                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12681                 static const struct option long_options[] = {
12682                         { "super", required_argument, NULL, 's' },
12683                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12684                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12685                         { "init-csum-tree", no_argument, NULL,
12686                                 GETOPT_VAL_INIT_CSUM },
12687                         { "init-extent-tree", no_argument, NULL,
12688                                 GETOPT_VAL_INIT_EXTENT },
12689                         { "check-data-csum", no_argument, NULL,
12690                                 GETOPT_VAL_CHECK_CSUM },
12691                         { "backup", no_argument, NULL, 'b' },
12692                         { "subvol-extents", required_argument, NULL, 'E' },
12693                         { "qgroup-report", no_argument, NULL, 'Q' },
12694                         { "tree-root", required_argument, NULL, 'r' },
12695                         { "chunk-root", required_argument, NULL,
12696                                 GETOPT_VAL_CHUNK_TREE },
12697                         { "progress", no_argument, NULL, 'p' },
12698                         { "mode", required_argument, NULL,
12699                                 GETOPT_VAL_MODE },
12700                         { "clear-space-cache", required_argument, NULL,
12701                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12702                         { NULL, 0, NULL, 0}
12703                 };
12704
12705                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12706                 if (c < 0)
12707                         break;
12708                 switch(c) {
12709                         case 'a': /* ignored */ break;
12710                         case 'b':
12711                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12712                                 break;
12713                         case 's':
12714                                 num = arg_strtou64(optarg);
12715                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12716                                         error(
12717                                         "super mirror should be less than %d",
12718                                                 BTRFS_SUPER_MIRROR_MAX);
12719                                         exit(1);
12720                                 }
12721                                 bytenr = btrfs_sb_offset(((int)num));
12722                                 printf("using SB copy %llu, bytenr %llu\n", num,
12723                                        (unsigned long long)bytenr);
12724                                 break;
12725                         case 'Q':
12726                                 qgroup_report = 1;
12727                                 break;
12728                         case 'E':
12729                                 subvolid = arg_strtou64(optarg);
12730                                 break;
12731                         case 'r':
12732                                 tree_root_bytenr = arg_strtou64(optarg);
12733                                 break;
12734                         case GETOPT_VAL_CHUNK_TREE:
12735                                 chunk_root_bytenr = arg_strtou64(optarg);
12736                                 break;
12737                         case 'p':
12738                                 ctx.progress_enabled = true;
12739                                 break;
12740                         case '?':
12741                         case 'h':
12742                                 usage(cmd_check_usage);
12743                         case GETOPT_VAL_REPAIR:
12744                                 printf("enabling repair mode\n");
12745                                 repair = 1;
12746                                 ctree_flags |= OPEN_CTREE_WRITES;
12747                                 break;
12748                         case GETOPT_VAL_READONLY:
12749                                 readonly = 1;
12750                                 break;
12751                         case GETOPT_VAL_INIT_CSUM:
12752                                 printf("Creating a new CRC tree\n");
12753                                 init_csum_tree = 1;
12754                                 repair = 1;
12755                                 ctree_flags |= OPEN_CTREE_WRITES;
12756                                 break;
12757                         case GETOPT_VAL_INIT_EXTENT:
12758                                 init_extent_tree = 1;
12759                                 ctree_flags |= (OPEN_CTREE_WRITES |
12760                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12761                                 repair = 1;
12762                                 break;
12763                         case GETOPT_VAL_CHECK_CSUM:
12764                                 check_data_csum = 1;
12765                                 break;
12766                         case GETOPT_VAL_MODE:
12767                                 check_mode = parse_check_mode(optarg);
12768                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12769                                         error("unknown mode: %s", optarg);
12770                                         exit(1);
12771                                 }
12772                                 break;
12773                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12774                                 if (strcmp(optarg, "v1") == 0) {
12775                                         clear_space_cache = 1;
12776                                 } else if (strcmp(optarg, "v2") == 0) {
12777                                         clear_space_cache = 2;
12778                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12779                                 } else {
12780                                         error(
12781                 "invalid argument to --clear-space-cache, must be v1 or v2");
12782                                         exit(1);
12783                                 }
12784                                 ctree_flags |= OPEN_CTREE_WRITES;
12785                                 break;
12786                 }
12787         }
12788
12789         if (check_argc_exact(argc - optind, 1))
12790                 usage(cmd_check_usage);
12791
12792         if (ctx.progress_enabled) {
12793                 ctx.tp = TASK_NOTHING;
12794                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12795         }
12796
12797         /* This check is the only reason for --readonly to exist */
12798         if (readonly && repair) {
12799                 error("repair options are not compatible with --readonly");
12800                 exit(1);
12801         }
12802
12803         /*
12804          * Not supported yet
12805          */
12806         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12807                 error("low memory mode doesn't support repair yet");
12808                 exit(1);
12809         }
12810
12811         radix_tree_init();
12812         cache_tree_init(&root_cache);
12813
12814         if((ret = check_mounted(argv[optind])) < 0) {
12815                 error("could not check mount status: %s", strerror(-ret));
12816                 err |= !!ret;
12817                 goto err_out;
12818         } else if(ret) {
12819                 error("%s is currently mounted, aborting", argv[optind]);
12820                 ret = -EBUSY;
12821                 err |= !!ret;
12822                 goto err_out;
12823         }
12824
12825         /* only allow partial opening under repair mode */
12826         if (repair)
12827                 ctree_flags |= OPEN_CTREE_PARTIAL;
12828
12829         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12830                                   chunk_root_bytenr, ctree_flags);
12831         if (!info) {
12832                 error("cannot open file system");
12833                 ret = -EIO;
12834                 err |= !!ret;
12835                 goto err_out;
12836         }
12837
12838         global_info = info;
12839         root = info->fs_root;
12840         if (clear_space_cache == 1) {
12841                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12842                         error(
12843                 "free space cache v2 detected, use --clear-space-cache v2");
12844                         ret = 1;
12845                         goto close_out;
12846                 }
12847                 printf("Clearing free space cache\n");
12848                 ret = clear_free_space_cache(info);
12849                 if (ret) {
12850                         error("failed to clear free space cache");
12851                         ret = 1;
12852                 } else {
12853                         printf("Free space cache cleared\n");
12854                 }
12855                 goto close_out;
12856         } else if (clear_space_cache == 2) {
12857                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12858                         printf("no free space cache v2 to clear\n");
12859                         ret = 0;
12860                         goto close_out;
12861                 }
12862                 printf("Clear free space cache v2\n");
12863                 ret = btrfs_clear_free_space_tree(info);
12864                 if (ret) {
12865                         error("failed to clear free space cache v2: %d", ret);
12866                         ret = 1;
12867                 } else {
12868                         printf("free space cache v2 cleared\n");
12869                 }
12870                 goto close_out;
12871         }
12872
12873         /*
12874          * repair mode will force us to commit transaction which
12875          * will make us fail to load log tree when mounting.
12876          */
12877         if (repair && btrfs_super_log_root(info->super_copy)) {
12878                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12879                 if (!ret) {
12880                         ret = 1;
12881                         err |= !!ret;
12882                         goto close_out;
12883                 }
12884                 ret = zero_log_tree(root);
12885                 err |= !!ret;
12886                 if (ret) {
12887                         error("failed to zero log tree: %d", ret);
12888                         goto close_out;
12889                 }
12890         }
12891
12892         uuid_unparse(info->super_copy->fsid, uuidbuf);
12893         if (qgroup_report) {
12894                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12895                        uuidbuf);
12896                 ret = qgroup_verify_all(info);
12897                 err |= !!ret;
12898                 if (ret == 0)
12899                         report_qgroups(1);
12900                 goto close_out;
12901         }
12902         if (subvolid) {
12903                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12904                        subvolid, argv[optind], uuidbuf);
12905                 ret = print_extent_state(info, subvolid);
12906                 err |= !!ret;
12907                 goto close_out;
12908         }
12909         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12910
12911         if (!extent_buffer_uptodate(info->tree_root->node) ||
12912             !extent_buffer_uptodate(info->dev_root->node) ||
12913             !extent_buffer_uptodate(info->chunk_root->node)) {
12914                 error("critical roots corrupted, unable to check the filesystem");
12915                 err |= !!ret;
12916                 ret = -EIO;
12917                 goto close_out;
12918         }
12919
12920         if (init_extent_tree || init_csum_tree) {
12921                 struct btrfs_trans_handle *trans;
12922
12923                 trans = btrfs_start_transaction(info->extent_root, 0);
12924                 if (IS_ERR(trans)) {
12925                         error("error starting transaction");
12926                         ret = PTR_ERR(trans);
12927                         err |= !!ret;
12928                         goto close_out;
12929                 }
12930
12931                 if (init_extent_tree) {
12932                         printf("Creating a new extent tree\n");
12933                         ret = reinit_extent_tree(trans, info);
12934                         err |= !!ret;
12935                         if (ret)
12936                                 goto close_out;
12937                 }
12938
12939                 if (init_csum_tree) {
12940                         printf("Reinitialize checksum tree\n");
12941                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12942                         if (ret) {
12943                                 error("checksum tree initialization failed: %d",
12944                                                 ret);
12945                                 ret = -EIO;
12946                                 err |= !!ret;
12947                                 goto close_out;
12948                         }
12949
12950                         ret = fill_csum_tree(trans, info->csum_root,
12951                                              init_extent_tree);
12952                         err |= !!ret;
12953                         if (ret) {
12954                                 error("checksum tree refilling failed: %d", ret);
12955                                 return -EIO;
12956                         }
12957                 }
12958                 /*
12959                  * Ok now we commit and run the normal fsck, which will add
12960                  * extent entries for all of the items it finds.
12961                  */
12962                 ret = btrfs_commit_transaction(trans, info->extent_root);
12963                 err |= !!ret;
12964                 if (ret)
12965                         goto close_out;
12966         }
12967         if (!extent_buffer_uptodate(info->extent_root->node)) {
12968                 error("critical: extent_root, unable to check the filesystem");
12969                 ret = -EIO;
12970                 err |= !!ret;
12971                 goto close_out;
12972         }
12973         if (!extent_buffer_uptodate(info->csum_root->node)) {
12974                 error("critical: csum_root, unable to check the filesystem");
12975                 ret = -EIO;
12976                 err |= !!ret;
12977                 goto close_out;
12978         }
12979
12980         if (!ctx.progress_enabled)
12981                 fprintf(stderr, "checking extents\n");
12982         if (check_mode == CHECK_MODE_LOWMEM)
12983                 ret = check_chunks_and_extents_v2(root);
12984         else
12985                 ret = check_chunks_and_extents(root);
12986         err |= !!ret;
12987         if (ret)
12988                 error(
12989                 "errors found in extent allocation tree or chunk allocation");
12990
12991         ret = repair_root_items(info);
12992         err |= !!ret;
12993         if (ret < 0) {
12994                 error("failed to repair root items: %s", strerror(-ret));
12995                 goto close_out;
12996         }
12997         if (repair) {
12998                 fprintf(stderr, "Fixed %d roots.\n", ret);
12999                 ret = 0;
13000         } else if (ret > 0) {
13001                 fprintf(stderr,
13002                        "Found %d roots with an outdated root item.\n",
13003                        ret);
13004                 fprintf(stderr,
13005                         "Please run a filesystem check with the option --repair to fix them.\n");
13006                 ret = 1;
13007                 err |= !!ret;
13008                 goto close_out;
13009         }
13010
13011         if (!ctx.progress_enabled) {
13012                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13013                         fprintf(stderr, "checking free space tree\n");
13014                 else
13015                         fprintf(stderr, "checking free space cache\n");
13016         }
13017         ret = check_space_cache(root);
13018         err |= !!ret;
13019         if (ret) {
13020                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13021                         error("errors found in free space tree");
13022                 else
13023                         error("errors found in free space cache");
13024                 goto out;
13025         }
13026
13027         /*
13028          * We used to have to have these hole extents in between our real
13029          * extents so if we don't have this flag set we need to make sure there
13030          * are no gaps in the file extents for inodes, otherwise we can just
13031          * ignore it when this happens.
13032          */
13033         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13034         if (!ctx.progress_enabled)
13035                 fprintf(stderr, "checking fs roots\n");
13036         if (check_mode == CHECK_MODE_LOWMEM)
13037                 ret = check_fs_roots_v2(root->fs_info);
13038         else
13039                 ret = check_fs_roots(root, &root_cache);
13040         err |= !!ret;
13041         if (ret) {
13042                 error("errors found in fs roots");
13043                 goto out;
13044         }
13045
13046         fprintf(stderr, "checking csums\n");
13047         ret = check_csums(root);
13048         err |= !!ret;
13049         if (ret) {
13050                 error("errors found in csum tree");
13051                 goto out;
13052         }
13053
13054         fprintf(stderr, "checking root refs\n");
13055         /* For low memory mode, check_fs_roots_v2 handles root refs */
13056         if (check_mode != CHECK_MODE_LOWMEM) {
13057                 ret = check_root_refs(root, &root_cache);
13058                 err |= !!ret;
13059                 if (ret) {
13060                         error("errors found in root refs");
13061                         goto out;
13062                 }
13063         }
13064
13065         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13066                 struct extent_buffer *eb;
13067
13068                 eb = list_first_entry(&root->fs_info->recow_ebs,
13069                                       struct extent_buffer, recow);
13070                 list_del_init(&eb->recow);
13071                 ret = recow_extent_buffer(root, eb);
13072                 err |= !!ret;
13073                 if (ret) {
13074                         error("fails to fix transid errors");
13075                         break;
13076                 }
13077         }
13078
13079         while (!list_empty(&delete_items)) {
13080                 struct bad_item *bad;
13081
13082                 bad = list_first_entry(&delete_items, struct bad_item, list);
13083                 list_del_init(&bad->list);
13084                 if (repair) {
13085                         ret = delete_bad_item(root, bad);
13086                         err |= !!ret;
13087                 }
13088                 free(bad);
13089         }
13090
13091         if (info->quota_enabled) {
13092                 fprintf(stderr, "checking quota groups\n");
13093                 ret = qgroup_verify_all(info);
13094                 err |= !!ret;
13095                 if (ret) {
13096                         error("failed to check quota groups");
13097                         goto out;
13098                 }
13099                 report_qgroups(0);
13100                 ret = repair_qgroups(info, &qgroups_repaired);
13101                 err |= !!ret;
13102                 if (err) {
13103                         error("failed to repair quota groups");
13104                         goto out;
13105                 }
13106                 ret = 0;
13107         }
13108
13109         if (!list_empty(&root->fs_info->recow_ebs)) {
13110                 error("transid errors in file system");
13111                 ret = 1;
13112                 err |= !!ret;
13113         }
13114 out:
13115         if (found_old_backref) { /*
13116                  * there was a disk format change when mixed
13117                  * backref was in testing tree. The old format
13118                  * existed about one week.
13119                  */
13120                 printf("\n * Found old mixed backref format. "
13121                        "The old format is not supported! *"
13122                        "\n * Please mount the FS in readonly mode, "
13123                        "backup data and re-format the FS. *\n\n");
13124                 err |= 1;
13125         }
13126         printf("found %llu bytes used, ",
13127                (unsigned long long)bytes_used);
13128         if (err)
13129                 printf("error(s) found\n");
13130         else
13131                 printf("no error found\n");
13132         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13133         printf("total tree bytes: %llu\n",
13134                (unsigned long long)total_btree_bytes);
13135         printf("total fs tree bytes: %llu\n",
13136                (unsigned long long)total_fs_tree_bytes);
13137         printf("total extent tree bytes: %llu\n",
13138                (unsigned long long)total_extent_tree_bytes);
13139         printf("btree space waste bytes: %llu\n",
13140                (unsigned long long)btree_space_waste);
13141         printf("file data blocks allocated: %llu\n referenced %llu\n",
13142                 (unsigned long long)data_bytes_allocated,
13143                 (unsigned long long)data_bytes_referenced);
13144
13145         free_qgroup_counts();
13146         free_root_recs_tree(&root_cache);
13147 close_out:
13148         close_ctree(root);
13149 err_out:
13150         if (ctx.progress_enabled)
13151                 task_deinit(ctx.info);
13152
13153         return err;
13154 }