btrfs-progs: check: Avoid reading beyond item boundary for inode_ref
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check that the progress task is reporting on.
 * print_status_check() uses the value as an index into its table of
 * status strings; that table has no entry for TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* presumably non-zero when progress output was requested -- confirm */
        int progress_enabled;
        /* selects the status string that print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start()/task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.  Every counter, flag and pointer
 * below starts out zero/NULL and both lists start out empty.
 * NOTE(review): the code that updates them is not part of this section
 * of the file; the per-variable meaning is inferred from the names.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation selected by the user.  parse_check_mode() maps
 * "lowmem" to CHECK_MODE_LOWMEM, "orig"/"original" to
 * CHECK_MODE_ORIGINAL and anything else to CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        /* alias, not a separate mode: the default is the original mode */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
85
/* Currently selected check mode; starts as CHECK_MODE_DEFAULT. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common part of a back reference.  It is embedded as the ->node member
 * of struct data_backref and struct tree_backref; to_data_backref() and
 * to_tree_backref() recover the containing structure.
 */
struct extent_backref {
        struct list_head list;          /* link used by to_extent_backref() */
        /* presumably tells data_backref from tree_backref -- confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference with data-extent details, built around an embedded
 * struct extent_backref (->node); see to_data_backref().
 * NOTE(review): field meanings are inferred from their names.
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share storage: only one of them is meaningful */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Error bits.  Each name owns a distinct bit (1 through 17; bit 0 is not
 * used by this group), so several of them can be OR-ed into one value.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;           /* reported as "inode" by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Back reference built around an embedded struct extent_backref
 * (->node); see to_tree_backref().
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage: only one of them is meaningful */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That bitfield is two bits wide, so it can hold this third value (2)
 * in addition to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
170
/*
 * Record describing one extent.
 * NOTE(review): the code that fills this in is not in this part of the
 * file; field meanings are inferred from their names.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;          /* link used by to_extent_record() */
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One name referring to an inode, kept on inode_record::backrefs.
 * The name is stored inline after the fixed part; clone_inode_rec()
 * copies sizeof(struct) + namelen + 1 bytes for each entry.
 */
struct inode_backref {
        struct list_head list;          /* link used by to_inode_backref() */
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        int errors;                     /* presumably REF_ERR_* bits -- confirm */
        u64 dir;
        u64 index;
        u16 namelen;                    /* length of name[] in bytes */
        char name[0];                   /* trailing, variable-length name */
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * Information about one root item, linkable into a list.
 * NOTE(review): the code that fills this in is not in this part of the
 * file; field meanings are inferred from their names.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/*
 * Back reference error bits, one bit per error so they can be OR-ed
 * together.  print_ref_error() turns a combined value into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole, covering [start, start + len), stored in an rb tree
 * (see add_file_extent_hole() and compare_hole_range()).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * In-memory record for one inode.  get_inode_rec() creates it and
 * files it in the inode cache under ->ino; free_inode_rec() drops a
 * reference and releases it when none is left.
 */
struct inode_record {
        struct list_head backrefs;      /* list of struct inode_backref */
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        int errors;                     /* I_ERR_* bits, see print_inode_error() */

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* get_inode_rec() presets this to 1 for BTRFS_FREE_INO_OBJECTID */
        u32 found_link;
        u64 found_size;
        u64 extent_start;               /* (u64)-1 in a fresh record */
        u64 extent_end;
        struct rb_root holes;           /* tree of struct file_extent_hole */
        struct list_head orphan_extents; /* list of struct orphan_data_extent */

        /* reference count; a shared record is cloned before modification */
        u32 refs;
};
280
/*
 * Error bits stored in inode_record::errors, one bit per error so they
 * can be OR-ed together.  print_inode_error() turns them into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * One reference to a root, kept on root_record::backrefs (that the list
 * holds this type is inferred from the names -- confirm).  The name is
 * stored inline after the fixed part of the structure.
 */
struct root_backref {
        struct list_head list;          /* link used by to_root_backref() */
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;                     /* presumably REF_ERR_* bits -- confirm */
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;                    /* presumably the length of name[] */
        char name[0];                   /* trailing, variable-length name */
};
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/*
 * Record for one root; ->cache lets it be stored in a cache tree.
 * NOTE(review): field meanings are inferred from their names.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache tree entry carrying an opaque pointer.  get_inode_rec() uses it
 * with cache.start = inode number, cache.size = 1 and data pointing at
 * the struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Per-node state stored in a cache tree, with its own root and inode
 * caches.  NOTE(review): usage is not visible in this part of the
 * file; field meanings are inferred from their names.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/* A start/size pair; presumably the location of one block -- confirm. */
struct block_info {
        u64 start;
        u32 size;
};
342
/*
 * State for walking a tree: a cache tree of shared nodes plus one
 * shared_node slot per possible tree level.
 * NOTE(review): usage is not visible in this part of the file.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
349
/* A key together with a root id, linkable into a list. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/*
 * A bytenr/bytes pair with a counter and a "broken" marker, linkable
 * into a list.  NOTE(review): usage is not visible in this part of the
 * file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/* Summary of one root, storable in a cache tree through ->cache_extent. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bits for the low memory mode check.
 *
 * No caller cares about the individual bits yet; they are only used
 * internally to classify errors.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two cannot be told apart once OR-ed into
 * the same value -- confirm whether that is intended.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Write a newline to stdout and flush it.  @p is not used.
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572
1573                 /* inode_ref + namelen should not cross item boundary */
1574                 if (cur + sizeof(*ref) + name_len > total ||
1575                     name_len > BTRFS_NAME_LEN) {
1576                         if (total < cur + sizeof(*ref))
1577                                 break;
1578
1579                         /* Still try to read out the remaining part */
1580                         len = min_t(u32, total - cur - sizeof(*ref),
1581                                     BTRFS_NAME_LEN);
1582                         error = REF_ERR_NAME_TOO_LONG;
1583                 } else {
1584                         len = name_len;
1585                         error = 0;
1586                 }
1587
1588                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1589                 add_inode_backref(inode_cache, key->objectid, key->offset,
1590                                   index, namebuf, len, 0, key->type, error);
1591
1592                 len = sizeof(*ref) + name_len;
1593                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1594                 cur += len;
1595         }
1596         return 0;
1597 }
1598
1599 static int process_inode_extref(struct extent_buffer *eb,
1600                                 int slot, struct btrfs_key *key,
1601                                 struct shared_node *active_node)
1602 {
1603         u32 total;
1604         u32 cur = 0;
1605         u32 len;
1606         u32 name_len;
1607         u64 index;
1608         u64 parent;
1609         int error;
1610         struct cache_tree *inode_cache;
1611         struct btrfs_inode_extref *extref;
1612         char namebuf[BTRFS_NAME_LEN];
1613
1614         inode_cache = &active_node->inode_cache;
1615
1616         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1617         total = btrfs_item_size_nr(eb, slot);
1618         while (cur < total) {
1619                 name_len = btrfs_inode_extref_name_len(eb, extref);
1620                 index = btrfs_inode_extref_index(eb, extref);
1621                 parent = btrfs_inode_extref_parent(eb, extref);
1622                 if (name_len <= BTRFS_NAME_LEN) {
1623                         len = name_len;
1624                         error = 0;
1625                 } else {
1626                         len = BTRFS_NAME_LEN;
1627                         error = REF_ERR_NAME_TOO_LONG;
1628                 }
1629                 read_extent_buffer(eb, namebuf,
1630                                    (unsigned long)(extref + 1), len);
1631                 add_inode_backref(inode_cache, key->objectid, parent,
1632                                   index, namebuf, len, 0, key->type, error);
1633
1634                 len = sizeof(*extref) + name_len;
1635                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1636                 cur += len;
1637         }
1638         return 0;
1639
1640 }
1641
1642 static int count_csum_range(struct btrfs_root *root, u64 start,
1643                             u64 len, u64 *found)
1644 {
1645         struct btrfs_key key;
1646         struct btrfs_path path;
1647         struct extent_buffer *leaf;
1648         int ret;
1649         size_t size;
1650         *found = 0;
1651         u64 csum_end;
1652         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1653
1654         btrfs_init_path(&path);
1655
1656         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1657         key.offset = start;
1658         key.type = BTRFS_EXTENT_CSUM_KEY;
1659
1660         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1661                                 &key, &path, 0, 0);
1662         if (ret < 0)
1663                 goto out;
1664         if (ret > 0 && path.slots[0] > 0) {
1665                 leaf = path.nodes[0];
1666                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1667                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1668                     key.type == BTRFS_EXTENT_CSUM_KEY)
1669                         path.slots[0]--;
1670         }
1671
1672         while (len > 0) {
1673                 leaf = path.nodes[0];
1674                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1675                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1676                         if (ret > 0)
1677                                 break;
1678                         else if (ret < 0)
1679                                 goto out;
1680                         leaf = path.nodes[0];
1681                 }
1682
1683                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1684                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1685                     key.type != BTRFS_EXTENT_CSUM_KEY)
1686                         break;
1687
1688                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1689                 if (key.offset >= start + len)
1690                         break;
1691
1692                 if (key.offset > start)
1693                         start = key.offset;
1694
1695                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1696                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1697                 if (csum_end > start) {
1698                         size = min(csum_end - start, len);
1699                         len -= size;
1700                         start += size;
1701                         *found += size;
1702                 }
1703
1704                 path.slots[0]++;
1705         }
1706 out:
1707         btrfs_release_path(&path);
1708         if (ret < 0)
1709                 return ret;
1710         return 0;
1711 }
1712
1713 static int process_file_extent(struct btrfs_root *root,
1714                                 struct extent_buffer *eb,
1715                                 int slot, struct btrfs_key *key,
1716                                 struct shared_node *active_node)
1717 {
1718         struct inode_record *rec;
1719         struct btrfs_file_extent_item *fi;
1720         u64 num_bytes = 0;
1721         u64 disk_bytenr = 0;
1722         u64 extent_offset = 0;
1723         u64 mask = root->sectorsize - 1;
1724         int extent_type;
1725         int ret;
1726
1727         rec = active_node->current;
1728         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1729         rec->found_file_extent = 1;
1730
1731         if (rec->extent_start == (u64)-1) {
1732                 rec->extent_start = key->offset;
1733                 rec->extent_end = key->offset;
1734         }
1735
1736         if (rec->extent_end > key->offset)
1737                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1738         else if (rec->extent_end < key->offset) {
1739                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1740                                            key->offset - rec->extent_end);
1741                 if (ret < 0)
1742                         return ret;
1743         }
1744
1745         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1746         extent_type = btrfs_file_extent_type(eb, fi);
1747
1748         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1749                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1750                 if (num_bytes == 0)
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 rec->found_size += num_bytes;
1753                 num_bytes = (num_bytes + mask) & ~mask;
1754         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1755                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1756                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1757                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1758                 extent_offset = btrfs_file_extent_offset(eb, fi);
1759                 if (num_bytes == 0 || (num_bytes & mask))
1760                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1761                 if (num_bytes + extent_offset >
1762                     btrfs_file_extent_ram_bytes(eb, fi))
1763                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1765                     (btrfs_file_extent_compression(eb, fi) ||
1766                      btrfs_file_extent_encryption(eb, fi) ||
1767                      btrfs_file_extent_other_encoding(eb, fi)))
1768                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1769                 if (disk_bytenr > 0)
1770                         rec->found_size += num_bytes;
1771         } else {
1772                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1773         }
1774         rec->extent_end = key->offset + num_bytes;
1775
1776         /*
1777          * The data reloc tree will copy full extents into its inode and then
1778          * copy the corresponding csums.  Because the extent it copied could be
1779          * a preallocated extent that hasn't been written to yet there may be no
1780          * csums to copy, ergo we won't have csums for our file extent.  This is
1781          * ok so just don't bother checking csums if the inode belongs to the
1782          * data reloc tree.
1783          */
1784         if (disk_bytenr > 0 &&
1785             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1786                 u64 found;
1787                 if (btrfs_file_extent_compression(eb, fi))
1788                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1789                 else
1790                         disk_bytenr += extent_offset;
1791
1792                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1793                 if (ret < 0)
1794                         return ret;
1795                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1796                         if (found > 0)
1797                                 rec->found_csum_item = 1;
1798                         if (found < num_bytes)
1799                                 rec->some_csum_missing = 1;
1800                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1801                         if (found > 0)
1802                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1803                 }
1804         }
1805         return 0;
1806 }
1807
1808 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1809                             struct walk_control *wc)
1810 {
1811         struct btrfs_key key;
1812         u32 nritems;
1813         int i;
1814         int ret = 0;
1815         struct cache_tree *inode_cache;
1816         struct shared_node *active_node;
1817
1818         if (wc->root_level == wc->active_node &&
1819             btrfs_root_refs(&root->root_item) == 0)
1820                 return 0;
1821
1822         active_node = wc->nodes[wc->active_node];
1823         inode_cache = &active_node->inode_cache;
1824         nritems = btrfs_header_nritems(eb);
1825         for (i = 0; i < nritems; i++) {
1826                 btrfs_item_key_to_cpu(eb, &key, i);
1827
1828                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1829                         continue;
1830                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1831                         continue;
1832
1833                 if (active_node->current == NULL ||
1834                     active_node->current->ino < key.objectid) {
1835                         if (active_node->current) {
1836                                 active_node->current->checked = 1;
1837                                 maybe_free_inode_rec(inode_cache,
1838                                                      active_node->current);
1839                         }
1840                         active_node->current = get_inode_rec(inode_cache,
1841                                                              key.objectid, 1);
1842                         BUG_ON(IS_ERR(active_node->current));
1843                 }
1844                 switch (key.type) {
1845                 case BTRFS_DIR_ITEM_KEY:
1846                 case BTRFS_DIR_INDEX_KEY:
1847                         ret = process_dir_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_INODE_REF_KEY:
1850                         ret = process_inode_ref(eb, i, &key, active_node);
1851                         break;
1852                 case BTRFS_INODE_EXTREF_KEY:
1853                         ret = process_inode_extref(eb, i, &key, active_node);
1854                         break;
1855                 case BTRFS_INODE_ITEM_KEY:
1856                         ret = process_inode_item(eb, i, &key, active_node);
1857                         break;
1858                 case BTRFS_EXTENT_DATA_KEY:
1859                         ret = process_file_extent(root, eb, i, &key,
1860                                                   active_node);
1861                         break;
1862                 default:
1863                         break;
1864                 };
1865         }
1866         return ret;
1867 }
1868
1869 struct node_refs {
1870         u64 bytenr[BTRFS_MAX_LEVEL];
1871         u64 refs[BTRFS_MAX_LEVEL];
1872         int need_check[BTRFS_MAX_LEVEL];
1873 };
1874
1875 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1876                              struct node_refs *nrefs, u64 level);
1877 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1878                             unsigned int ext_ref);
1879
1880 /*
1881  * Returns >0  Found error, not fatal, should continue
1882  * Returns <0  Fatal error, must exit the whole check
1883  * Returns 0   No errors found
1884  */
1885 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1886                                struct node_refs *nrefs, int *level, int ext_ref)
1887 {
1888         struct extent_buffer *cur = path->nodes[0];
1889         struct btrfs_key key;
1890         u64 cur_bytenr;
1891         u32 nritems;
1892         u64 first_ino = 0;
1893         int root_level = btrfs_header_level(root->node);
1894         int i;
1895         int ret = 0; /* Final return value */
1896         int err = 0; /* Positive error bitmap */
1897
1898         cur_bytenr = cur->start;
1899
1900         /* skip to first inode item or the first inode number change */
1901         nritems = btrfs_header_nritems(cur);
1902         for (i = 0; i < nritems; i++) {
1903                 btrfs_item_key_to_cpu(cur, &key, i);
1904                 if (i == 0)
1905                         first_ino = key.objectid;
1906                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1907                     (first_ino && first_ino != key.objectid))
1908                         break;
1909         }
1910         if (i == nritems) {
1911                 path->slots[0] = nritems;
1912                 return 0;
1913         }
1914         path->slots[0] = i;
1915
1916 again:
1917         err |= check_inode_item(root, path, ext_ref);
1918
1919         if (err & LAST_ITEM)
1920                 goto out;
1921
1922         /* still have inode items in thie leaf */
1923         if (cur->start == cur_bytenr)
1924                 goto again;
1925
1926         /*
1927          * we have switched to another leaf, above nodes may
1928          * have changed, here walk down the path, if a node
1929          * or leaf is shared, check whether we can skip this
1930          * node or leaf.
1931          */
1932         for (i = root_level; i >= 0; i--) {
1933                 if (path->nodes[i]->start == nrefs->bytenr[i])
1934                         continue;
1935
1936                 ret = update_nodes_refs(root,
1937                                 path->nodes[i]->start,
1938                                 nrefs, i);
1939                 if (ret)
1940                         goto out;
1941
1942                 if (!nrefs->need_check[i]) {
1943                         *level += 1;
1944                         break;
1945                 }
1946         }
1947
1948         for (i = 0; i < *level; i++) {
1949                 free_extent_buffer(path->nodes[i]);
1950                 path->nodes[i] = NULL;
1951         }
1952 out:
1953         err &= ~LAST_ITEM;
1954         if (err && !ret)
1955                 ret = err;
1956         return ret;
1957 }
1958
1959 static void reada_walk_down(struct btrfs_root *root,
1960                             struct extent_buffer *node, int slot)
1961 {
1962         u64 bytenr;
1963         u64 ptr_gen;
1964         u32 nritems;
1965         u32 blocksize;
1966         int i;
1967         int level;
1968
1969         level = btrfs_header_level(node);
1970         if (level != 1)
1971                 return;
1972
1973         nritems = btrfs_header_nritems(node);
1974         blocksize = root->nodesize;
1975         for (i = slot; i < nritems; i++) {
1976                 bytenr = btrfs_node_blockptr(node, i);
1977                 ptr_gen = btrfs_node_ptr_generation(node, i);
1978                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1979         }
1980 }
1981
1982 /*
1983  * Check the child node/leaf by the following condition:
1984  * 1. the first item key of the node/leaf should be the same with the one
1985  *    in parent.
1986  * 2. block in parent node should match the child node/leaf.
1987  * 3. generation of parent node and child's header should be consistent.
1988  *
1989  * Or the child node/leaf pointed by the key in parent is not valid.
1990  *
1991  * We hope to check leaf owner too, but since subvol may share leaves,
1992  * which makes leaf owner check not so strong, key check should be
1993  * sufficient enough for that case.
1994  */
1995 static int check_child_node(struct extent_buffer *parent, int slot,
1996                             struct extent_buffer *child)
1997 {
1998         struct btrfs_key parent_key;
1999         struct btrfs_key child_key;
2000         int ret = 0;
2001
2002         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2003         if (btrfs_header_level(child) == 0)
2004                 btrfs_item_key_to_cpu(child, &child_key, 0);
2005         else
2006                 btrfs_node_key_to_cpu(child, &child_key, 0);
2007
2008         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2009                 ret = -EINVAL;
2010                 fprintf(stderr,
2011                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2012                         parent_key.objectid, parent_key.type, parent_key.offset,
2013                         child_key.objectid, child_key.type, child_key.offset);
2014         }
2015         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2016                 ret = -EINVAL;
2017                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2018                         btrfs_node_blockptr(parent, slot),
2019                         btrfs_header_bytenr(child));
2020         }
2021         if (btrfs_node_ptr_generation(parent, slot) !=
2022             btrfs_header_generation(child)) {
2023                 ret = -EINVAL;
2024                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2025                         btrfs_header_generation(child),
2026                         btrfs_node_ptr_generation(parent, slot));
2027         }
2028         return ret;
2029 }
2030
2031 /*
2032  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2033  * in every fs or file tree check. Here we find its all root ids, and only check
2034  * it in the fs or file tree which has the smallest root id.
2035  */
2036 static int need_check(struct btrfs_root *root, struct ulist *roots)
2037 {
2038         struct rb_node *node;
2039         struct ulist_node *u;
2040
2041         if (roots->nnodes == 1)
2042                 return 1;
2043
2044         node = rb_first(&roots->root);
2045         u = rb_entry(node, struct ulist_node, rb_node);
2046         /*
2047          * current root id is not smallest, we skip it and let it be checked
2048          * in the fs or file tree who hash the smallest root id.
2049          */
2050         if (root->objectid != u->val)
2051                 return 0;
2052
2053         return 1;
2054 }
2055
2056 /*
2057  * for a tree node or leaf, we record its reference count, so later if we still
2058  * process this node or leaf, don't need to compute its reference count again.
2059  */
2060 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2061                              struct node_refs *nrefs, u64 level)
2062 {
2063         int check, ret;
2064         u64 refs;
2065         struct ulist *roots;
2066
2067         if (nrefs->bytenr[level] != bytenr) {
2068                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2069                                        level, 1, &refs, NULL);
2070                 if (ret < 0)
2071                         return ret;
2072
2073                 nrefs->bytenr[level] = bytenr;
2074                 nrefs->refs[level] = refs;
2075                 if (refs > 1) {
2076                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2077                                                    0, &roots);
2078                         if (ret)
2079                                 return -EIO;
2080
2081                         check = need_check(root, roots);
2082                         ulist_free(roots);
2083                         nrefs->need_check[level] = check;
2084                 } else {
2085                         nrefs->need_check[level] = 1;
2086                 }
2087         }
2088
2089         return 0;
2090 }
2091
2092 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2093                           struct walk_control *wc, int *level,
2094                           struct node_refs *nrefs)
2095 {
2096         enum btrfs_tree_block_status status;
2097         u64 bytenr;
2098         u64 ptr_gen;
2099         struct extent_buffer *next;
2100         struct extent_buffer *cur;
2101         u32 blocksize;
2102         int ret, err = 0;
2103         u64 refs;
2104
2105         WARN_ON(*level < 0);
2106         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2107
2108         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2109                 refs = nrefs->refs[*level];
2110                 ret = 0;
2111         } else {
2112                 ret = btrfs_lookup_extent_info(NULL, root,
2113                                        path->nodes[*level]->start,
2114                                        *level, 1, &refs, NULL);
2115                 if (ret < 0) {
2116                         err = ret;
2117                         goto out;
2118                 }
2119                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2120                 nrefs->refs[*level] = refs;
2121         }
2122
2123         if (refs > 1) {
2124                 ret = enter_shared_node(root, path->nodes[*level]->start,
2125                                         refs, wc, *level);
2126                 if (ret > 0) {
2127                         err = ret;
2128                         goto out;
2129                 }
2130         }
2131
2132         while (*level >= 0) {
2133                 WARN_ON(*level < 0);
2134                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2135                 cur = path->nodes[*level];
2136
2137                 if (btrfs_header_level(cur) != *level)
2138                         WARN_ON(1);
2139
2140                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2141                         break;
2142                 if (*level == 0) {
2143                         ret = process_one_leaf(root, cur, wc);
2144                         if (ret < 0)
2145                                 err = ret;
2146                         break;
2147                 }
2148                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2149                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2150                 blocksize = root->nodesize;
2151
2152                 if (bytenr == nrefs->bytenr[*level - 1]) {
2153                         refs = nrefs->refs[*level - 1];
2154                 } else {
2155                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2156                                         *level - 1, 1, &refs, NULL);
2157                         if (ret < 0) {
2158                                 refs = 0;
2159                         } else {
2160                                 nrefs->bytenr[*level - 1] = bytenr;
2161                                 nrefs->refs[*level - 1] = refs;
2162                         }
2163                 }
2164
2165                 if (refs > 1) {
2166                         ret = enter_shared_node(root, bytenr, refs,
2167                                                 wc, *level - 1);
2168                         if (ret > 0) {
2169                                 path->slots[*level]++;
2170                                 continue;
2171                         }
2172                 }
2173
2174                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2175                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2176                         free_extent_buffer(next);
2177                         reada_walk_down(root, cur, path->slots[*level]);
2178                         next = read_tree_block(root, bytenr, blocksize,
2179                                                ptr_gen);
2180                         if (!extent_buffer_uptodate(next)) {
2181                                 struct btrfs_key node_key;
2182
2183                                 btrfs_node_key_to_cpu(path->nodes[*level],
2184                                                       &node_key,
2185                                                       path->slots[*level]);
2186                                 btrfs_add_corrupt_extent_record(root->fs_info,
2187                                                 &node_key,
2188                                                 path->nodes[*level]->start,
2189                                                 root->nodesize, *level);
2190                                 err = -EIO;
2191                                 goto out;
2192                         }
2193                 }
2194
2195                 ret = check_child_node(cur, path->slots[*level], next);
2196                 if (ret) {
2197                         free_extent_buffer(next);
2198                         err = ret;
2199                         goto out;
2200                 }
2201
2202                 if (btrfs_is_leaf(next))
2203                         status = btrfs_check_leaf(root, NULL, next);
2204                 else
2205                         status = btrfs_check_node(root, NULL, next);
2206                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2207                         free_extent_buffer(next);
2208                         err = -EIO;
2209                         goto out;
2210                 }
2211
2212                 *level = *level - 1;
2213                 free_extent_buffer(path->nodes[*level]);
2214                 path->nodes[*level] = next;
2215                 path->slots[*level] = 0;
2216         }
2217 out:
2218         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2219         return err;
2220 }
2221
2222 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2223                             unsigned int ext_ref);
2224
2225 /*
2226  * Returns >0  Found error, should continue
2227  * Returns <0  Fatal error, must exit the whole check
2228  * Returns 0   No errors found
2229  */
2230 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2231                              int *level, struct node_refs *nrefs, int ext_ref)
2232 {
2233         enum btrfs_tree_block_status status;
2234         u64 bytenr;
2235         u64 ptr_gen;
2236         struct extent_buffer *next;
2237         struct extent_buffer *cur;
2238         u32 blocksize;
2239         int ret;
2240
2241         WARN_ON(*level < 0);
2242         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243
2244         ret = update_nodes_refs(root, path->nodes[*level]->start,
2245                                 nrefs, *level);
2246         if (ret < 0)
2247                 return ret;
2248
2249         while (*level >= 0) {
2250                 WARN_ON(*level < 0);
2251                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2252                 cur = path->nodes[*level];
2253
2254                 if (btrfs_header_level(cur) != *level)
2255                         WARN_ON(1);
2256
2257                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2258                         break;
2259                 /* Don't forgot to check leaf/node validation */
2260                 if (*level == 0) {
2261                         ret = btrfs_check_leaf(root, NULL, cur);
2262                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2263                                 ret = -EIO;
2264                                 break;
2265                         }
2266                         ret = process_one_leaf_v2(root, path, nrefs,
2267                                                   level, ext_ref);
2268                         break;
2269                 } else {
2270                         ret = btrfs_check_node(root, NULL, cur);
2271                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2272                                 ret = -EIO;
2273                                 break;
2274                         }
2275                 }
2276                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2277                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2278                 blocksize = root->nodesize;
2279
2280                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2281                 if (ret)
2282                         break;
2283                 if (!nrefs->need_check[*level - 1]) {
2284                         path->slots[*level]++;
2285                         continue;
2286                 }
2287
2288                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2289                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2290                         free_extent_buffer(next);
2291                         reada_walk_down(root, cur, path->slots[*level]);
2292                         next = read_tree_block(root, bytenr, blocksize,
2293                                                ptr_gen);
2294                         if (!extent_buffer_uptodate(next)) {
2295                                 struct btrfs_key node_key;
2296
2297                                 btrfs_node_key_to_cpu(path->nodes[*level],
2298                                                       &node_key,
2299                                                       path->slots[*level]);
2300                                 btrfs_add_corrupt_extent_record(root->fs_info,
2301                                                 &node_key,
2302                                                 path->nodes[*level]->start,
2303                                                 root->nodesize, *level);
2304                                 ret = -EIO;
2305                                 break;
2306                         }
2307                 }
2308
2309                 ret = check_child_node(cur, path->slots[*level], next);
2310                 if (ret < 0) 
2311                         break;
2312
2313                 if (btrfs_is_leaf(next))
2314                         status = btrfs_check_leaf(root, NULL, next);
2315                 else
2316                         status = btrfs_check_node(root, NULL, next);
2317                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2318                         free_extent_buffer(next);
2319                         ret = -EIO;
2320                         break;
2321                 }
2322
2323                 *level = *level - 1;
2324                 free_extent_buffer(path->nodes[*level]);
2325                 path->nodes[*level] = next;
2326                 path->slots[*level] = 0;
2327         }
2328         return ret;
2329 }
2330
2331 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2332                         struct walk_control *wc, int *level)
2333 {
2334         int i;
2335         struct extent_buffer *leaf;
2336
2337         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2338                 leaf = path->nodes[i];
2339                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2340                         path->slots[i]++;
2341                         *level = i;
2342                         return 0;
2343                 } else {
2344                         free_extent_buffer(path->nodes[*level]);
2345                         path->nodes[*level] = NULL;
2346                         BUG_ON(*level > wc->active_node);
2347                         if (*level == wc->active_node)
2348                                 leave_shared_node(root, wc, *level);
2349                         *level = i + 1;
2350                 }
2351         }
2352         return 1;
2353 }
2354
2355 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2356                            int *level)
2357 {
2358         int i;
2359         struct extent_buffer *leaf;
2360
2361         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2362                 leaf = path->nodes[i];
2363                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2364                         path->slots[i]++;
2365                         *level = i;
2366                         return 0;
2367                 } else {
2368                         free_extent_buffer(path->nodes[*level]);
2369                         path->nodes[*level] = NULL;
2370                         *level = i + 1;
2371                 }
2372         }
2373         return 1;
2374 }
2375
2376 static int check_root_dir(struct inode_record *rec)
2377 {
2378         struct inode_backref *backref;
2379         int ret = -1;
2380
2381         if (!rec->found_inode_item || rec->errors)
2382                 goto out;
2383         if (rec->nlink != 1 || rec->found_link != 0)
2384                 goto out;
2385         if (list_empty(&rec->backrefs))
2386                 goto out;
2387         backref = to_inode_backref(rec->backrefs.next);
2388         if (!backref->found_inode_ref)
2389                 goto out;
2390         if (backref->index != 0 || backref->namelen != 2 ||
2391             memcmp(backref->name, "..", 2))
2392                 goto out;
2393         if (backref->found_dir_index || backref->found_dir_item)
2394                 goto out;
2395         ret = 0;
2396 out:
2397         return ret;
2398 }
2399
2400 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2401                               struct btrfs_root *root, struct btrfs_path *path,
2402                               struct inode_record *rec)
2403 {
2404         struct btrfs_inode_item *ei;
2405         struct btrfs_key key;
2406         int ret;
2407
2408         key.objectid = rec->ino;
2409         key.type = BTRFS_INODE_ITEM_KEY;
2410         key.offset = (u64)-1;
2411
2412         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2413         if (ret < 0)
2414                 goto out;
2415         if (ret) {
2416                 if (!path->slots[0]) {
2417                         ret = -ENOENT;
2418                         goto out;
2419                 }
2420                 path->slots[0]--;
2421                 ret = 0;
2422         }
2423         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2424         if (key.objectid != rec->ino) {
2425                 ret = -ENOENT;
2426                 goto out;
2427         }
2428
2429         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2430                             struct btrfs_inode_item);
2431         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2432         btrfs_mark_buffer_dirty(path->nodes[0]);
2433         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2434         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2435                root->root_key.objectid);
2436 out:
2437         btrfs_release_path(path);
2438         return ret;
2439 }
2440
2441 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2442                                     struct btrfs_root *root,
2443                                     struct btrfs_path *path,
2444                                     struct inode_record *rec)
2445 {
2446         int ret;
2447
2448         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2449         btrfs_release_path(path);
2450         if (!ret)
2451                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2452         return ret;
2453 }
2454
2455 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2456                                struct btrfs_root *root,
2457                                struct btrfs_path *path,
2458                                struct inode_record *rec)
2459 {
2460         struct btrfs_inode_item *ei;
2461         struct btrfs_key key;
2462         int ret = 0;
2463
2464         key.objectid = rec->ino;
2465         key.type = BTRFS_INODE_ITEM_KEY;
2466         key.offset = 0;
2467
2468         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2469         if (ret) {
2470                 if (ret > 0)
2471                         ret = -ENOENT;
2472                 goto out;
2473         }
2474
2475         /* Since ret == 0, no need to check anything */
2476         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2477                             struct btrfs_inode_item);
2478         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2479         btrfs_mark_buffer_dirty(path->nodes[0]);
2480         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2481         printf("reset nbytes for ino %llu root %llu\n",
2482                rec->ino, root->root_key.objectid);
2483 out:
2484         btrfs_release_path(path);
2485         return ret;
2486 }
2487
2488 static int add_missing_dir_index(struct btrfs_root *root,
2489                                  struct cache_tree *inode_cache,
2490                                  struct inode_record *rec,
2491                                  struct inode_backref *backref)
2492 {
2493         struct btrfs_path path;
2494         struct btrfs_trans_handle *trans;
2495         struct btrfs_dir_item *dir_item;
2496         struct extent_buffer *leaf;
2497         struct btrfs_key key;
2498         struct btrfs_disk_key disk_key;
2499         struct inode_record *dir_rec;
2500         unsigned long name_ptr;
2501         u32 data_size = sizeof(*dir_item) + backref->namelen;
2502         int ret;
2503
2504         trans = btrfs_start_transaction(root, 1);
2505         if (IS_ERR(trans))
2506                 return PTR_ERR(trans);
2507
2508         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2509                 (unsigned long long)rec->ino);
2510
2511         btrfs_init_path(&path);
2512         key.objectid = backref->dir;
2513         key.type = BTRFS_DIR_INDEX_KEY;
2514         key.offset = backref->index;
2515         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2516         BUG_ON(ret);
2517
2518         leaf = path.nodes[0];
2519         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2520
2521         disk_key.objectid = cpu_to_le64(rec->ino);
2522         disk_key.type = BTRFS_INODE_ITEM_KEY;
2523         disk_key.offset = 0;
2524
2525         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2526         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2527         btrfs_set_dir_data_len(leaf, dir_item, 0);
2528         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2529         name_ptr = (unsigned long)(dir_item + 1);
2530         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2531         btrfs_mark_buffer_dirty(leaf);
2532         btrfs_release_path(&path);
2533         btrfs_commit_transaction(trans, root);
2534
2535         backref->found_dir_index = 1;
2536         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2537         BUG_ON(IS_ERR(dir_rec));
2538         if (!dir_rec)
2539                 return 0;
2540         dir_rec->found_size += backref->namelen;
2541         if (dir_rec->found_size == dir_rec->isize &&
2542             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2543                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2544         if (dir_rec->found_size != dir_rec->isize)
2545                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2546
2547         return 0;
2548 }
2549
2550 static int delete_dir_index(struct btrfs_root *root,
2551                             struct inode_backref *backref)
2552 {
2553         struct btrfs_trans_handle *trans;
2554         struct btrfs_dir_item *di;
2555         struct btrfs_path path;
2556         int ret = 0;
2557
2558         trans = btrfs_start_transaction(root, 1);
2559         if (IS_ERR(trans))
2560                 return PTR_ERR(trans);
2561
2562         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2563                 (unsigned long long)backref->dir,
2564                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2565                 (unsigned long long)root->objectid);
2566
2567         btrfs_init_path(&path);
2568         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2569                                     backref->name, backref->namelen,
2570                                     backref->index, -1);
2571         if (IS_ERR(di)) {
2572                 ret = PTR_ERR(di);
2573                 btrfs_release_path(&path);
2574                 btrfs_commit_transaction(trans, root);
2575                 if (ret == -ENOENT)
2576                         return 0;
2577                 return ret;
2578         }
2579
2580         if (!di)
2581                 ret = btrfs_del_item(trans, root, &path);
2582         else
2583                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2584         BUG_ON(ret);
2585         btrfs_release_path(&path);
2586         btrfs_commit_transaction(trans, root);
2587         return ret;
2588 }
2589
2590 static int create_inode_item(struct btrfs_root *root,
2591                              struct inode_record *rec,
2592                              int root_dir)
2593 {
2594         struct btrfs_trans_handle *trans;
2595         struct btrfs_inode_item inode_item;
2596         time_t now = time(NULL);
2597         int ret;
2598
2599         trans = btrfs_start_transaction(root, 1);
2600         if (IS_ERR(trans)) {
2601                 ret = PTR_ERR(trans);
2602                 return ret;
2603         }
2604
2605         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2606                 "be incomplete, please check permissions and content after "
2607                 "the fsck completes.\n", (unsigned long long)root->objectid,
2608                 (unsigned long long)rec->ino);
2609
2610         memset(&inode_item, 0, sizeof(inode_item));
2611         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2612         if (root_dir)
2613                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2614         else
2615                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2616         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2617         if (rec->found_dir_item) {
2618                 if (rec->found_file_extent)
2619                         fprintf(stderr, "root %llu inode %llu has both a dir "
2620                                 "item and extents, unsure if it is a dir or a "
2621                                 "regular file so setting it as a directory\n",
2622                                 (unsigned long long)root->objectid,
2623                                 (unsigned long long)rec->ino);
2624                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2625                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2626         } else if (!rec->found_dir_item) {
2627                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2628                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2629         }
2630         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2631         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2632         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2633         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2634         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2635         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2636         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2637         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2638
2639         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2640         BUG_ON(ret);
2641         btrfs_commit_transaction(trans, root);
2642         return 0;
2643 }
2644
2645 static int repair_inode_backrefs(struct btrfs_root *root,
2646                                  struct inode_record *rec,
2647                                  struct cache_tree *inode_cache,
2648                                  int delete)
2649 {
2650         struct inode_backref *tmp, *backref;
2651         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2652         int ret = 0;
2653         int repaired = 0;
2654
2655         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2656                 if (!delete && rec->ino == root_dirid) {
2657                         if (!rec->found_inode_item) {
2658                                 ret = create_inode_item(root, rec, 1);
2659                                 if (ret)
2660                                         break;
2661                                 repaired++;
2662                         }
2663                 }
2664
2665                 /* Index 0 for root dir's are special, don't mess with it */
2666                 if (rec->ino == root_dirid && backref->index == 0)
2667                         continue;
2668
2669                 if (delete &&
2670                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2671                      (backref->found_dir_index && backref->found_inode_ref &&
2672                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2673                         ret = delete_dir_index(root, backref);
2674                         if (ret)
2675                                 break;
2676                         repaired++;
2677                         list_del(&backref->list);
2678                         free(backref);
2679                         continue;
2680                 }
2681
2682                 if (!delete && !backref->found_dir_index &&
2683                     backref->found_dir_item && backref->found_inode_ref) {
2684                         ret = add_missing_dir_index(root, inode_cache, rec,
2685                                                     backref);
2686                         if (ret)
2687                                 break;
2688                         repaired++;
2689                         if (backref->found_dir_item &&
2690                             backref->found_dir_index) {
2691                                 if (!backref->errors &&
2692                                     backref->found_inode_ref) {
2693                                         list_del(&backref->list);
2694                                         free(backref);
2695                                         continue;
2696                                 }
2697                         }
2698                 }
2699
2700                 if (!delete && (!backref->found_dir_index &&
2701                                 !backref->found_dir_item &&
2702                                 backref->found_inode_ref)) {
2703                         struct btrfs_trans_handle *trans;
2704                         struct btrfs_key location;
2705
2706                         ret = check_dir_conflict(root, backref->name,
2707                                                  backref->namelen,
2708                                                  backref->dir,
2709                                                  backref->index);
2710                         if (ret) {
2711                                 /*
2712                                  * let nlink fixing routine to handle it,
2713                                  * which can do it better.
2714                                  */
2715                                 ret = 0;
2716                                 break;
2717                         }
2718                         location.objectid = rec->ino;
2719                         location.type = BTRFS_INODE_ITEM_KEY;
2720                         location.offset = 0;
2721
2722                         trans = btrfs_start_transaction(root, 1);
2723                         if (IS_ERR(trans)) {
2724                                 ret = PTR_ERR(trans);
2725                                 break;
2726                         }
2727                         fprintf(stderr, "adding missing dir index/item pair "
2728                                 "for inode %llu\n",
2729                                 (unsigned long long)rec->ino);
2730                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2731                                                     backref->namelen,
2732                                                     backref->dir, &location,
2733                                                     imode_to_type(rec->imode),
2734                                                     backref->index);
2735                         BUG_ON(ret);
2736                         btrfs_commit_transaction(trans, root);
2737                         repaired++;
2738                 }
2739
2740                 if (!delete && (backref->found_inode_ref &&
2741                                 backref->found_dir_index &&
2742                                 backref->found_dir_item &&
2743                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2744                                 !rec->found_inode_item)) {
2745                         ret = create_inode_item(root, rec, 0);
2746                         if (ret)
2747                                 break;
2748                         repaired++;
2749                 }
2750
2751         }
2752         return ret ? ret : repaired;
2753 }
2754
2755 /*
2756  * To determine the file type for nlink/inode_item repair
2757  *
2758  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2759  * Return -ENOENT if file type is not found.
2760  */
2761 static int find_file_type(struct inode_record *rec, u8 *type)
2762 {
2763         struct inode_backref *backref;
2764
2765         /* For inode item recovered case */
2766         if (rec->found_inode_item) {
2767                 *type = imode_to_type(rec->imode);
2768                 return 0;
2769         }
2770
2771         list_for_each_entry(backref, &rec->backrefs, list) {
2772                 if (backref->found_dir_index || backref->found_dir_item) {
2773                         *type = backref->filetype;
2774                         return 0;
2775                 }
2776         }
2777         return -ENOENT;
2778 }
2779
2780 /*
2781  * To determine the file name for nlink repair
2782  *
2783  * Return 0 if file name is found, set name and namelen.
2784  * Return -ENOENT if file name is not found.
2785  */
2786 static int find_file_name(struct inode_record *rec,
2787                           char *name, int *namelen)
2788 {
2789         struct inode_backref *backref;
2790
2791         list_for_each_entry(backref, &rec->backrefs, list) {
2792                 if (backref->found_dir_index || backref->found_dir_item ||
2793                     backref->found_inode_ref) {
2794                         memcpy(name, backref->name, backref->namelen);
2795                         *namelen = backref->namelen;
2796                         return 0;
2797                 }
2798         }
2799         return -ENOENT;
2800 }
2801
2802 /* Reset the nlink of the inode to the correct one */
2803 static int reset_nlink(struct btrfs_trans_handle *trans,
2804                        struct btrfs_root *root,
2805                        struct btrfs_path *path,
2806                        struct inode_record *rec)
2807 {
2808         struct inode_backref *backref;
2809         struct inode_backref *tmp;
2810         struct btrfs_key key;
2811         struct btrfs_inode_item *inode_item;
2812         int ret = 0;
2813
2814         /* We don't believe this either, reset it and iterate backref */
2815         rec->found_link = 0;
2816
2817         /* Remove all backref including the valid ones */
2818         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2819                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2820                                    backref->index, backref->name,
2821                                    backref->namelen, 0);
2822                 if (ret < 0)
2823                         goto out;
2824
2825                 /* remove invalid backref, so it won't be added back */
2826                 if (!(backref->found_dir_index &&
2827                       backref->found_dir_item &&
2828                       backref->found_inode_ref)) {
2829                         list_del(&backref->list);
2830                         free(backref);
2831                 } else {
2832                         rec->found_link++;
2833                 }
2834         }
2835
2836         /* Set nlink to 0 */
2837         key.objectid = rec->ino;
2838         key.type = BTRFS_INODE_ITEM_KEY;
2839         key.offset = 0;
2840         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2841         if (ret < 0)
2842                 goto out;
2843         if (ret > 0) {
2844                 ret = -ENOENT;
2845                 goto out;
2846         }
2847         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2848                                     struct btrfs_inode_item);
2849         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2850         btrfs_mark_buffer_dirty(path->nodes[0]);
2851         btrfs_release_path(path);
2852
2853         /*
2854          * Add back valid inode_ref/dir_item/dir_index,
2855          * add_link() will handle the nlink inc, so new nlink must be correct
2856          */
2857         list_for_each_entry(backref, &rec->backrefs, list) {
2858                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2859                                      backref->name, backref->namelen,
2860                                      backref->filetype, &backref->index, 1);
2861                 if (ret < 0)
2862                         goto out;
2863         }
2864 out:
2865         btrfs_release_path(path);
2866         return ret;
2867 }
2868
2869 static int get_highest_inode(struct btrfs_trans_handle *trans,
2870                                 struct btrfs_root *root,
2871                                 struct btrfs_path *path,
2872                                 u64 *highest_ino)
2873 {
2874         struct btrfs_key key, found_key;
2875         int ret;
2876
2877         btrfs_init_path(path);
2878         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2879         key.offset = -1;
2880         key.type = BTRFS_INODE_ITEM_KEY;
2881         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2882         if (ret == 1) {
2883                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2884                                 path->slots[0] - 1);
2885                 *highest_ino = found_key.objectid;
2886                 ret = 0;
2887         }
2888         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2889                 ret = -EOVERFLOW;
2890         btrfs_release_path(path);
2891         return ret;
2892 }
2893
2894 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2895                                struct btrfs_root *root,
2896                                struct btrfs_path *path,
2897                                struct inode_record *rec)
2898 {
2899         char *dir_name = "lost+found";
2900         char namebuf[BTRFS_NAME_LEN] = {0};
2901         u64 lost_found_ino;
2902         u32 mode = 0700;
2903         u8 type = 0;
2904         int namelen = 0;
2905         int name_recovered = 0;
2906         int type_recovered = 0;
2907         int ret = 0;
2908
2909         /*
2910          * Get file name and type first before these invalid inode ref
2911          * are deleted by remove_all_invalid_backref()
2912          */
2913         name_recovered = !find_file_name(rec, namebuf, &namelen);
2914         type_recovered = !find_file_type(rec, &type);
2915
2916         if (!name_recovered) {
2917                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2918                        rec->ino, rec->ino);
2919                 namelen = count_digits(rec->ino);
2920                 sprintf(namebuf, "%llu", rec->ino);
2921                 name_recovered = 1;
2922         }
2923         if (!type_recovered) {
2924                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2925                        rec->ino);
2926                 type = BTRFS_FT_REG_FILE;
2927                 type_recovered = 1;
2928         }
2929
2930         ret = reset_nlink(trans, root, path, rec);
2931         if (ret < 0) {
2932                 fprintf(stderr,
2933                         "Failed to reset nlink for inode %llu: %s\n",
2934                         rec->ino, strerror(-ret));
2935                 goto out;
2936         }
2937
2938         if (rec->found_link == 0) {
2939                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2940                 if (ret < 0)
2941                         goto out;
2942                 lost_found_ino++;
2943                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2944                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2945                                   mode);
2946                 if (ret < 0) {
2947                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2948                                 dir_name, strerror(-ret));
2949                         goto out;
2950                 }
2951                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2952                                      namebuf, namelen, type, NULL, 1);
2953                 /*
2954                  * Add ".INO" suffix several times to handle case where
2955                  * "FILENAME.INO" is already taken by another file.
2956                  */
2957                 while (ret == -EEXIST) {
2958                         /*
2959                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2960                          */
2961                         if (namelen + count_digits(rec->ino) + 1 >
2962                             BTRFS_NAME_LEN) {
2963                                 ret = -EFBIG;
2964                                 goto out;
2965                         }
2966                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2967                                  ".%llu", rec->ino);
2968                         namelen += count_digits(rec->ino) + 1;
2969                         ret = btrfs_add_link(trans, root, rec->ino,
2970                                              lost_found_ino, namebuf,
2971                                              namelen, type, NULL, 1);
2972                 }
2973                 if (ret < 0) {
2974                         fprintf(stderr,
2975                                 "Failed to link the inode %llu to %s dir: %s\n",
2976                                 rec->ino, dir_name, strerror(-ret));
2977                         goto out;
2978                 }
2979                 /*
2980                  * Just increase the found_link, don't actually add the
2981                  * backref. This will make things easier and this inode
2982                  * record will be freed after the repair is done.
2983                  * So fsck will not report problem about this inode.
2984                  */
2985                 rec->found_link++;
2986                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2987                        namelen, namebuf, dir_name);
2988         }
2989         printf("Fixed the nlink of inode %llu\n", rec->ino);
2990 out:
2991         /*
2992          * Clear the flag anyway, or we will loop forever for the same inode
2993          * as it will not be removed from the bad inode list and the dead loop
2994          * happens.
2995          */
2996         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2997         btrfs_release_path(path);
2998         return ret;
2999 }
3000
3001 /*
3002  * Check if there is any normal(reg or prealloc) file extent for given
3003  * ino.
3004  * This is used to determine the file type when neither its dir_index/item or
3005  * inode_item exists.
3006  *
3007  * This will *NOT* report error, if any error happens, just consider it does
3008  * not have any normal file extent.
3009  */
3010 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3011 {
3012         struct btrfs_path path;
3013         struct btrfs_key key;
3014         struct btrfs_key found_key;
3015         struct btrfs_file_extent_item *fi;
3016         u8 type;
3017         int ret = 0;
3018
3019         btrfs_init_path(&path);
3020         key.objectid = ino;
3021         key.type = BTRFS_EXTENT_DATA_KEY;
3022         key.offset = 0;
3023
3024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3025         if (ret < 0) {
3026                 ret = 0;
3027                 goto out;
3028         }
3029         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3030                 ret = btrfs_next_leaf(root, &path);
3031                 if (ret) {
3032                         ret = 0;
3033                         goto out;
3034                 }
3035         }
3036         while (1) {
3037                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3038                                       path.slots[0]);
3039                 if (found_key.objectid != ino ||
3040                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3041                         break;
3042                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3043                                     struct btrfs_file_extent_item);
3044                 type = btrfs_file_extent_type(path.nodes[0], fi);
3045                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3046                         ret = 1;
3047                         goto out;
3048                 }
3049         }
3050 out:
3051         btrfs_release_path(&path);
3052         return ret;
3053 }
3054
3055 static u32 btrfs_type_to_imode(u8 type)
3056 {
3057         static u32 imode_by_btrfs_type[] = {
3058                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3059                 [BTRFS_FT_DIR]          = S_IFDIR,
3060                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3061                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3062                 [BTRFS_FT_FIFO]         = S_IFIFO,
3063                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3064                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3065         };
3066
3067         return imode_by_btrfs_type[(type)];
3068 }
3069
3070 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3071                                 struct btrfs_root *root,
3072                                 struct btrfs_path *path,
3073                                 struct inode_record *rec)
3074 {
3075         u8 filetype;
3076         u32 mode = 0700;
3077         int type_recovered = 0;
3078         int ret = 0;
3079
3080         printf("Trying to rebuild inode:%llu\n", rec->ino);
3081
3082         type_recovered = !find_file_type(rec, &filetype);
3083
3084         /*
3085          * Try to determine inode type if type not found.
3086          *
3087          * For found regular file extent, it must be FILE.
3088          * For found dir_item/index, it must be DIR.
3089          *
3090          * For undetermined one, use FILE as fallback.
3091          *
3092          * TODO:
3093          * 1. If found backref(inode_index/item is already handled) to it,
3094          *    it must be DIR.
3095          *    Need new inode-inode ref structure to allow search for that.
3096          */
3097         if (!type_recovered) {
3098                 if (rec->found_file_extent &&
3099                     find_normal_file_extent(root, rec->ino)) {
3100                         type_recovered = 1;
3101                         filetype = BTRFS_FT_REG_FILE;
3102                 } else if (rec->found_dir_item) {
3103                         type_recovered = 1;
3104                         filetype = BTRFS_FT_DIR;
3105                 } else if (!list_empty(&rec->orphan_extents)) {
3106                         type_recovered = 1;
3107                         filetype = BTRFS_FT_REG_FILE;
3108                 } else{
3109                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3110                                rec->ino);
3111                         type_recovered = 1;
3112                         filetype = BTRFS_FT_REG_FILE;
3113                 }
3114         }
3115
3116         ret = btrfs_new_inode(trans, root, rec->ino,
3117                               mode | btrfs_type_to_imode(filetype));
3118         if (ret < 0)
3119                 goto out;
3120
3121         /*
3122          * Here inode rebuild is done, we only rebuild the inode item,
3123          * don't repair the nlink(like move to lost+found).
3124          * That is the job of nlink repair.
3125          *
3126          * We just fill the record and return
3127          */
3128         rec->found_dir_item = 1;
3129         rec->imode = mode | btrfs_type_to_imode(filetype);
3130         rec->nlink = 0;
3131         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3132         /* Ensure the inode_nlinks repair function will be called */
3133         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3134 out:
3135         return ret;
3136 }
3137
3138 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3139                                       struct btrfs_root *root,
3140                                       struct btrfs_path *path,
3141                                       struct inode_record *rec)
3142 {
3143         struct orphan_data_extent *orphan;
3144         struct orphan_data_extent *tmp;
3145         int ret = 0;
3146
3147         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3148                 /*
3149                  * Check for conflicting file extents
3150                  *
3151                  * Here we don't know whether the extents is compressed or not,
3152                  * so we can only assume it not compressed nor data offset,
3153                  * and use its disk_len as extent length.
3154                  */
3155                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3156                                        orphan->offset, orphan->disk_len, 0);
3157                 btrfs_release_path(path);
3158                 if (ret < 0)
3159                         goto out;
3160                 if (!ret) {
3161                         fprintf(stderr,
3162                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3163                                 orphan->disk_bytenr, orphan->disk_len);
3164                         ret = btrfs_free_extent(trans,
3165                                         root->fs_info->extent_root,
3166                                         orphan->disk_bytenr, orphan->disk_len,
3167                                         0, root->objectid, orphan->objectid,
3168                                         orphan->offset);
3169                         if (ret < 0)
3170                                 goto out;
3171                 }
3172                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3173                                 orphan->offset, orphan->disk_bytenr,
3174                                 orphan->disk_len, orphan->disk_len);
3175                 if (ret < 0)
3176                         goto out;
3177
3178                 /* Update file size info */
3179                 rec->found_size += orphan->disk_len;
3180                 if (rec->found_size == rec->nbytes)
3181                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3182
3183                 /* Update the file extent hole info too */
3184                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3185                                            orphan->disk_len);
3186                 if (ret < 0)
3187                         goto out;
3188                 if (RB_EMPTY_ROOT(&rec->holes))
3189                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3190
3191                 list_del(&orphan->list);
3192                 free(orphan);
3193         }
3194         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3195 out:
3196         return ret;
3197 }
3198
3199 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3200                                         struct btrfs_root *root,
3201                                         struct btrfs_path *path,
3202                                         struct inode_record *rec)
3203 {
3204         struct rb_node *node;
3205         struct file_extent_hole *hole;
3206         int found = 0;
3207         int ret = 0;
3208
3209         node = rb_first(&rec->holes);
3210
3211         while (node) {
3212                 found = 1;
3213                 hole = rb_entry(node, struct file_extent_hole, node);
3214                 ret = btrfs_punch_hole(trans, root, rec->ino,
3215                                        hole->start, hole->len);
3216                 if (ret < 0)
3217                         goto out;
3218                 ret = del_file_extent_hole(&rec->holes, hole->start,
3219                                            hole->len);
3220                 if (ret < 0)
3221                         goto out;
3222                 if (RB_EMPTY_ROOT(&rec->holes))
3223                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3224                 node = rb_first(&rec->holes);
3225         }
3226         /* special case for a file losing all its file extent */
3227         if (!found) {
3228                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3229                                        round_up(rec->isize, root->sectorsize));
3230                 if (ret < 0)
3231                         goto out;
3232         }
3233         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3234                rec->ino, root->objectid);
3235 out:
3236         return ret;
3237 }
3238
3239 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3240 {
3241         struct btrfs_trans_handle *trans;
3242         struct btrfs_path path;
3243         int ret = 0;
3244
3245         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3246                              I_ERR_NO_ORPHAN_ITEM |
3247                              I_ERR_LINK_COUNT_WRONG |
3248                              I_ERR_NO_INODE_ITEM |
3249                              I_ERR_FILE_EXTENT_ORPHAN |
3250                              I_ERR_FILE_EXTENT_DISCOUNT|
3251                              I_ERR_FILE_NBYTES_WRONG)))
3252                 return rec->errors;
3253
3254         /*
3255          * For nlink repair, it may create a dir and add link, so
3256          * 2 for parent(256)'s dir_index and dir_item
3257          * 2 for lost+found dir's inode_item and inode_ref
3258          * 1 for the new inode_ref of the file
3259          * 2 for lost+found dir's dir_index and dir_item for the file
3260          */
3261         trans = btrfs_start_transaction(root, 7);
3262         if (IS_ERR(trans))
3263                 return PTR_ERR(trans);
3264
3265         btrfs_init_path(&path);
3266         if (rec->errors & I_ERR_NO_INODE_ITEM)
3267                 ret = repair_inode_no_item(trans, root, &path, rec);
3268         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3269                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3270         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3271                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3272         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3273                 ret = repair_inode_isize(trans, root, &path, rec);
3274         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3275                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3276         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3277                 ret = repair_inode_nlinks(trans, root, &path, rec);
3278         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3279                 ret = repair_inode_nbytes(trans, root, &path, rec);
3280         btrfs_commit_transaction(trans, root);
3281         btrfs_release_path(&path);
3282         return ret;
3283 }
3284
3285 static int check_inode_recs(struct btrfs_root *root,
3286                             struct cache_tree *inode_cache)
3287 {
3288         struct cache_extent *cache;
3289         struct ptr_node *node;
3290         struct inode_record *rec;
3291         struct inode_backref *backref;
3292         int stage = 0;
3293         int ret = 0;
3294         int err = 0;
3295         u64 error = 0;
3296         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3297
3298         if (btrfs_root_refs(&root->root_item) == 0) {
3299                 if (!cache_tree_empty(inode_cache))
3300                         fprintf(stderr, "warning line %d\n", __LINE__);
3301                 return 0;
3302         }
3303
3304         /*
3305          * We need to repair backrefs first because we could change some of the
3306          * errors in the inode recs.
3307          *
3308          * We also need to go through and delete invalid backrefs first and then
3309          * add the correct ones second.  We do this because we may get EEXIST
3310          * when adding back the correct index because we hadn't yet deleted the
3311          * invalid index.
3312          *
3313          * For example, if we were missing a dir index then the directories
3314          * isize would be wrong, so if we fixed the isize to what we thought it
3315          * would be and then fixed the backref we'd still have a invalid fs, so
3316          * we need to add back the dir index and then check to see if the isize
3317          * is still wrong.
3318          */
3319         while (stage < 3) {
3320                 stage++;
3321                 if (stage == 3 && !err)
3322                         break;
3323
3324                 cache = search_cache_extent(inode_cache, 0);
3325                 while (repair && cache) {
3326                         node = container_of(cache, struct ptr_node, cache);
3327                         rec = node->data;
3328                         cache = next_cache_extent(cache);
3329
3330                         /* Need to free everything up and rescan */
3331                         if (stage == 3) {
3332                                 remove_cache_extent(inode_cache, &node->cache);
3333                                 free(node);
3334                                 free_inode_rec(rec);
3335                                 continue;
3336                         }
3337
3338                         if (list_empty(&rec->backrefs))
3339                                 continue;
3340
3341                         ret = repair_inode_backrefs(root, rec, inode_cache,
3342                                                     stage == 1);
3343                         if (ret < 0) {
3344                                 err = ret;
3345                                 stage = 2;
3346                                 break;
3347                         } if (ret > 0) {
3348                                 err = -EAGAIN;
3349                         }
3350                 }
3351         }
3352         if (err)
3353                 return err;
3354
3355         rec = get_inode_rec(inode_cache, root_dirid, 0);
3356         BUG_ON(IS_ERR(rec));
3357         if (rec) {
3358                 ret = check_root_dir(rec);
3359                 if (ret) {
3360                         fprintf(stderr, "root %llu root dir %llu error\n",
3361                                 (unsigned long long)root->root_key.objectid,
3362                                 (unsigned long long)root_dirid);
3363                         print_inode_error(root, rec);
3364                         error++;
3365                 }
3366         } else {
3367                 if (repair) {
3368                         struct btrfs_trans_handle *trans;
3369
3370                         trans = btrfs_start_transaction(root, 1);
3371                         if (IS_ERR(trans)) {
3372                                 err = PTR_ERR(trans);
3373                                 return err;
3374                         }
3375
3376                         fprintf(stderr,
3377                                 "root %llu missing its root dir, recreating\n",
3378                                 (unsigned long long)root->objectid);
3379
3380                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3381                         BUG_ON(ret);
3382
3383                         btrfs_commit_transaction(trans, root);
3384                         return -EAGAIN;
3385                 }
3386
3387                 fprintf(stderr, "root %llu root dir %llu not found\n",
3388                         (unsigned long long)root->root_key.objectid,
3389                         (unsigned long long)root_dirid);
3390         }
3391
3392         while (1) {
3393                 cache = search_cache_extent(inode_cache, 0);
3394                 if (!cache)
3395                         break;
3396                 node = container_of(cache, struct ptr_node, cache);
3397                 rec = node->data;
3398                 remove_cache_extent(inode_cache, &node->cache);
3399                 free(node);
3400                 if (rec->ino == root_dirid ||
3401                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3402                         free_inode_rec(rec);
3403                         continue;
3404                 }
3405
3406                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3407                         ret = check_orphan_item(root, rec->ino);
3408                         if (ret == 0)
3409                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3410                         if (can_free_inode_rec(rec)) {
3411                                 free_inode_rec(rec);
3412                                 continue;
3413                         }
3414                 }
3415
3416                 if (!rec->found_inode_item)
3417                         rec->errors |= I_ERR_NO_INODE_ITEM;
3418                 if (rec->found_link != rec->nlink)
3419                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3420                 if (repair) {
3421                         ret = try_repair_inode(root, rec);
3422                         if (ret == 0 && can_free_inode_rec(rec)) {
3423                                 free_inode_rec(rec);
3424                                 continue;
3425                         }
3426                         ret = 0;
3427                 }
3428
3429                 if (!(repair && ret == 0))
3430                         error++;
3431                 print_inode_error(root, rec);
3432                 list_for_each_entry(backref, &rec->backrefs, list) {
3433                         if (!backref->found_dir_item)
3434                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3435                         if (!backref->found_dir_index)
3436                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3437                         if (!backref->found_inode_ref)
3438                                 backref->errors |= REF_ERR_NO_INODE_REF;
3439                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3440                                 " namelen %u name %s filetype %d errors %x",
3441                                 (unsigned long long)backref->dir,
3442                                 (unsigned long long)backref->index,
3443                                 backref->namelen, backref->name,
3444                                 backref->filetype, backref->errors);
3445                         print_ref_error(backref->errors);
3446                 }
3447                 free_inode_rec(rec);
3448         }
3449         return (error > 0) ? -1 : 0;
3450 }
3451
3452 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3453                                         u64 objectid)
3454 {
3455         struct cache_extent *cache;
3456         struct root_record *rec = NULL;
3457         int ret;
3458
3459         cache = lookup_cache_extent(root_cache, objectid, 1);
3460         if (cache) {
3461                 rec = container_of(cache, struct root_record, cache);
3462         } else {
3463                 rec = calloc(1, sizeof(*rec));
3464                 if (!rec)
3465                         return ERR_PTR(-ENOMEM);
3466                 rec->objectid = objectid;
3467                 INIT_LIST_HEAD(&rec->backrefs);
3468                 rec->cache.start = objectid;
3469                 rec->cache.size = 1;
3470
3471                 ret = insert_cache_extent(root_cache, &rec->cache);
3472                 if (ret)
3473                         return ERR_PTR(-EEXIST);
3474         }
3475         return rec;
3476 }
3477
3478 static struct root_backref *get_root_backref(struct root_record *rec,
3479                                              u64 ref_root, u64 dir, u64 index,
3480                                              const char *name, int namelen)
3481 {
3482         struct root_backref *backref;
3483
3484         list_for_each_entry(backref, &rec->backrefs, list) {
3485                 if (backref->ref_root != ref_root || backref->dir != dir ||
3486                     backref->namelen != namelen)
3487                         continue;
3488                 if (memcmp(name, backref->name, namelen))
3489                         continue;
3490                 return backref;
3491         }
3492
3493         backref = calloc(1, sizeof(*backref) + namelen + 1);
3494         if (!backref)
3495                 return NULL;
3496         backref->ref_root = ref_root;
3497         backref->dir = dir;
3498         backref->index = index;
3499         backref->namelen = namelen;
3500         memcpy(backref->name, name, namelen);
3501         backref->name[namelen] = '\0';
3502         list_add_tail(&backref->list, &rec->backrefs);
3503         return backref;
3504 }
3505
3506 static void free_root_record(struct cache_extent *cache)
3507 {
3508         struct root_record *rec;
3509         struct root_backref *backref;
3510
3511         rec = container_of(cache, struct root_record, cache);
3512         while (!list_empty(&rec->backrefs)) {
3513                 backref = to_root_backref(rec->backrefs.next);
3514                 list_del(&backref->list);
3515                 free(backref);
3516         }
3517
3518         free(rec);
3519 }
3520
3521 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3522
3523 static int add_root_backref(struct cache_tree *root_cache,
3524                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3525                             const char *name, int namelen,
3526                             int item_type, int errors)
3527 {
3528         struct root_record *rec;
3529         struct root_backref *backref;
3530
3531         rec = get_root_rec(root_cache, root_id);
3532         BUG_ON(IS_ERR(rec));
3533         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3534         BUG_ON(!backref);
3535
3536         backref->errors |= errors;
3537
3538         if (item_type != BTRFS_DIR_ITEM_KEY) {
3539                 if (backref->found_dir_index || backref->found_back_ref ||
3540                     backref->found_forward_ref) {
3541                         if (backref->index != index)
3542                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3543                 } else {
3544                         backref->index = index;
3545                 }
3546         }
3547
3548         if (item_type == BTRFS_DIR_ITEM_KEY) {
3549                 if (backref->found_forward_ref)
3550                         rec->found_ref++;
3551                 backref->found_dir_item = 1;
3552         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3553                 backref->found_dir_index = 1;
3554         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3555                 if (backref->found_forward_ref)
3556                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3557                 else if (backref->found_dir_item)
3558                         rec->found_ref++;
3559                 backref->found_forward_ref = 1;
3560         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3561                 if (backref->found_back_ref)
3562                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3563                 backref->found_back_ref = 1;
3564         } else {
3565                 BUG_ON(1);
3566         }
3567
3568         if (backref->found_forward_ref && backref->found_dir_item)
3569                 backref->reachable = 1;
3570         return 0;
3571 }
3572
3573 static int merge_root_recs(struct btrfs_root *root,
3574                            struct cache_tree *src_cache,
3575                            struct cache_tree *dst_cache)
3576 {
3577         struct cache_extent *cache;
3578         struct ptr_node *node;
3579         struct inode_record *rec;
3580         struct inode_backref *backref;
3581         int ret = 0;
3582
3583         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3584                 free_inode_recs_tree(src_cache);
3585                 return 0;
3586         }
3587
3588         while (1) {
3589                 cache = search_cache_extent(src_cache, 0);
3590                 if (!cache)
3591                         break;
3592                 node = container_of(cache, struct ptr_node, cache);
3593                 rec = node->data;
3594                 remove_cache_extent(src_cache, &node->cache);
3595                 free(node);
3596
3597                 ret = is_child_root(root, root->objectid, rec->ino);
3598                 if (ret < 0)
3599                         break;
3600                 else if (ret == 0)
3601                         goto skip;
3602
3603                 list_for_each_entry(backref, &rec->backrefs, list) {
3604                         BUG_ON(backref->found_inode_ref);
3605                         if (backref->found_dir_item)
3606                                 add_root_backref(dst_cache, rec->ino,
3607                                         root->root_key.objectid, backref->dir,
3608                                         backref->index, backref->name,
3609                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3610                                         backref->errors);
3611                         if (backref->found_dir_index)
3612                                 add_root_backref(dst_cache, rec->ino,
3613                                         root->root_key.objectid, backref->dir,
3614                                         backref->index, backref->name,
3615                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3616                                         backref->errors);
3617                 }
3618 skip:
3619                 free_inode_rec(rec);
3620         }
3621         if (ret < 0)
3622                 return ret;
3623         return 0;
3624 }
3625
3626 static int check_root_refs(struct btrfs_root *root,
3627                            struct cache_tree *root_cache)
3628 {
3629         struct root_record *rec;
3630         struct root_record *ref_root;
3631         struct root_backref *backref;
3632         struct cache_extent *cache;
3633         int loop = 1;
3634         int ret;
3635         int error;
3636         int errors = 0;
3637
3638         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3639         BUG_ON(IS_ERR(rec));
3640         rec->found_ref = 1;
3641
3642         /* fixme: this can not detect circular references */
3643         while (loop) {
3644                 loop = 0;
3645                 cache = search_cache_extent(root_cache, 0);
3646                 while (1) {
3647                         if (!cache)
3648                                 break;
3649                         rec = container_of(cache, struct root_record, cache);
3650                         cache = next_cache_extent(cache);
3651
3652                         if (rec->found_ref == 0)
3653                                 continue;
3654
3655                         list_for_each_entry(backref, &rec->backrefs, list) {
3656                                 if (!backref->reachable)
3657                                         continue;
3658
3659                                 ref_root = get_root_rec(root_cache,
3660                                                         backref->ref_root);
3661                                 BUG_ON(IS_ERR(ref_root));
3662                                 if (ref_root->found_ref > 0)
3663                                         continue;
3664
3665                                 backref->reachable = 0;
3666                                 rec->found_ref--;
3667                                 if (rec->found_ref == 0)
3668                                         loop = 1;
3669                         }
3670                 }
3671         }
3672
3673         cache = search_cache_extent(root_cache, 0);
3674         while (1) {
3675                 if (!cache)
3676                         break;
3677                 rec = container_of(cache, struct root_record, cache);
3678                 cache = next_cache_extent(cache);
3679
3680                 if (rec->found_ref == 0 &&
3681                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3682                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3683                         ret = check_orphan_item(root->fs_info->tree_root,
3684                                                 rec->objectid);
3685                         if (ret == 0)
3686                                 continue;
3687
3688                         /*
3689                          * If we don't have a root item then we likely just have
3690                          * a dir item in a snapshot for this root but no actual
3691                          * ref key or anything so it's meaningless.
3692                          */
3693                         if (!rec->found_root_item)
3694                                 continue;
3695                         errors++;
3696                         fprintf(stderr, "fs tree %llu not referenced\n",
3697                                 (unsigned long long)rec->objectid);
3698                 }
3699
3700                 error = 0;
3701                 if (rec->found_ref > 0 && !rec->found_root_item)
3702                         error = 1;
3703                 list_for_each_entry(backref, &rec->backrefs, list) {
3704                         if (!backref->found_dir_item)
3705                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3706                         if (!backref->found_dir_index)
3707                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3708                         if (!backref->found_back_ref)
3709                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3710                         if (!backref->found_forward_ref)
3711                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3712                         if (backref->reachable && backref->errors)
3713                                 error = 1;
3714                 }
3715                 if (!error)
3716                         continue;
3717
3718                 errors++;
3719                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3720                         (unsigned long long)rec->objectid, rec->found_ref,
3721                          rec->found_root_item ? "" : "not found");
3722
3723                 list_for_each_entry(backref, &rec->backrefs, list) {
3724                         if (!backref->reachable)
3725                                 continue;
3726                         if (!backref->errors && rec->found_root_item)
3727                                 continue;
3728                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3729                                 " index %llu namelen %u name %s errors %x\n",
3730                                 (unsigned long long)backref->ref_root,
3731                                 (unsigned long long)backref->dir,
3732                                 (unsigned long long)backref->index,
3733                                 backref->namelen, backref->name,
3734                                 backref->errors);
3735                         print_ref_error(backref->errors);
3736                 }
3737         }
3738         return errors > 0 ? 1 : 0;
3739 }
3740
3741 static int process_root_ref(struct extent_buffer *eb, int slot,
3742                             struct btrfs_key *key,
3743                             struct cache_tree *root_cache)
3744 {
3745         u64 dirid;
3746         u64 index;
3747         u32 len;
3748         u32 name_len;
3749         struct btrfs_root_ref *ref;
3750         char namebuf[BTRFS_NAME_LEN];
3751         int error;
3752
3753         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3754
3755         dirid = btrfs_root_ref_dirid(eb, ref);
3756         index = btrfs_root_ref_sequence(eb, ref);
3757         name_len = btrfs_root_ref_name_len(eb, ref);
3758
3759         if (name_len <= BTRFS_NAME_LEN) {
3760                 len = name_len;
3761                 error = 0;
3762         } else {
3763                 len = BTRFS_NAME_LEN;
3764                 error = REF_ERR_NAME_TOO_LONG;
3765         }
3766         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3767
3768         if (key->type == BTRFS_ROOT_REF_KEY) {
3769                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3770                                  index, namebuf, len, key->type, error);
3771         } else {
3772                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3773                                  index, namebuf, len, key->type, error);
3774         }
3775         return 0;
3776 }
3777
3778 static void free_corrupt_block(struct cache_extent *cache)
3779 {
3780         struct btrfs_corrupt_block *corrupt;
3781
3782         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3783         free(corrupt);
3784 }
3785
3786 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3787
3788 /*
3789  * Repair the btree of the given root.
3790  *
3791  * The fix is to remove the node key in corrupt_blocks cache_tree.
3792  * and rebalance the tree.
3793  * After the fix, the btree should be writeable.
3794  */
3795 static int repair_btree(struct btrfs_root *root,
3796                         struct cache_tree *corrupt_blocks)
3797 {
3798         struct btrfs_trans_handle *trans;
3799         struct btrfs_path path;
3800         struct btrfs_corrupt_block *corrupt;
3801         struct cache_extent *cache;
3802         struct btrfs_key key;
3803         u64 offset;
3804         int level;
3805         int ret = 0;
3806
3807         if (cache_tree_empty(corrupt_blocks))
3808                 return 0;
3809
3810         trans = btrfs_start_transaction(root, 1);
3811         if (IS_ERR(trans)) {
3812                 ret = PTR_ERR(trans);
3813                 fprintf(stderr, "Error starting transaction: %s\n",
3814                         strerror(-ret));
3815                 return ret;
3816         }
3817         btrfs_init_path(&path);
3818         cache = first_cache_extent(corrupt_blocks);
3819         while (cache) {
3820                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3821                                        cache);
3822                 level = corrupt->level;
3823                 path.lowest_level = level;
3824                 key.objectid = corrupt->key.objectid;
3825                 key.type = corrupt->key.type;
3826                 key.offset = corrupt->key.offset;
3827
3828                 /*
3829                  * Here we don't want to do any tree balance, since it may
3830                  * cause a balance with corrupted brother leaf/node,
3831                  * so ins_len set to 0 here.
3832                  * Balance will be done after all corrupt node/leaf is deleted.
3833                  */
3834                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3835                 if (ret < 0)
3836                         goto out;
3837                 offset = btrfs_node_blockptr(path.nodes[level],
3838                                              path.slots[level]);
3839
3840                 /* Remove the ptr */
3841                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3842                 if (ret < 0)
3843                         goto out;
3844                 /*
3845                  * Remove the corresponding extent
3846                  * return value is not concerned.
3847                  */
3848                 btrfs_release_path(&path);
3849                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3850                                         0, root->root_key.objectid,
3851                                         level - 1, 0);
3852                 cache = next_cache_extent(cache);
3853         }
3854
3855         /* Balance the btree using btrfs_search_slot() */
3856         cache = first_cache_extent(corrupt_blocks);
3857         while (cache) {
3858                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3859                                        cache);
3860                 memcpy(&key, &corrupt->key, sizeof(key));
3861                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3862                 if (ret < 0)
3863                         goto out;
3864                 /* return will always >0 since it won't find the item */
3865                 ret = 0;
3866                 btrfs_release_path(&path);
3867                 cache = next_cache_extent(cache);
3868         }
3869 out:
3870         btrfs_commit_transaction(trans, root);
3871         btrfs_release_path(&path);
3872         return ret;
3873 }
3874
3875 static int check_fs_root(struct btrfs_root *root,
3876                          struct cache_tree *root_cache,
3877                          struct walk_control *wc)
3878 {
3879         int ret = 0;
3880         int err = 0;
3881         int wret;
3882         int level;
3883         struct btrfs_path path;
3884         struct shared_node root_node;
3885         struct root_record *rec;
3886         struct btrfs_root_item *root_item = &root->root_item;
3887         struct cache_tree corrupt_blocks;
3888         struct orphan_data_extent *orphan;
3889         struct orphan_data_extent *tmp;
3890         enum btrfs_tree_block_status status;
3891         struct node_refs nrefs;
3892
3893         /*
3894          * Reuse the corrupt_block cache tree to record corrupted tree block
3895          *
3896          * Unlike the usage in extent tree check, here we do it in a per
3897          * fs/subvol tree base.
3898          */
3899         cache_tree_init(&corrupt_blocks);
3900         root->fs_info->corrupt_blocks = &corrupt_blocks;
3901
3902         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3903                 rec = get_root_rec(root_cache, root->root_key.objectid);
3904                 BUG_ON(IS_ERR(rec));
3905                 if (btrfs_root_refs(root_item) > 0)
3906                         rec->found_root_item = 1;
3907         }
3908
3909         btrfs_init_path(&path);
3910         memset(&root_node, 0, sizeof(root_node));
3911         cache_tree_init(&root_node.root_cache);
3912         cache_tree_init(&root_node.inode_cache);
3913         memset(&nrefs, 0, sizeof(nrefs));
3914
3915         /* Move the orphan extent record to corresponding inode_record */
3916         list_for_each_entry_safe(orphan, tmp,
3917                                  &root->orphan_data_extents, list) {
3918                 struct inode_record *inode;
3919
3920                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3921                                       1);
3922                 BUG_ON(IS_ERR(inode));
3923                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3924                 list_move(&orphan->list, &inode->orphan_extents);
3925         }
3926
3927         level = btrfs_header_level(root->node);
3928         memset(wc->nodes, 0, sizeof(wc->nodes));
3929         wc->nodes[level] = &root_node;
3930         wc->active_node = level;
3931         wc->root_level = level;
3932
3933         /* We may not have checked the root block, lets do that now */
3934         if (btrfs_is_leaf(root->node))
3935                 status = btrfs_check_leaf(root, NULL, root->node);
3936         else
3937                 status = btrfs_check_node(root, NULL, root->node);
3938         if (status != BTRFS_TREE_BLOCK_CLEAN)
3939                 return -EIO;
3940
3941         if (btrfs_root_refs(root_item) > 0 ||
3942             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3943                 path.nodes[level] = root->node;
3944                 extent_buffer_get(root->node);
3945                 path.slots[level] = 0;
3946         } else {
3947                 struct btrfs_key key;
3948                 struct btrfs_disk_key found_key;
3949
3950                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3951                 level = root_item->drop_level;
3952                 path.lowest_level = level;
3953                 if (level > btrfs_header_level(root->node) ||
3954                     level >= BTRFS_MAX_LEVEL) {
3955                         error("ignoring invalid drop level: %u", level);
3956                         goto skip_walking;
3957                 }
3958                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3959                 if (wret < 0)
3960                         goto skip_walking;
3961                 btrfs_node_key(path.nodes[level], &found_key,
3962                                 path.slots[level]);
3963                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3964                                         sizeof(found_key)));
3965         }
3966
3967         while (1) {
3968                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3969                 if (wret < 0)
3970                         ret = wret;
3971                 if (wret != 0)
3972                         break;
3973
3974                 wret = walk_up_tree(root, &path, wc, &level);
3975                 if (wret < 0)
3976                         ret = wret;
3977                 if (wret != 0)
3978                         break;
3979         }
3980 skip_walking:
3981         btrfs_release_path(&path);
3982
3983         if (!cache_tree_empty(&corrupt_blocks)) {
3984                 struct cache_extent *cache;
3985                 struct btrfs_corrupt_block *corrupt;
3986
3987                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3988                        root->root_key.objectid);
3989                 cache = first_cache_extent(&corrupt_blocks);
3990                 while (cache) {
3991                         corrupt = container_of(cache,
3992                                                struct btrfs_corrupt_block,
3993                                                cache);
3994                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3995                                cache->start, corrupt->level,
3996                                corrupt->key.objectid, corrupt->key.type,
3997                                corrupt->key.offset);
3998                         cache = next_cache_extent(cache);
3999                 }
4000                 if (repair) {
4001                         printf("Try to repair the btree for root %llu\n",
4002                                root->root_key.objectid);
4003                         ret = repair_btree(root, &corrupt_blocks);
4004                         if (ret < 0)
4005                                 fprintf(stderr, "Failed to repair btree: %s\n",
4006                                         strerror(-ret));
4007                         if (!ret)
4008                                 printf("Btree for root %llu is fixed\n",
4009                                        root->root_key.objectid);
4010                 }
4011         }
4012
4013         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4014         if (err < 0)
4015                 ret = err;
4016
4017         if (root_node.current) {
4018                 root_node.current->checked = 1;
4019                 maybe_free_inode_rec(&root_node.inode_cache,
4020                                 root_node.current);
4021         }
4022
4023         err = check_inode_recs(root, &root_node.inode_cache);
4024         if (!ret)
4025                 ret = err;
4026
4027         free_corrupt_blocks_tree(&corrupt_blocks);
4028         root->fs_info->corrupt_blocks = NULL;
4029         free_orphan_data_extents(&root->orphan_data_extents);
4030         return ret;
4031 }
4032
4033 static int fs_root_objectid(u64 objectid)
4034 {
4035         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4036             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4037                 return 1;
4038         return is_fstree(objectid);
4039 }
4040
4041 static int check_fs_roots(struct btrfs_root *root,
4042                           struct cache_tree *root_cache)
4043 {
4044         struct btrfs_path path;
4045         struct btrfs_key key;
4046         struct walk_control wc;
4047         struct extent_buffer *leaf, *tree_node;
4048         struct btrfs_root *tmp_root;
4049         struct btrfs_root *tree_root = root->fs_info->tree_root;
4050         int ret;
4051         int err = 0;
4052
4053         if (ctx.progress_enabled) {
4054                 ctx.tp = TASK_FS_ROOTS;
4055                 task_start(ctx.info);
4056         }
4057
4058         /*
4059          * Just in case we made any changes to the extent tree that weren't
4060          * reflected into the free space cache yet.
4061          */
4062         if (repair)
4063                 reset_cached_block_groups(root->fs_info);
4064         memset(&wc, 0, sizeof(wc));
4065         cache_tree_init(&wc.shared);
4066         btrfs_init_path(&path);
4067
4068 again:
4069         key.offset = 0;
4070         key.objectid = 0;
4071         key.type = BTRFS_ROOT_ITEM_KEY;
4072         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4073         if (ret < 0) {
4074                 err = 1;
4075                 goto out;
4076         }
4077         tree_node = tree_root->node;
4078         while (1) {
4079                 if (tree_node != tree_root->node) {
4080                         free_root_recs_tree(root_cache);
4081                         btrfs_release_path(&path);
4082                         goto again;
4083                 }
4084                 leaf = path.nodes[0];
4085                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4086                         ret = btrfs_next_leaf(tree_root, &path);
4087                         if (ret) {
4088                                 if (ret < 0)
4089                                         err = 1;
4090                                 break;
4091                         }
4092                         leaf = path.nodes[0];
4093                 }
4094                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4095                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4096                     fs_root_objectid(key.objectid)) {
4097                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4098                                 tmp_root = btrfs_read_fs_root_no_cache(
4099                                                 root->fs_info, &key);
4100                         } else {
4101                                 key.offset = (u64)-1;
4102                                 tmp_root = btrfs_read_fs_root(
4103                                                 root->fs_info, &key);
4104                         }
4105                         if (IS_ERR(tmp_root)) {
4106                                 err = 1;
4107                                 goto next;
4108                         }
4109                         ret = check_fs_root(tmp_root, root_cache, &wc);
4110                         if (ret == -EAGAIN) {
4111                                 free_root_recs_tree(root_cache);
4112                                 btrfs_release_path(&path);
4113                                 goto again;
4114                         }
4115                         if (ret)
4116                                 err = 1;
4117                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4118                                 btrfs_free_fs_root(tmp_root);
4119                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4120                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4121                         process_root_ref(leaf, path.slots[0], &key,
4122                                          root_cache);
4123                 }
4124 next:
4125                 path.slots[0]++;
4126         }
4127 out:
4128         btrfs_release_path(&path);
4129         if (err)
4130                 free_extent_cache_tree(&wc.shared);
4131         if (!cache_tree_empty(&wc.shared))
4132                 fprintf(stderr, "warning line %d\n", __LINE__);
4133
4134         task_stop(ctx.info);
4135
4136         return err;
4137 }
4138
4139 /*
4140  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4141  * INODE_REF/INODE_EXTREF match.
4142  *
4143  * @root:       the root of the fs/file tree
4144  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4145  * @key:        the key of the DIR_ITEM/DIR_INDEX
4146  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4147  *              distinguish root_dir between normal dir/file
4148  * @name:       the name in the INODE_REF/INODE_EXTREF
4149  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4150  * @mode:       the st_mode of INODE_ITEM
4151  *
4152  * Return 0 if no error occurred.
4153  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4154  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4155  * dir/file.
4156  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4157  * not match for normal dir/file.
4158  */
4159 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4160                          struct btrfs_key *key, u64 index, char *name,
4161                          u32 namelen, u32 mode)
4162 {
4163         struct btrfs_path path;
4164         struct extent_buffer *node;
4165         struct btrfs_dir_item *di;
4166         struct btrfs_key location;
4167         char namebuf[BTRFS_NAME_LEN] = {0};
4168         u32 total;
4169         u32 cur = 0;
4170         u32 len;
4171         u32 name_len;
4172         u32 data_len;
4173         u8 filetype;
4174         int slot;
4175         int ret;
4176
4177         btrfs_init_path(&path);
4178         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4179         if (ret < 0) {
4180                 ret = DIR_ITEM_MISSING;
4181                 goto out;
4182         }
4183
4184         /* Process root dir and goto out*/
4185         if (index == 0) {
4186                 if (ret == 0) {
4187                         ret = ROOT_DIR_ERROR;
4188                         error(
4189                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4190                                 root->objectid,
4191                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4192                                         "REF" : "EXTREF",
4193                                 ref_key->objectid, ref_key->offset,
4194                                 key->type == BTRFS_DIR_ITEM_KEY ?
4195                                         "DIR_ITEM" : "DIR_INDEX");
4196                 } else {
4197                         ret = 0;
4198                 }
4199
4200                 goto out;
4201         }
4202
4203         /* Process normal file/dir */
4204         if (ret > 0) {
4205                 ret = DIR_ITEM_MISSING;
4206                 error(
4207                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4208                         root->objectid,
4209                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4210                         ref_key->objectid, ref_key->offset,
4211                         key->type == BTRFS_DIR_ITEM_KEY ?
4212                                 "DIR_ITEM" : "DIR_INDEX",
4213                         key->objectid, key->offset, namelen, name,
4214                         imode_to_type(mode));
4215                 goto out;
4216         }
4217
4218         /* Check whether inode_id/filetype/name match */
4219         node = path.nodes[0];
4220         slot = path.slots[0];
4221         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4222         total = btrfs_item_size_nr(node, slot);
4223         while (cur < total) {
4224                 ret = DIR_ITEM_MISMATCH;
4225                 name_len = btrfs_dir_name_len(node, di);
4226                 data_len = btrfs_dir_data_len(node, di);
4227
4228                 btrfs_dir_item_key_to_cpu(node, di, &location);
4229                 if (location.objectid != ref_key->objectid ||
4230                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4231                     location.offset != 0)
4232                         goto next;
4233
4234                 filetype = btrfs_dir_type(node, di);
4235                 if (imode_to_type(mode) != filetype)
4236                         goto next;
4237
4238                 if (name_len <= BTRFS_NAME_LEN) {
4239                         len = name_len;
4240                 } else {
4241                         len = BTRFS_NAME_LEN;
4242                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4243                         root->objectid,
4244                         key->type == BTRFS_DIR_ITEM_KEY ?
4245                         "DIR_ITEM" : "DIR_INDEX",
4246                         key->objectid, key->offset, name_len);
4247                 }
4248                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4249                 if (len != namelen || strncmp(namebuf, name, len))
4250                         goto next;
4251
4252                 ret = 0;
4253                 goto out;
4254 next:
4255                 len = sizeof(*di) + name_len + data_len;
4256                 di = (struct btrfs_dir_item *)((char *)di + len);
4257                 cur += len;
4258         }
4259         if (ret == DIR_ITEM_MISMATCH)
4260                 error(
4261                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4262                         root->objectid,
4263                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4264                         ref_key->objectid, ref_key->offset,
4265                         key->type == BTRFS_DIR_ITEM_KEY ?
4266                                 "DIR_ITEM" : "DIR_INDEX",
4267                         key->objectid, key->offset, namelen, name,
4268                         imode_to_type(mode));
4269 out:
4270         btrfs_release_path(&path);
4271         return ret;
4272 }
4273
4274 /*
4275  * Traverse the given INODE_REF and call find_dir_item() to find related
4276  * DIR_ITEM/DIR_INDEX.
4277  *
4278  * @root:       the root of the fs/file tree
4279  * @ref_key:    the key of the INODE_REF
4280  * @refs:       the count of INODE_REF
4281  * @mode:       the st_mode of INODE_ITEM
4282  *
4283  * Return 0 if no error occurred.
4284  */
4285 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4286                            struct extent_buffer *node, int slot, u64 *refs,
4287                            int mode)
4288 {
4289         struct btrfs_key key;
4290         struct btrfs_inode_ref *ref;
4291         char namebuf[BTRFS_NAME_LEN] = {0};
4292         u32 total;
4293         u32 cur = 0;
4294         u32 len;
4295         u32 name_len;
4296         u64 index;
4297         int ret, err = 0;
4298
4299         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4300         total = btrfs_item_size_nr(node, slot);
4301
4302 next:
4303         /* Update inode ref count */
4304         (*refs)++;
4305
4306         index = btrfs_inode_ref_index(node, ref);
4307         name_len = btrfs_inode_ref_name_len(node, ref);
4308         if (cur + sizeof(*ref) + name_len > total ||
4309             name_len > BTRFS_NAME_LEN) {
4310                 warning("root %llu INODE_REF[%llu %llu] name too long",
4311                         root->objectid, ref_key->objectid, ref_key->offset);
4312
4313                 if (total < cur + sizeof(*ref))
4314                         goto out;
4315                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4316         } else {
4317                 len = name_len;
4318         }
4319
4320         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4321
4322         /* Check root dir ref name */
4323         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4324                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4325                       root->objectid, ref_key->objectid, ref_key->offset,
4326                       namebuf);
4327                 err |= ROOT_DIR_ERROR;
4328         }
4329
4330         /* Find related DIR_INDEX */
4331         key.objectid = ref_key->offset;
4332         key.type = BTRFS_DIR_INDEX_KEY;
4333         key.offset = index;
4334         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4335         err |= ret;
4336
4337         /* Find related dir_item */
4338         key.objectid = ref_key->offset;
4339         key.type = BTRFS_DIR_ITEM_KEY;
4340         key.offset = btrfs_name_hash(namebuf, len);
4341         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4342         err |= ret;
4343
4344         len = sizeof(*ref) + name_len;
4345         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4346         cur += len;
4347         if (cur < total)
4348                 goto next;
4349
4350 out:
4351         return err;
4352 }
4353
4354 /*
4355  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4356  * DIR_ITEM/DIR_INDEX.
4357  *
4358  * @root:       the root of the fs/file tree
4359  * @ref_key:    the key of the INODE_EXTREF
4360  * @refs:       the count of INODE_EXTREF
4361  * @mode:       the st_mode of INODE_ITEM
4362  *
4363  * Return 0 if no error occurred.
4364  */
4365 static int check_inode_extref(struct btrfs_root *root,
4366                               struct btrfs_key *ref_key,
4367                               struct extent_buffer *node, int slot, u64 *refs,
4368                               int mode)
4369 {
4370         struct btrfs_key key;
4371         struct btrfs_inode_extref *extref;
4372         char namebuf[BTRFS_NAME_LEN] = {0};
4373         u32 total;
4374         u32 cur = 0;
4375         u32 len;
4376         u32 name_len;
4377         u64 index;
4378         u64 parent;
4379         int ret;
4380         int err = 0;
4381
4382         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4383         total = btrfs_item_size_nr(node, slot);
4384
4385 next:
4386         /* update inode ref count */
4387         (*refs)++;
4388         name_len = btrfs_inode_extref_name_len(node, extref);
4389         index = btrfs_inode_extref_index(node, extref);
4390         parent = btrfs_inode_extref_parent(node, extref);
4391         if (name_len <= BTRFS_NAME_LEN) {
4392                 len = name_len;
4393         } else {
4394                 len = BTRFS_NAME_LEN;
4395                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4396                         root->objectid, ref_key->objectid, ref_key->offset);
4397         }
4398         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4399
4400         /* Check root dir ref name */
4401         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4402                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4403                       root->objectid, ref_key->objectid, ref_key->offset,
4404                       namebuf);
4405                 err |= ROOT_DIR_ERROR;
4406         }
4407
4408         /* find related dir_index */
4409         key.objectid = parent;
4410         key.type = BTRFS_DIR_INDEX_KEY;
4411         key.offset = index;
4412         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4413         err |= ret;
4414
4415         /* find related dir_item */
4416         key.objectid = parent;
4417         key.type = BTRFS_DIR_ITEM_KEY;
4418         key.offset = btrfs_name_hash(namebuf, len);
4419         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4420         err |= ret;
4421
4422         len = sizeof(*extref) + name_len;
4423         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4424         cur += len;
4425
4426         if (cur < total)
4427                 goto next;
4428
4429         return err;
4430 }
4431
4432 /*
4433  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4434  * DIR_ITEM/DIR_INDEX match.
4435  *
4436  * @root:       the root of the fs/file tree
4437  * @key:        the key of the INODE_REF/INODE_EXTREF
4438  * @name:       the name in the INODE_REF/INODE_EXTREF
4439  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4440  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4441  * to (u64)-1
4442  * @ext_ref:    the EXTENDED_IREF feature
4443  *
4444  * Return 0 if no error occurred.
4445  * Return >0 for error bitmap
4446  */
4447 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4448                           char *name, int namelen, u64 index,
4449                           unsigned int ext_ref)
4450 {
4451         struct btrfs_path path;
4452         struct btrfs_inode_ref *ref;
4453         struct btrfs_inode_extref *extref;
4454         struct extent_buffer *node;
4455         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4456         u32 total;
4457         u32 cur = 0;
4458         u32 len;
4459         u32 ref_namelen;
4460         u64 ref_index;
4461         u64 parent;
4462         u64 dir_id;
4463         int slot;
4464         int ret;
4465
4466         btrfs_init_path(&path);
4467         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4468         if (ret) {
4469                 ret = INODE_REF_MISSING;
4470                 goto extref;
4471         }
4472
4473         node = path.nodes[0];
4474         slot = path.slots[0];
4475
4476         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4477         total = btrfs_item_size_nr(node, slot);
4478
4479         /* Iterate all entry of INODE_REF */
4480         while (cur < total) {
4481                 ret = INODE_REF_MISSING;
4482
4483                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4484                 ref_index = btrfs_inode_ref_index(node, ref);
4485                 if (index != (u64)-1 && index != ref_index)
4486                         goto next_ref;
4487
4488                 if (cur + sizeof(*ref) + ref_namelen > total ||
4489                     ref_namelen > BTRFS_NAME_LEN) {
4490                         warning("root %llu INODE %s[%llu %llu] name too long",
4491                                 root->objectid,
4492                                 key->type == BTRFS_INODE_REF_KEY ?
4493                                         "REF" : "EXTREF",
4494                                 key->objectid, key->offset);
4495
4496                         if (cur + sizeof(*ref) > total)
4497                                 break;
4498                         len = min_t(u32, total - cur - sizeof(*ref),
4499                                     BTRFS_NAME_LEN);
4500                 } else {
4501                         len = ref_namelen;
4502                 }
4503
4504                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4505                                    len);
4506
4507                 if (len != namelen || strncmp(ref_namebuf, name, len))
4508                         goto next_ref;
4509
4510                 ret = 0;
4511                 goto out;
4512 next_ref:
4513                 len = sizeof(*ref) + ref_namelen;
4514                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4515                 cur += len;
4516         }
4517
4518 extref:
4519         /* Skip if not support EXTENDED_IREF feature */
4520         if (!ext_ref)
4521                 goto out;
4522
4523         btrfs_release_path(&path);
4524         btrfs_init_path(&path);
4525
4526         dir_id = key->offset;
4527         key->type = BTRFS_INODE_EXTREF_KEY;
4528         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4529
4530         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4531         if (ret) {
4532                 ret = INODE_REF_MISSING;
4533                 goto out;
4534         }
4535
4536         node = path.nodes[0];
4537         slot = path.slots[0];
4538
4539         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4540         cur = 0;
4541         total = btrfs_item_size_nr(node, slot);
4542
4543         /* Iterate all entry of INODE_EXTREF */
4544         while (cur < total) {
4545                 ret = INODE_REF_MISSING;
4546
4547                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4548                 ref_index = btrfs_inode_extref_index(node, extref);
4549                 parent = btrfs_inode_extref_parent(node, extref);
4550                 if (index != (u64)-1 && index != ref_index)
4551                         goto next_extref;
4552
4553                 if (parent != dir_id)
4554                         goto next_extref;
4555
4556                 if (ref_namelen <= BTRFS_NAME_LEN) {
4557                         len = ref_namelen;
4558                 } else {
4559                         len = BTRFS_NAME_LEN;
4560                         warning("root %llu INODE %s[%llu %llu] name too long",
4561                                 root->objectid,
4562                                 key->type == BTRFS_INODE_REF_KEY ?
4563                                         "REF" : "EXTREF",
4564                                 key->objectid, key->offset);
4565                 }
4566                 read_extent_buffer(node, ref_namebuf,
4567                                    (unsigned long)(extref + 1), len);
4568
4569                 if (len != namelen || strncmp(ref_namebuf, name, len))
4570                         goto next_extref;
4571
4572                 ret = 0;
4573                 goto out;
4574
4575 next_extref:
4576                 len = sizeof(*extref) + ref_namelen;
4577                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4578                 cur += len;
4579
4580         }
4581 out:
4582         btrfs_release_path(&path);
4583         return ret;
4584 }
4585
4586 /*
4587  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4588  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4589  *
4590  * @root:       the root of the fs/file tree
4591  * @key:        the key of the INODE_REF/INODE_EXTREF
4592  * @size:       the st_size of the INODE_ITEM
4593  * @ext_ref:    the EXTENDED_IREF feature
4594  *
4595  * Return 0 if no error occurred.
4596  */
4597 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4598                           struct extent_buffer *node, int slot, u64 *size,
4599                           unsigned int ext_ref)
4600 {
4601         struct btrfs_dir_item *di;
4602         struct btrfs_inode_item *ii;
4603         struct btrfs_path path;
4604         struct btrfs_key location;
4605         char namebuf[BTRFS_NAME_LEN] = {0};
4606         u32 total;
4607         u32 cur = 0;
4608         u32 len;
4609         u32 name_len;
4610         u32 data_len;
4611         u8 filetype;
4612         u32 mode;
4613         u64 index;
4614         int ret;
4615         int err = 0;
4616
4617         /*
4618          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4619          * ignore index check.
4620          */
4621         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4622
4623         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4624         total = btrfs_item_size_nr(node, slot);
4625
4626         while (cur < total) {
4627                 data_len = btrfs_dir_data_len(node, di);
4628                 if (data_len)
4629                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4630                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4631                               "DIR_ITEM" : "DIR_INDEX",
4632                               key->objectid, key->offset, data_len);
4633
4634                 name_len = btrfs_dir_name_len(node, di);
4635                 if (name_len <= BTRFS_NAME_LEN) {
4636                         len = name_len;
4637                 } else {
4638                         len = BTRFS_NAME_LEN;
4639                         warning("root %llu %s[%llu %llu] name too long",
4640                                 root->objectid,
4641                                 key->type == BTRFS_DIR_ITEM_KEY ?
4642                                 "DIR_ITEM" : "DIR_INDEX",
4643                                 key->objectid, key->offset);
4644                 }
4645                 (*size) += name_len;
4646
4647                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4648                 filetype = btrfs_dir_type(node, di);
4649
4650                 btrfs_init_path(&path);
4651                 btrfs_dir_item_key_to_cpu(node, di, &location);
4652
4653                 /* Ignore related ROOT_ITEM check */
4654                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4655                         goto next;
4656
4657                 /* Check relative INODE_ITEM(existence/filetype) */
4658                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4659                 if (ret) {
4660                         err |= INODE_ITEM_MISSING;
4661                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, location.objectid, name_len,
4665                               namebuf, filetype);
4666                         goto next;
4667                 }
4668
4669                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4670                                     struct btrfs_inode_item);
4671                 mode = btrfs_inode_mode(path.nodes[0], ii);
4672
4673                 if (imode_to_type(mode) != filetype) {
4674                         err |= INODE_ITEM_MISMATCH;
4675                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4676                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4677                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4678                               key->offset, name_len, namebuf, filetype);
4679                 }
4680
4681                 /* Check relative INODE_REF/INODE_EXTREF */
4682                 location.type = BTRFS_INODE_REF_KEY;
4683                 location.offset = key->objectid;
4684                 ret = find_inode_ref(root, &location, namebuf, len,
4685                                        index, ext_ref);
4686                 err |= ret;
4687                 if (ret & INODE_REF_MISSING)
4688                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4689                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4690                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4691                               key->offset, name_len, namebuf, filetype);
4692
4693 next:
4694                 btrfs_release_path(&path);
4695                 len = sizeof(*di) + name_len + data_len;
4696                 di = (struct btrfs_dir_item *)((char *)di + len);
4697                 cur += len;
4698
4699                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4700                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4701                               root->objectid, key->objectid, key->offset);
4702                         break;
4703                 }
4704         }
4705
4706         return err;
4707 }
4708
4709 /*
4710  * Check file extent datasum/hole, update the size of the file extents,
4711  * check and update the last offset of the file extent.
4712  *
4713  * @root:       the root of fs/file tree.
4714  * @fkey:       the key of the file extent.
4715  * @nodatasum:  INODE_NODATASUM feature.
4716  * @size:       the sum of all EXTENT_DATA items size for this inode.
4717  * @end:        the offset of the last extent.
4718  *
4719  * Return 0 if no error occurred.
4720  */
4721 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4722                              struct extent_buffer *node, int slot,
4723                              unsigned int nodatasum, u64 *size, u64 *end)
4724 {
4725         struct btrfs_file_extent_item *fi;
4726         u64 disk_bytenr;
4727         u64 disk_num_bytes;
4728         u64 extent_num_bytes;
4729         u64 extent_offset;
4730         u64 csum_found;         /* In byte size, sectorsize aligned */
4731         u64 search_start;       /* Logical range start we search for csum */
4732         u64 search_len;         /* Logical range len we search for csum */
4733         unsigned int extent_type;
4734         unsigned int is_hole;
4735         int compressed = 0;
4736         int ret;
4737         int err = 0;
4738
4739         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4740
4741         /* Check inline extent */
4742         extent_type = btrfs_file_extent_type(node, fi);
4743         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4744                 struct btrfs_item *e = btrfs_item_nr(slot);
4745                 u32 item_inline_len;
4746
4747                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4748                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4749                 compressed = btrfs_file_extent_compression(node, fi);
4750                 if (extent_num_bytes == 0) {
4751                         error(
4752                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4753                                 root->objectid, fkey->objectid, fkey->offset);
4754                         err |= FILE_EXTENT_ERROR;
4755                 }
4756                 if (!compressed && extent_num_bytes != item_inline_len) {
4757                         error(
4758                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4759                                 root->objectid, fkey->objectid, fkey->offset,
4760                                 extent_num_bytes, item_inline_len);
4761                         err |= FILE_EXTENT_ERROR;
4762                 }
4763                 *size += extent_num_bytes;
4764                 return err;
4765         }
4766
4767         /* Check extent type */
4768         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4769                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4770                 err |= FILE_EXTENT_ERROR;
4771                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4772                       root->objectid, fkey->objectid, fkey->offset);
4773                 return err;
4774         }
4775
4776         /* Check REG_EXTENT/PREALLOC_EXTENT */
4777         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4778         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4779         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4780         extent_offset = btrfs_file_extent_offset(node, fi);
4781         compressed = btrfs_file_extent_compression(node, fi);
4782         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4783
4784         /*
4785          * Check EXTENT_DATA csum
4786          *
4787          * For plain (uncompressed) extent, we should only check the range
4788          * we're referring to, as it's possible that part of prealloc extent
4789          * has been written, and has csum:
4790          *
4791          * |<--- Original large preallocated extent A ---->|
4792          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4793          *      No csum                         Has csum
4794          *
4795          * For compressed extent, we should check the whole range.
4796          */
4797         if (!compressed) {
4798                 search_start = disk_bytenr + extent_offset;
4799                 search_len = extent_num_bytes;
4800         } else {
4801                 search_start = disk_bytenr;
4802                 search_len = disk_num_bytes;
4803         }
4804         ret = count_csum_range(root, search_start, search_len, &csum_found);
4805         if (csum_found > 0 && nodatasum) {
4806                 err |= ODD_CSUM_ITEM;
4807                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4808                       root->objectid, fkey->objectid, fkey->offset);
4809         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4810                    !is_hole && (ret < 0 || csum_found < search_len)) {
4811                 err |= CSUM_ITEM_MISSING;
4812                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4813                       root->objectid, fkey->objectid, fkey->offset,
4814                       csum_found, search_len);
4815         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4816                 err |= ODD_CSUM_ITEM;
4817                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4818                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4819         }
4820
4821         /* Check EXTENT_DATA hole */
4822         if (no_holes && is_hole) {
4823                 err |= FILE_EXTENT_ERROR;
4824                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4825                       root->objectid, fkey->objectid, fkey->offset);
4826         } else if (!no_holes && *end != fkey->offset) {
4827                 err |= FILE_EXTENT_ERROR;
4828                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4829                       root->objectid, fkey->objectid, fkey->offset);
4830         }
4831
4832         *end += extent_num_bytes;
4833         if (!is_hole)
4834                 *size += extent_num_bytes;
4835
4836         return err;
4837 }
4838
4839 /*
4840  * Check INODE_ITEM and related ITEMs (the same inode number)
4841  * 1. check link count
4842  * 2. check inode ref/extref
4843  * 3. check dir item/index
4844  *
4845  * @ext_ref:    the EXTENDED_IREF feature
4846  *
4847  * Return 0 if no error occurred.
4848  * Return >0 for error or hit the traversal is done(by error bitmap)
4849  */
4850 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4851                             unsigned int ext_ref)
4852 {
4853         struct extent_buffer *node;
4854         struct btrfs_inode_item *ii;
4855         struct btrfs_key key;
4856         u64 inode_id;
4857         u32 mode;
4858         u64 nlink;
4859         u64 nbytes;
4860         u64 isize;
4861         u64 size = 0;
4862         u64 refs = 0;
4863         u64 extent_end = 0;
4864         u64 extent_size = 0;
4865         unsigned int dir;
4866         unsigned int nodatasum;
4867         int slot;
4868         int ret;
4869         int err = 0;
4870
4871         node = path->nodes[0];
4872         slot = path->slots[0];
4873
4874         btrfs_item_key_to_cpu(node, &key, slot);
4875         inode_id = key.objectid;
4876
4877         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4878                 ret = btrfs_next_item(root, path);
4879                 if (ret > 0)
4880                         err |= LAST_ITEM;
4881                 return err;
4882         }
4883
4884         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4885         isize = btrfs_inode_size(node, ii);
4886         nbytes = btrfs_inode_nbytes(node, ii);
4887         mode = btrfs_inode_mode(node, ii);
4888         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4889         nlink = btrfs_inode_nlink(node, ii);
4890         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4891
4892         while (1) {
4893                 ret = btrfs_next_item(root, path);
4894                 if (ret < 0) {
4895                         /* out will fill 'err' rusing current statistics */
4896                         goto out;
4897                 } else if (ret > 0) {
4898                         err |= LAST_ITEM;
4899                         goto out;
4900                 }
4901
4902                 node = path->nodes[0];
4903                 slot = path->slots[0];
4904                 btrfs_item_key_to_cpu(node, &key, slot);
4905                 if (key.objectid != inode_id)
4906                         goto out;
4907
4908                 switch (key.type) {
4909                 case BTRFS_INODE_REF_KEY:
4910                         ret = check_inode_ref(root, &key, node, slot, &refs,
4911                                               mode);
4912                         err |= ret;
4913                         break;
4914                 case BTRFS_INODE_EXTREF_KEY:
4915                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4916                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4917                                         root->objectid, key.objectid,
4918                                         key.offset);
4919                         ret = check_inode_extref(root, &key, node, slot, &refs,
4920                                                  mode);
4921                         err |= ret;
4922                         break;
4923                 case BTRFS_DIR_ITEM_KEY:
4924                 case BTRFS_DIR_INDEX_KEY:
4925                         if (!dir) {
4926                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4927                                         root->objectid, inode_id,
4928                                         imode_to_type(mode), key.objectid,
4929                                         key.offset);
4930                         }
4931                         ret = check_dir_item(root, &key, node, slot, &size,
4932                                              ext_ref);
4933                         err |= ret;
4934                         break;
4935                 case BTRFS_EXTENT_DATA_KEY:
4936                         if (dir) {
4937                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4938                                         root->objectid, inode_id, key.objectid,
4939                                         key.offset);
4940                         }
4941                         ret = check_file_extent(root, &key, node, slot,
4942                                                 nodatasum, &extent_size,
4943                                                 &extent_end);
4944                         err |= ret;
4945                         break;
4946                 case BTRFS_XATTR_ITEM_KEY:
4947                         break;
4948                 default:
4949                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4950                               key.objectid, key.type, key.offset);
4951                 }
4952         }
4953
4954 out:
4955         /* verify INODE_ITEM nlink/isize/nbytes */
4956         if (dir) {
4957                 if (nlink != 1) {
4958                         err |= LINK_COUNT_ERROR;
4959                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4960                               root->objectid, inode_id, nlink);
4961                 }
4962
4963                 /*
4964                  * Just a warning, as dir inode nbytes is just an
4965                  * instructive value.
4966                  */
4967                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4968                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4969                                 root->objectid, inode_id, root->nodesize);
4970                 }
4971
4972                 if (isize != size) {
4973                         err |= ISIZE_ERROR;
4974                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4975                               root->objectid, inode_id, isize, size);
4976                 }
4977         } else {
4978                 if (nlink != refs) {
4979                         err |= LINK_COUNT_ERROR;
4980                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4981                               root->objectid, inode_id, nlink, refs);
4982                 } else if (!nlink) {
4983                         err |= ORPHAN_ITEM;
4984                 }
4985
4986                 if (!nbytes && !no_holes && extent_end < isize) {
4987                         err |= NBYTES_ERROR;
4988                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4989                               root->objectid, inode_id, isize);
4990                 }
4991
4992                 if (nbytes != extent_size) {
4993                         err |= NBYTES_ERROR;
4994                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4995                               root->objectid, inode_id, nbytes, extent_size);
4996                 }
4997         }
4998
4999         return err;
5000 }
5001
5002 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5003 {
5004         struct btrfs_path path;
5005         struct btrfs_key key;
5006         int err = 0;
5007         int ret;
5008
5009         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5010         key.type = BTRFS_INODE_ITEM_KEY;
5011         key.offset = 0;
5012
5013         /* For root being dropped, we don't need to check first inode */
5014         if (btrfs_root_refs(&root->root_item) == 0 &&
5015             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5016             key.objectid)
5017                 return 0;
5018
5019         btrfs_init_path(&path);
5020
5021         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5022         if (ret < 0)
5023                 goto out;
5024         if (ret > 0) {
5025                 ret = 0;
5026                 err |= INODE_ITEM_MISSING;
5027                 error("first inode item of root %llu is missing",
5028                       root->objectid);
5029         }
5030
5031         err |= check_inode_item(root, &path, ext_ref);
5032         err &= ~LAST_ITEM;
5033         if (err && !ret)
5034                 ret = -EIO;
5035 out:
5036         btrfs_release_path(&path);
5037         return ret;
5038 }
5039
5040 /*
5041  * Iterate all item on the tree and call check_inode_item() to check.
5042  *
5043  * @root:       the root of the tree to be checked.
5044  * @ext_ref:    the EXTENDED_IREF feature
5045  *
5046  * Return 0 if no error found.
5047  * Return <0 for error.
5048  */
5049 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5050 {
5051         struct btrfs_path path;
5052         struct node_refs nrefs;
5053         struct btrfs_root_item *root_item = &root->root_item;
5054         int ret;
5055         int level;
5056         int err = 0;
5057
5058         /*
5059          * We need to manually check the first inode item(256)
5060          * As the following traversal function will only start from
5061          * the first inode item in the leaf, if inode item(256) is missing
5062          * we will just skip it forever.
5063          */
5064         ret = check_fs_first_inode(root, ext_ref);
5065         if (ret < 0)
5066                 return ret;
5067
5068         memset(&nrefs, 0, sizeof(nrefs));
5069         level = btrfs_header_level(root->node);
5070         btrfs_init_path(&path);
5071
5072         if (btrfs_root_refs(root_item) > 0 ||
5073             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5074                 path.nodes[level] = root->node;
5075                 path.slots[level] = 0;
5076                 extent_buffer_get(root->node);
5077         } else {
5078                 struct btrfs_key key;
5079
5080                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5081                 level = root_item->drop_level;
5082                 path.lowest_level = level;
5083                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5084                 if (ret < 0)
5085                         goto out;
5086                 ret = 0;
5087         }
5088
5089         while (1) {
5090                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5091                 err |= !!ret;
5092
5093                 /* if ret is negative, walk shall stop */
5094                 if (ret < 0) {
5095                         ret = err;
5096                         break;
5097                 }
5098
5099                 ret = walk_up_tree_v2(root, &path, &level);
5100                 if (ret != 0) {
5101                         /* Normal exit, reset ret to err */
5102                         ret = err;
5103                         break;
5104                 }
5105         }
5106
5107 out:
5108         btrfs_release_path(&path);
5109         return ret;
5110 }
5111
5112 /*
5113  * Find the relative ref for root_ref and root_backref.
5114  *
5115  * @root:       the root of the root tree.
5116  * @ref_key:    the key of the root ref.
5117  *
5118  * Return 0 if no error occurred.
5119  */
5120 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5121                           struct extent_buffer *node, int slot)
5122 {
5123         struct btrfs_path path;
5124         struct btrfs_key key;
5125         struct btrfs_root_ref *ref;
5126         struct btrfs_root_ref *backref;
5127         char ref_name[BTRFS_NAME_LEN] = {0};
5128         char backref_name[BTRFS_NAME_LEN] = {0};
5129         u64 ref_dirid;
5130         u64 ref_seq;
5131         u32 ref_namelen;
5132         u64 backref_dirid;
5133         u64 backref_seq;
5134         u32 backref_namelen;
5135         u32 len;
5136         int ret;
5137         int err = 0;
5138
5139         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5140         ref_dirid = btrfs_root_ref_dirid(node, ref);
5141         ref_seq = btrfs_root_ref_sequence(node, ref);
5142         ref_namelen = btrfs_root_ref_name_len(node, ref);
5143
5144         if (ref_namelen <= BTRFS_NAME_LEN) {
5145                 len = ref_namelen;
5146         } else {
5147                 len = BTRFS_NAME_LEN;
5148                 warning("%s[%llu %llu] ref_name too long",
5149                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5150                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5151                         ref_key->offset);
5152         }
5153         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5154
5155         /* Find relative root_ref */
5156         key.objectid = ref_key->offset;
5157         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5158         key.offset = ref_key->objectid;
5159
5160         btrfs_init_path(&path);
5161         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5162         if (ret) {
5163                 err |= ROOT_REF_MISSING;
5164                 error("%s[%llu %llu] couldn't find relative ref",
5165                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5166                       "ROOT_REF" : "ROOT_BACKREF",
5167                       ref_key->objectid, ref_key->offset);
5168                 goto out;
5169         }
5170
5171         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5172                                  struct btrfs_root_ref);
5173         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5174         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5175         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5176
5177         if (backref_namelen <= BTRFS_NAME_LEN) {
5178                 len = backref_namelen;
5179         } else {
5180                 len = BTRFS_NAME_LEN;
5181                 warning("%s[%llu %llu] ref_name too long",
5182                         key.type == BTRFS_ROOT_REF_KEY ?
5183                         "ROOT_REF" : "ROOT_BACKREF",
5184                         key.objectid, key.offset);
5185         }
5186         read_extent_buffer(path.nodes[0], backref_name,
5187                            (unsigned long)(backref + 1), len);
5188
5189         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5190             ref_namelen != backref_namelen ||
5191             strncmp(ref_name, backref_name, len)) {
5192                 err |= ROOT_REF_MISMATCH;
5193                 error("%s[%llu %llu] mismatch relative ref",
5194                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5195                       "ROOT_REF" : "ROOT_BACKREF",
5196                       ref_key->objectid, ref_key->offset);
5197         }
5198 out:
5199         btrfs_release_path(&path);
5200         return err;
5201 }
5202
5203 /*
5204  * Check all fs/file tree in low_memory mode.
5205  *
5206  * 1. for fs tree root item, call check_fs_root_v2()
5207  * 2. for fs tree root ref/backref, call check_root_ref()
5208  *
5209  * Return 0 if no error occurred.
5210  */
5211 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5212 {
5213         struct btrfs_root *tree_root = fs_info->tree_root;
5214         struct btrfs_root *cur_root = NULL;
5215         struct btrfs_path path;
5216         struct btrfs_key key;
5217         struct extent_buffer *node;
5218         unsigned int ext_ref;
5219         int slot;
5220         int ret;
5221         int err = 0;
5222
5223         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5224
5225         btrfs_init_path(&path);
5226         key.objectid = BTRFS_FS_TREE_OBJECTID;
5227         key.offset = 0;
5228         key.type = BTRFS_ROOT_ITEM_KEY;
5229
5230         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5231         if (ret < 0) {
5232                 err = ret;
5233                 goto out;
5234         } else if (ret > 0) {
5235                 err = -ENOENT;
5236                 goto out;
5237         }
5238
5239         while (1) {
5240                 node = path.nodes[0];
5241                 slot = path.slots[0];
5242                 btrfs_item_key_to_cpu(node, &key, slot);
5243                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5244                         goto out;
5245                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5246                     fs_root_objectid(key.objectid)) {
5247                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5248                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5249                                                                        &key);
5250                         } else {
5251                                 key.offset = (u64)-1;
5252                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5253                         }
5254
5255                         if (IS_ERR(cur_root)) {
5256                                 error("Fail to read fs/subvol tree: %lld",
5257                                       key.objectid);
5258                                 err = -EIO;
5259                                 goto next;
5260                         }
5261
5262                         ret = check_fs_root_v2(cur_root, ext_ref);
5263                         err |= ret;
5264
5265                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5266                                 btrfs_free_fs_root(cur_root);
5267                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5268                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5269                         ret = check_root_ref(tree_root, &key, node, slot);
5270                         err |= ret;
5271                 }
5272 next:
5273                 ret = btrfs_next_item(tree_root, &path);
5274                 if (ret > 0)
5275                         goto out;
5276                 if (ret < 0) {
5277                         err = ret;
5278                         goto out;
5279                 }
5280         }
5281
5282 out:
5283         btrfs_release_path(&path);
5284         return err;
5285 }
5286
5287 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5288 {
5289         struct list_head *cur = rec->backrefs.next;
5290         struct extent_backref *back;
5291         struct tree_backref *tback;
5292         struct data_backref *dback;
5293         u64 found = 0;
5294         int err = 0;
5295
5296         while(cur != &rec->backrefs) {
5297                 back = to_extent_backref(cur);
5298                 cur = cur->next;
5299                 if (!back->found_extent_tree) {
5300                         err = 1;
5301                         if (!print_errs)
5302                                 goto out;
5303                         if (back->is_data) {
5304                                 dback = to_data_backref(back);
5305                                 fprintf(stderr, "Backref %llu %s %llu"
5306                                         " owner %llu offset %llu num_refs %lu"
5307                                         " not found in extent tree\n",
5308                                         (unsigned long long)rec->start,
5309                                         back->full_backref ?
5310                                         "parent" : "root",
5311                                         back->full_backref ?
5312                                         (unsigned long long)dback->parent:
5313                                         (unsigned long long)dback->root,
5314                                         (unsigned long long)dback->owner,
5315                                         (unsigned long long)dback->offset,
5316                                         (unsigned long)dback->num_refs);
5317                         } else {
5318                                 tback = to_tree_backref(back);
5319                                 fprintf(stderr, "Backref %llu parent %llu"
5320                                         " root %llu not found in extent tree\n",
5321                                         (unsigned long long)rec->start,
5322                                         (unsigned long long)tback->parent,
5323                                         (unsigned long long)tback->root);
5324                         }
5325                 }
5326                 if (!back->is_data && !back->found_ref) {
5327                         err = 1;
5328                         if (!print_errs)
5329                                 goto out;
5330                         tback = to_tree_backref(back);
5331                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5332                                 (unsigned long long)rec->start,
5333                                 back->full_backref ? "parent" : "root",
5334                                 back->full_backref ?
5335                                 (unsigned long long)tback->parent :
5336                                 (unsigned long long)tback->root, back);
5337                 }
5338                 if (back->is_data) {
5339                         dback = to_data_backref(back);
5340                         if (dback->found_ref != dback->num_refs) {
5341                                 err = 1;
5342                                 if (!print_errs)
5343                                         goto out;
5344                                 fprintf(stderr, "Incorrect local backref count"
5345                                         " on %llu %s %llu owner %llu"
5346                                         " offset %llu found %u wanted %u back %p\n",
5347                                         (unsigned long long)rec->start,
5348                                         back->full_backref ?
5349                                         "parent" : "root",
5350                                         back->full_backref ?
5351                                         (unsigned long long)dback->parent:
5352                                         (unsigned long long)dback->root,
5353                                         (unsigned long long)dback->owner,
5354                                         (unsigned long long)dback->offset,
5355                                         dback->found_ref, dback->num_refs, back);
5356                         }
5357                         if (dback->disk_bytenr != rec->start) {
5358                                 err = 1;
5359                                 if (!print_errs)
5360                                         goto out;
5361                                 fprintf(stderr, "Backref disk bytenr does not"
5362                                         " match extent record, bytenr=%llu, "
5363                                         "ref bytenr=%llu\n",
5364                                         (unsigned long long)rec->start,
5365                                         (unsigned long long)dback->disk_bytenr);
5366                         }
5367
5368                         if (dback->bytes != rec->nr) {
5369                                 err = 1;
5370                                 if (!print_errs)
5371                                         goto out;
5372                                 fprintf(stderr, "Backref bytes do not match "
5373                                         "extent backref, bytenr=%llu, ref "
5374                                         "bytes=%llu, backref bytes=%llu\n",
5375                                         (unsigned long long)rec->start,
5376                                         (unsigned long long)rec->nr,
5377                                         (unsigned long long)dback->bytes);
5378                         }
5379                 }
5380                 if (!back->is_data) {
5381                         found += 1;
5382                 } else {
5383                         dback = to_data_backref(back);
5384                         found += dback->found_ref;
5385                 }
5386         }
5387         if (found != rec->refs) {
5388                 err = 1;
5389                 if (!print_errs)
5390                         goto out;
5391                 fprintf(stderr, "Incorrect global backref count "
5392                         "on %llu found %llu wanted %llu\n",
5393                         (unsigned long long)rec->start,
5394                         (unsigned long long)found,
5395                         (unsigned long long)rec->refs);
5396         }
5397 out:
5398         return err;
5399 }
5400
5401 static int free_all_extent_backrefs(struct extent_record *rec)
5402 {
5403         struct extent_backref *back;
5404         struct list_head *cur;
5405         while (!list_empty(&rec->backrefs)) {
5406                 cur = rec->backrefs.next;
5407                 back = to_extent_backref(cur);
5408                 list_del(cur);
5409                 free(back);
5410         }
5411         return 0;
5412 }
5413
5414 static void free_extent_record_cache(struct cache_tree *extent_cache)
5415 {
5416         struct cache_extent *cache;
5417         struct extent_record *rec;
5418
5419         while (1) {
5420                 cache = first_cache_extent(extent_cache);
5421                 if (!cache)
5422                         break;
5423                 rec = container_of(cache, struct extent_record, cache);
5424                 remove_cache_extent(extent_cache, cache);
5425                 free_all_extent_backrefs(rec);
5426                 free(rec);
5427         }
5428 }
5429
5430 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5431                                  struct extent_record *rec)
5432 {
5433         if (rec->content_checked && rec->owner_ref_checked &&
5434             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5435             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5436             !rec->bad_full_backref && !rec->crossing_stripes &&
5437             !rec->wrong_chunk_type) {
5438                 remove_cache_extent(extent_cache, &rec->cache);
5439                 free_all_extent_backrefs(rec);
5440                 list_del_init(&rec->list);
5441                 free(rec);
5442         }
5443         return 0;
5444 }
5445
5446 static int check_owner_ref(struct btrfs_root *root,
5447                             struct extent_record *rec,
5448                             struct extent_buffer *buf)
5449 {
5450         struct extent_backref *node;
5451         struct tree_backref *back;
5452         struct btrfs_root *ref_root;
5453         struct btrfs_key key;
5454         struct btrfs_path path;
5455         struct extent_buffer *parent;
5456         int level;
5457         int found = 0;
5458         int ret;
5459
5460         list_for_each_entry(node, &rec->backrefs, list) {
5461                 if (node->is_data)
5462                         continue;
5463                 if (!node->found_ref)
5464                         continue;
5465                 if (node->full_backref)
5466                         continue;
5467                 back = to_tree_backref(node);
5468                 if (btrfs_header_owner(buf) == back->root)
5469                         return 0;
5470         }
5471         BUG_ON(rec->is_root);
5472
5473         /* try to find the block by search corresponding fs tree */
5474         key.objectid = btrfs_header_owner(buf);
5475         key.type = BTRFS_ROOT_ITEM_KEY;
5476         key.offset = (u64)-1;
5477
5478         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5479         if (IS_ERR(ref_root))
5480                 return 1;
5481
5482         level = btrfs_header_level(buf);
5483         if (level == 0)
5484                 btrfs_item_key_to_cpu(buf, &key, 0);
5485         else
5486                 btrfs_node_key_to_cpu(buf, &key, 0);
5487
5488         btrfs_init_path(&path);
5489         path.lowest_level = level + 1;
5490         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5491         if (ret < 0)
5492                 return 0;
5493
5494         parent = path.nodes[level + 1];
5495         if (parent && buf->start == btrfs_node_blockptr(parent,
5496                                                         path.slots[level + 1]))
5497                 found = 1;
5498
5499         btrfs_release_path(&path);
5500         return found ? 0 : 1;
5501 }
5502
5503 static int is_extent_tree_record(struct extent_record *rec)
5504 {
5505         struct list_head *cur = rec->backrefs.next;
5506         struct extent_backref *node;
5507         struct tree_backref *back;
5508         int is_extent = 0;
5509
5510         while(cur != &rec->backrefs) {
5511                 node = to_extent_backref(cur);
5512                 cur = cur->next;
5513                 if (node->is_data)
5514                         return 0;
5515                 back = to_tree_backref(node);
5516                 if (node->full_backref)
5517                         return 0;
5518                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5519                         is_extent = 1;
5520         }
5521         return is_extent;
5522 }
5523
5524
5525 static int record_bad_block_io(struct btrfs_fs_info *info,
5526                                struct cache_tree *extent_cache,
5527                                u64 start, u64 len)
5528 {
5529         struct extent_record *rec;
5530         struct cache_extent *cache;
5531         struct btrfs_key key;
5532
5533         cache = lookup_cache_extent(extent_cache, start, len);
5534         if (!cache)
5535                 return 0;
5536
5537         rec = container_of(cache, struct extent_record, cache);
5538         if (!is_extent_tree_record(rec))
5539                 return 0;
5540
5541         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5542         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5543 }
5544
5545 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5546                        struct extent_buffer *buf, int slot)
5547 {
5548         if (btrfs_header_level(buf)) {
5549                 struct btrfs_key_ptr ptr1, ptr2;
5550
5551                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5552                                    sizeof(struct btrfs_key_ptr));
5553                 read_extent_buffer(buf, &ptr2,
5554                                    btrfs_node_key_ptr_offset(slot + 1),
5555                                    sizeof(struct btrfs_key_ptr));
5556                 write_extent_buffer(buf, &ptr1,
5557                                     btrfs_node_key_ptr_offset(slot + 1),
5558                                     sizeof(struct btrfs_key_ptr));
5559                 write_extent_buffer(buf, &ptr2,
5560                                     btrfs_node_key_ptr_offset(slot),
5561                                     sizeof(struct btrfs_key_ptr));
5562                 if (slot == 0) {
5563                         struct btrfs_disk_key key;
5564                         btrfs_node_key(buf, &key, 0);
5565                         btrfs_fixup_low_keys(root, path, &key,
5566                                              btrfs_header_level(buf) + 1);
5567                 }
5568         } else {
5569                 struct btrfs_item *item1, *item2;
5570                 struct btrfs_key k1, k2;
5571                 char *item1_data, *item2_data;
5572                 u32 item1_offset, item2_offset, item1_size, item2_size;
5573
5574                 item1 = btrfs_item_nr(slot);
5575                 item2 = btrfs_item_nr(slot + 1);
5576                 btrfs_item_key_to_cpu(buf, &k1, slot);
5577                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5578                 item1_offset = btrfs_item_offset(buf, item1);
5579                 item2_offset = btrfs_item_offset(buf, item2);
5580                 item1_size = btrfs_item_size(buf, item1);
5581                 item2_size = btrfs_item_size(buf, item2);
5582
5583                 item1_data = malloc(item1_size);
5584                 if (!item1_data)
5585                         return -ENOMEM;
5586                 item2_data = malloc(item2_size);
5587                 if (!item2_data) {
5588                         free(item1_data);
5589                         return -ENOMEM;
5590                 }
5591
5592                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5593                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5594
5595                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5596                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5597                 free(item1_data);
5598                 free(item2_data);
5599
5600                 btrfs_set_item_offset(buf, item1, item2_offset);
5601                 btrfs_set_item_offset(buf, item2, item1_offset);
5602                 btrfs_set_item_size(buf, item1, item2_size);
5603                 btrfs_set_item_size(buf, item2, item1_size);
5604
5605                 path->slots[0] = slot;
5606                 btrfs_set_item_key_unsafe(root, path, &k2);
5607                 path->slots[0] = slot + 1;
5608                 btrfs_set_item_key_unsafe(root, path, &k1);
5609         }
5610         return 0;
5611 }
5612
5613 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5614 {
5615         struct extent_buffer *buf;
5616         struct btrfs_key k1, k2;
5617         int i;
5618         int level = path->lowest_level;
5619         int ret = -EIO;
5620
5621         buf = path->nodes[level];
5622         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5623                 if (level) {
5624                         btrfs_node_key_to_cpu(buf, &k1, i);
5625                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5626                 } else {
5627                         btrfs_item_key_to_cpu(buf, &k1, i);
5628                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5629                 }
5630                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5631                         continue;
5632                 ret = swap_values(root, path, buf, i);
5633                 if (ret)
5634                         break;
5635                 btrfs_mark_buffer_dirty(buf);
5636                 i = 0;
5637         }
5638         return ret;
5639 }
5640
5641 static int delete_bogus_item(struct btrfs_root *root,
5642                              struct btrfs_path *path,
5643                              struct extent_buffer *buf, int slot)
5644 {
5645         struct btrfs_key key;
5646         int nritems = btrfs_header_nritems(buf);
5647
5648         btrfs_item_key_to_cpu(buf, &key, slot);
5649
5650         /* These are all the keys we can deal with missing. */
5651         if (key.type != BTRFS_DIR_INDEX_KEY &&
5652             key.type != BTRFS_EXTENT_ITEM_KEY &&
5653             key.type != BTRFS_METADATA_ITEM_KEY &&
5654             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5655             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5656                 return -1;
5657
5658         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5659                (unsigned long long)key.objectid, key.type,
5660                (unsigned long long)key.offset, slot, buf->start);
5661         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5662                               btrfs_item_nr_offset(slot + 1),
5663                               sizeof(struct btrfs_item) *
5664                               (nritems - slot - 1));
5665         btrfs_set_header_nritems(buf, nritems - 1);
5666         if (slot == 0) {
5667                 struct btrfs_disk_key disk_key;
5668
5669                 btrfs_item_key(buf, &disk_key, 0);
5670                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5671         }
5672         btrfs_mark_buffer_dirty(buf);
5673         return 0;
5674 }
5675
5676 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5677 {
5678         struct extent_buffer *buf;
5679         int i;
5680         int ret = 0;
5681
5682         /* We should only get this for leaves */
5683         BUG_ON(path->lowest_level);
5684         buf = path->nodes[0];
5685 again:
5686         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5687                 unsigned int shift = 0, offset;
5688
5689                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5690                     BTRFS_LEAF_DATA_SIZE(root)) {
5691                         if (btrfs_item_end_nr(buf, i) >
5692                             BTRFS_LEAF_DATA_SIZE(root)) {
5693                                 ret = delete_bogus_item(root, path, buf, i);
5694                                 if (!ret)
5695                                         goto again;
5696                                 fprintf(stderr, "item is off the end of the "
5697                                         "leaf, can't fix\n");
5698                                 ret = -EIO;
5699                                 break;
5700                         }
5701                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5702                                 btrfs_item_end_nr(buf, i);
5703                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5704                            btrfs_item_offset_nr(buf, i - 1)) {
5705                         if (btrfs_item_end_nr(buf, i) >
5706                             btrfs_item_offset_nr(buf, i - 1)) {
5707                                 ret = delete_bogus_item(root, path, buf, i);
5708                                 if (!ret)
5709                                         goto again;
5710                                 fprintf(stderr, "items overlap, can't fix\n");
5711                                 ret = -EIO;
5712                                 break;
5713                         }
5714                         shift = btrfs_item_offset_nr(buf, i - 1) -
5715                                 btrfs_item_end_nr(buf, i);
5716                 }
5717                 if (!shift)
5718                         continue;
5719
5720                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5721                        i, shift, (unsigned long long)buf->start);
5722                 offset = btrfs_item_offset_nr(buf, i);
5723                 memmove_extent_buffer(buf,
5724                                       btrfs_leaf_data(buf) + offset + shift,
5725                                       btrfs_leaf_data(buf) + offset,
5726                                       btrfs_item_size_nr(buf, i));
5727                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5728                                       offset + shift);
5729                 btrfs_mark_buffer_dirty(buf);
5730         }
5731
5732         /*
5733          * We may have moved things, in which case we want to exit so we don't
5734          * write those changes out.  Once we have proper abort functionality in
5735          * progs this can be changed to something nicer.
5736          */
5737         BUG_ON(ret);
5738         return ret;
5739 }
5740
5741 /*
5742  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5743  * then just return -EIO.
5744  */
5745 static int try_to_fix_bad_block(struct btrfs_root *root,
5746                                 struct extent_buffer *buf,
5747                                 enum btrfs_tree_block_status status)
5748 {
5749         struct btrfs_trans_handle *trans;
5750         struct ulist *roots;
5751         struct ulist_node *node;
5752         struct btrfs_root *search_root;
5753         struct btrfs_path path;
5754         struct ulist_iterator iter;
5755         struct btrfs_key root_key, key;
5756         int ret;
5757
5758         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5759             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5760                 return -EIO;
5761
5762         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5763         if (ret)
5764                 return -EIO;
5765
5766         btrfs_init_path(&path);
5767         ULIST_ITER_INIT(&iter);
5768         while ((node = ulist_next(roots, &iter))) {
5769                 root_key.objectid = node->val;
5770                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5771                 root_key.offset = (u64)-1;
5772
5773                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5774                 if (IS_ERR(root)) {
5775                         ret = -EIO;
5776                         break;
5777                 }
5778
5779
5780                 trans = btrfs_start_transaction(search_root, 0);
5781                 if (IS_ERR(trans)) {
5782                         ret = PTR_ERR(trans);
5783                         break;
5784                 }
5785
5786                 path.lowest_level = btrfs_header_level(buf);
5787                 path.skip_check_block = 1;
5788                 if (path.lowest_level)
5789                         btrfs_node_key_to_cpu(buf, &key, 0);
5790                 else
5791                         btrfs_item_key_to_cpu(buf, &key, 0);
5792                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5793                 if (ret) {
5794                         ret = -EIO;
5795                         btrfs_commit_transaction(trans, search_root);
5796                         break;
5797                 }
5798                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5799                         ret = fix_key_order(search_root, &path);
5800                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5801                         ret = fix_item_offset(search_root, &path);
5802                 if (ret) {
5803                         btrfs_commit_transaction(trans, search_root);
5804                         break;
5805                 }
5806                 btrfs_release_path(&path);
5807                 btrfs_commit_transaction(trans, search_root);
5808         }
5809         ulist_free(roots);
5810         btrfs_release_path(&path);
5811         return ret;
5812 }
5813
5814 static int check_block(struct btrfs_root *root,
5815                        struct cache_tree *extent_cache,
5816                        struct extent_buffer *buf, u64 flags)
5817 {
5818         struct extent_record *rec;
5819         struct cache_extent *cache;
5820         struct btrfs_key key;
5821         enum btrfs_tree_block_status status;
5822         int ret = 0;
5823         int level;
5824
5825         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5826         if (!cache)
5827                 return 1;
5828         rec = container_of(cache, struct extent_record, cache);
5829         rec->generation = btrfs_header_generation(buf);
5830
5831         level = btrfs_header_level(buf);
5832         if (btrfs_header_nritems(buf) > 0) {
5833
5834                 if (level == 0)
5835                         btrfs_item_key_to_cpu(buf, &key, 0);
5836                 else
5837                         btrfs_node_key_to_cpu(buf, &key, 0);
5838
5839                 rec->info_objectid = key.objectid;
5840         }
5841         rec->info_level = level;
5842
5843         if (btrfs_is_leaf(buf))
5844                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5845         else
5846                 status = btrfs_check_node(root, &rec->parent_key, buf);
5847
5848         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5849                 if (repair)
5850                         status = try_to_fix_bad_block(root, buf, status);
5851                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5852                         ret = -EIO;
5853                         fprintf(stderr, "bad block %llu\n",
5854                                 (unsigned long long)buf->start);
5855                 } else {
5856                         /*
5857                          * Signal to callers we need to start the scan over
5858                          * again since we'll have cowed blocks.
5859                          */
5860                         ret = -EAGAIN;
5861                 }
5862         } else {
5863                 rec->content_checked = 1;
5864                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5865                         rec->owner_ref_checked = 1;
5866                 else {
5867                         ret = check_owner_ref(root, rec, buf);
5868                         if (!ret)
5869                                 rec->owner_ref_checked = 1;
5870                 }
5871         }
5872         if (!ret)
5873                 maybe_free_extent_rec(extent_cache, rec);
5874         return ret;
5875 }
5876
5877 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5878                                                 u64 parent, u64 root)
5879 {
5880         struct list_head *cur = rec->backrefs.next;
5881         struct extent_backref *node;
5882         struct tree_backref *back;
5883
5884         while(cur != &rec->backrefs) {
5885                 node = to_extent_backref(cur);
5886                 cur = cur->next;
5887                 if (node->is_data)
5888                         continue;
5889                 back = to_tree_backref(node);
5890                 if (parent > 0) {
5891                         if (!node->full_backref)
5892                                 continue;
5893                         if (parent == back->parent)
5894                                 return back;
5895                 } else {
5896                         if (node->full_backref)
5897                                 continue;
5898                         if (back->root == root)
5899                                 return back;
5900                 }
5901         }
5902         return NULL;
5903 }
5904
5905 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5906                                                 u64 parent, u64 root)
5907 {
5908         struct tree_backref *ref = malloc(sizeof(*ref));
5909
5910         if (!ref)
5911                 return NULL;
5912         memset(&ref->node, 0, sizeof(ref->node));
5913         if (parent > 0) {
5914                 ref->parent = parent;
5915                 ref->node.full_backref = 1;
5916         } else {
5917                 ref->root = root;
5918                 ref->node.full_backref = 0;
5919         }
5920         list_add_tail(&ref->node.list, &rec->backrefs);
5921
5922         return ref;
5923 }
5924
5925 static struct data_backref *find_data_backref(struct extent_record *rec,
5926                                                 u64 parent, u64 root,
5927                                                 u64 owner, u64 offset,
5928                                                 int found_ref,
5929                                                 u64 disk_bytenr, u64 bytes)
5930 {
5931         struct list_head *cur = rec->backrefs.next;
5932         struct extent_backref *node;
5933         struct data_backref *back;
5934
5935         while(cur != &rec->backrefs) {
5936                 node = to_extent_backref(cur);
5937                 cur = cur->next;
5938                 if (!node->is_data)
5939                         continue;
5940                 back = to_data_backref(node);
5941                 if (parent > 0) {
5942                         if (!node->full_backref)
5943                                 continue;
5944                         if (parent == back->parent)
5945                                 return back;
5946                 } else {
5947                         if (node->full_backref)
5948                                 continue;
5949                         if (back->root == root && back->owner == owner &&
5950                             back->offset == offset) {
5951                                 if (found_ref && node->found_ref &&
5952                                     (back->bytes != bytes ||
5953                                     back->disk_bytenr != disk_bytenr))
5954                                         continue;
5955                                 return back;
5956                         }
5957                 }
5958         }
5959         return NULL;
5960 }
5961
5962 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5963                                                 u64 parent, u64 root,
5964                                                 u64 owner, u64 offset,
5965                                                 u64 max_size)
5966 {
5967         struct data_backref *ref = malloc(sizeof(*ref));
5968
5969         if (!ref)
5970                 return NULL;
5971         memset(&ref->node, 0, sizeof(ref->node));
5972         ref->node.is_data = 1;
5973
5974         if (parent > 0) {
5975                 ref->parent = parent;
5976                 ref->owner = 0;
5977                 ref->offset = 0;
5978                 ref->node.full_backref = 1;
5979         } else {
5980                 ref->root = root;
5981                 ref->owner = owner;
5982                 ref->offset = offset;
5983                 ref->node.full_backref = 0;
5984         }
5985         ref->bytes = max_size;
5986         ref->found_ref = 0;
5987         ref->num_refs = 0;
5988         list_add_tail(&ref->node.list, &rec->backrefs);
5989         if (max_size > rec->max_size)
5990                 rec->max_size = max_size;
5991         return ref;
5992 }
5993
5994 /* Check if the type of extent matches with its chunk */
5995 static void check_extent_type(struct extent_record *rec)
5996 {
5997         struct btrfs_block_group_cache *bg_cache;
5998
5999         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6000         if (!bg_cache)
6001                 return;
6002
6003         /* data extent, check chunk directly*/
6004         if (!rec->metadata) {
6005                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6006                         rec->wrong_chunk_type = 1;
6007                 return;
6008         }
6009
6010         /* metadata extent, check the obvious case first */
6011         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6012                                  BTRFS_BLOCK_GROUP_METADATA))) {
6013                 rec->wrong_chunk_type = 1;
6014                 return;
6015         }
6016
6017         /*
6018          * Check SYSTEM extent, as it's also marked as metadata, we can only
6019          * make sure it's a SYSTEM extent by its backref
6020          */
6021         if (!list_empty(&rec->backrefs)) {
6022                 struct extent_backref *node;
6023                 struct tree_backref *tback;
6024                 u64 bg_type;
6025
6026                 node = to_extent_backref(rec->backrefs.next);
6027                 if (node->is_data) {
6028                         /* tree block shouldn't have data backref */
6029                         rec->wrong_chunk_type = 1;
6030                         return;
6031                 }
6032                 tback = container_of(node, struct tree_backref, node);
6033
6034                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6035                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6036                 else
6037                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6038                 if (!(bg_cache->flags & bg_type))
6039                         rec->wrong_chunk_type = 1;
6040         }
6041 }
6042
6043 /*
6044  * Allocate a new extent record, fill default values from @tmpl and insert int
6045  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6046  * the cache, otherwise it fails.
6047  */
6048 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6049                 struct extent_record *tmpl)
6050 {
6051         struct extent_record *rec;
6052         int ret = 0;
6053
6054         BUG_ON(tmpl->max_size == 0);
6055         rec = malloc(sizeof(*rec));
6056         if (!rec)
6057                 return -ENOMEM;
6058         rec->start = tmpl->start;
6059         rec->max_size = tmpl->max_size;
6060         rec->nr = max(tmpl->nr, tmpl->max_size);
6061         rec->found_rec = tmpl->found_rec;
6062         rec->content_checked = tmpl->content_checked;
6063         rec->owner_ref_checked = tmpl->owner_ref_checked;
6064         rec->num_duplicates = 0;
6065         rec->metadata = tmpl->metadata;
6066         rec->flag_block_full_backref = FLAG_UNSET;
6067         rec->bad_full_backref = 0;
6068         rec->crossing_stripes = 0;
6069         rec->wrong_chunk_type = 0;
6070         rec->is_root = tmpl->is_root;
6071         rec->refs = tmpl->refs;
6072         rec->extent_item_refs = tmpl->extent_item_refs;
6073         rec->parent_generation = tmpl->parent_generation;
6074         INIT_LIST_HEAD(&rec->backrefs);
6075         INIT_LIST_HEAD(&rec->dups);
6076         INIT_LIST_HEAD(&rec->list);
6077         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6078         rec->cache.start = tmpl->start;
6079         rec->cache.size = tmpl->nr;
6080         ret = insert_cache_extent(extent_cache, &rec->cache);
6081         if (ret) {
6082                 free(rec);
6083                 return ret;
6084         }
6085         bytes_used += rec->nr;
6086
6087         if (tmpl->metadata)
6088                 rec->crossing_stripes = check_crossing_stripes(global_info,
6089                                 rec->start, global_info->tree_root->nodesize);
6090         check_extent_type(rec);
6091         return ret;
6092 }
6093
6094 /*
6095  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6096  * some are hints:
6097  * - refs              - if found, increase refs
6098  * - is_root           - if found, set
6099  * - content_checked   - if found, set
6100  * - owner_ref_checked - if found, set
6101  *
6102  * If not found, create a new one, initialize and insert.
6103  */
6104 static int add_extent_rec(struct cache_tree *extent_cache,
6105                 struct extent_record *tmpl)
6106 {
6107         struct extent_record *rec;
6108         struct cache_extent *cache;
6109         int ret = 0;
6110         int dup = 0;
6111
6112         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6113         if (cache) {
6114                 rec = container_of(cache, struct extent_record, cache);
6115                 if (tmpl->refs)
6116                         rec->refs++;
6117                 if (rec->nr == 1)
6118                         rec->nr = max(tmpl->nr, tmpl->max_size);
6119
6120                 /*
6121                  * We need to make sure to reset nr to whatever the extent
6122                  * record says was the real size, this way we can compare it to
6123                  * the backrefs.
6124                  */
6125                 if (tmpl->found_rec) {
6126                         if (tmpl->start != rec->start || rec->found_rec) {
6127                                 struct extent_record *tmp;
6128
6129                                 dup = 1;
6130                                 if (list_empty(&rec->list))
6131                                         list_add_tail(&rec->list,
6132                                                       &duplicate_extents);
6133
6134                                 /*
6135                                  * We have to do this song and dance in case we
6136                                  * find an extent record that falls inside of
6137                                  * our current extent record but does not have
6138                                  * the same objectid.
6139                                  */
6140                                 tmp = malloc(sizeof(*tmp));
6141                                 if (!tmp)
6142                                         return -ENOMEM;
6143                                 tmp->start = tmpl->start;
6144                                 tmp->max_size = tmpl->max_size;
6145                                 tmp->nr = tmpl->nr;
6146                                 tmp->found_rec = 1;
6147                                 tmp->metadata = tmpl->metadata;
6148                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6149                                 INIT_LIST_HEAD(&tmp->list);
6150                                 list_add_tail(&tmp->list, &rec->dups);
6151                                 rec->num_duplicates++;
6152                         } else {
6153                                 rec->nr = tmpl->nr;
6154                                 rec->found_rec = 1;
6155                         }
6156                 }
6157
6158                 if (tmpl->extent_item_refs && !dup) {
6159                         if (rec->extent_item_refs) {
6160                                 fprintf(stderr, "block %llu rec "
6161                                         "extent_item_refs %llu, passed %llu\n",
6162                                         (unsigned long long)tmpl->start,
6163                                         (unsigned long long)
6164                                                         rec->extent_item_refs,
6165                                         (unsigned long long)tmpl->extent_item_refs);
6166                         }
6167                         rec->extent_item_refs = tmpl->extent_item_refs;
6168                 }
6169                 if (tmpl->is_root)
6170                         rec->is_root = 1;
6171                 if (tmpl->content_checked)
6172                         rec->content_checked = 1;
6173                 if (tmpl->owner_ref_checked)
6174                         rec->owner_ref_checked = 1;
6175                 memcpy(&rec->parent_key, &tmpl->parent_key,
6176                                 sizeof(tmpl->parent_key));
6177                 if (tmpl->parent_generation)
6178                         rec->parent_generation = tmpl->parent_generation;
6179                 if (rec->max_size < tmpl->max_size)
6180                         rec->max_size = tmpl->max_size;
6181
6182                 /*
6183                  * A metadata extent can't cross stripe_len boundary, otherwise
6184                  * kernel scrub won't be able to handle it.
6185                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6186                  * it.
6187                  */
6188                 if (tmpl->metadata)
6189                         rec->crossing_stripes = check_crossing_stripes(
6190                                         global_info, rec->start,
6191                                         global_info->tree_root->nodesize);
6192                 check_extent_type(rec);
6193                 maybe_free_extent_rec(extent_cache, rec);
6194                 return ret;
6195         }
6196
6197         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6198
6199         return ret;
6200 }
6201
6202 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6203                             u64 parent, u64 root, int found_ref)
6204 {
6205         struct extent_record *rec;
6206         struct tree_backref *back;
6207         struct cache_extent *cache;
6208         int ret;
6209
6210         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6211         if (!cache) {
6212                 struct extent_record tmpl;
6213
6214                 memset(&tmpl, 0, sizeof(tmpl));
6215                 tmpl.start = bytenr;
6216                 tmpl.nr = 1;
6217                 tmpl.metadata = 1;
6218                 tmpl.max_size = 1;
6219
6220                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6221                 if (ret)
6222                         return ret;
6223
6224                 /* really a bug in cache_extent implement now */
6225                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6226                 if (!cache)
6227                         return -ENOENT;
6228         }
6229
6230         rec = container_of(cache, struct extent_record, cache);
6231         if (rec->start != bytenr) {
6232                 /*
6233                  * Several cause, from unaligned bytenr to over lapping extents
6234                  */
6235                 return -EEXIST;
6236         }
6237
6238         back = find_tree_backref(rec, parent, root);
6239         if (!back) {
6240                 back = alloc_tree_backref(rec, parent, root);
6241                 if (!back)
6242                         return -ENOMEM;
6243         }
6244
6245         if (found_ref) {
6246                 if (back->node.found_ref) {
6247                         fprintf(stderr, "Extent back ref already exists "
6248                                 "for %llu parent %llu root %llu \n",
6249                                 (unsigned long long)bytenr,
6250                                 (unsigned long long)parent,
6251                                 (unsigned long long)root);
6252                 }
6253                 back->node.found_ref = 1;
6254         } else {
6255                 if (back->node.found_extent_tree) {
6256                         fprintf(stderr, "Extent back ref already exists "
6257                                 "for %llu parent %llu root %llu \n",
6258                                 (unsigned long long)bytenr,
6259                                 (unsigned long long)parent,
6260                                 (unsigned long long)root);
6261                 }
6262                 back->node.found_extent_tree = 1;
6263         }
6264         check_extent_type(rec);
6265         maybe_free_extent_rec(extent_cache, rec);
6266         return 0;
6267 }
6268
6269 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6270                             u64 parent, u64 root, u64 owner, u64 offset,
6271                             u32 num_refs, int found_ref, u64 max_size)
6272 {
6273         struct extent_record *rec;
6274         struct data_backref *back;
6275         struct cache_extent *cache;
6276         int ret;
6277
6278         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6279         if (!cache) {
6280                 struct extent_record tmpl;
6281
6282                 memset(&tmpl, 0, sizeof(tmpl));
6283                 tmpl.start = bytenr;
6284                 tmpl.nr = 1;
6285                 tmpl.max_size = max_size;
6286
6287                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6288                 if (ret)
6289                         return ret;
6290
6291                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6292                 if (!cache)
6293                         abort();
6294         }
6295
6296         rec = container_of(cache, struct extent_record, cache);
6297         if (rec->max_size < max_size)
6298                 rec->max_size = max_size;
6299
6300         /*
6301          * If found_ref is set then max_size is the real size and must match the
6302          * existing refs.  So if we have already found a ref then we need to
6303          * make sure that this ref matches the existing one, otherwise we need
6304          * to add a new backref so we can notice that the backrefs don't match
6305          * and we need to figure out who is telling the truth.  This is to
6306          * account for that awful fsync bug I introduced where we'd end up with
6307          * a btrfs_file_extent_item that would have its length include multiple
6308          * prealloc extents or point inside of a prealloc extent.
6309          */
6310         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6311                                  bytenr, max_size);
6312         if (!back) {
6313                 back = alloc_data_backref(rec, parent, root, owner, offset,
6314                                           max_size);
6315                 BUG_ON(!back);
6316         }
6317
6318         if (found_ref) {
6319                 BUG_ON(num_refs != 1);
6320                 if (back->node.found_ref)
6321                         BUG_ON(back->bytes != max_size);
6322                 back->node.found_ref = 1;
6323                 back->found_ref += 1;
6324                 back->bytes = max_size;
6325                 back->disk_bytenr = bytenr;
6326                 rec->refs += 1;
6327                 rec->content_checked = 1;
6328                 rec->owner_ref_checked = 1;
6329         } else {
6330                 if (back->node.found_extent_tree) {
6331                         fprintf(stderr, "Extent back ref already exists "
6332                                 "for %llu parent %llu root %llu "
6333                                 "owner %llu offset %llu num_refs %lu\n",
6334                                 (unsigned long long)bytenr,
6335                                 (unsigned long long)parent,
6336                                 (unsigned long long)root,
6337                                 (unsigned long long)owner,
6338                                 (unsigned long long)offset,
6339                                 (unsigned long)num_refs);
6340                 }
6341                 back->num_refs = num_refs;
6342                 back->node.found_extent_tree = 1;
6343         }
6344         maybe_free_extent_rec(extent_cache, rec);
6345         return 0;
6346 }
6347
6348 static int add_pending(struct cache_tree *pending,
6349                        struct cache_tree *seen, u64 bytenr, u32 size)
6350 {
6351         int ret;
6352         ret = add_cache_extent(seen, bytenr, size);
6353         if (ret)
6354                 return ret;
6355         add_cache_extent(pending, bytenr, size);
6356         return 0;
6357 }
6358
6359 static int pick_next_pending(struct cache_tree *pending,
6360                         struct cache_tree *reada,
6361                         struct cache_tree *nodes,
6362                         u64 last, struct block_info *bits, int bits_nr,
6363                         int *reada_bits)
6364 {
6365         unsigned long node_start = last;
6366         struct cache_extent *cache;
6367         int ret;
6368
6369         cache = search_cache_extent(reada, 0);
6370         if (cache) {
6371                 bits[0].start = cache->start;
6372                 bits[0].size = cache->size;
6373                 *reada_bits = 1;
6374                 return 1;
6375         }
6376         *reada_bits = 0;
6377         if (node_start > 32768)
6378                 node_start -= 32768;
6379
6380         cache = search_cache_extent(nodes, node_start);
6381         if (!cache)
6382                 cache = search_cache_extent(nodes, 0);
6383
6384         if (!cache) {
6385                  cache = search_cache_extent(pending, 0);
6386                  if (!cache)
6387                          return 0;
6388                  ret = 0;
6389                  do {
6390                          bits[ret].start = cache->start;
6391                          bits[ret].size = cache->size;
6392                          cache = next_cache_extent(cache);
6393                          ret++;
6394                  } while (cache && ret < bits_nr);
6395                  return ret;
6396         }
6397
6398         ret = 0;
6399         do {
6400                 bits[ret].start = cache->start;
6401                 bits[ret].size = cache->size;
6402                 cache = next_cache_extent(cache);
6403                 ret++;
6404         } while (cache && ret < bits_nr);
6405
6406         if (bits_nr - ret > 8) {
6407                 u64 lookup = bits[0].start + bits[0].size;
6408                 struct cache_extent *next;
6409                 next = search_cache_extent(pending, lookup);
6410                 while(next) {
6411                         if (next->start - lookup > 32768)
6412                                 break;
6413                         bits[ret].start = next->start;
6414                         bits[ret].size = next->size;
6415                         lookup = next->start + next->size;
6416                         ret++;
6417                         if (ret == bits_nr)
6418                                 break;
6419                         next = next_cache_extent(next);
6420                         if (!next)
6421                                 break;
6422                 }
6423         }
6424         return ret;
6425 }
6426
6427 static void free_chunk_record(struct cache_extent *cache)
6428 {
6429         struct chunk_record *rec;
6430
6431         rec = container_of(cache, struct chunk_record, cache);
6432         list_del_init(&rec->list);
6433         list_del_init(&rec->dextents);
6434         free(rec);
6435 }
6436
6437 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6438 {
6439         cache_tree_free_extents(chunk_cache, free_chunk_record);
6440 }
6441
6442 static void free_device_record(struct rb_node *node)
6443 {
6444         struct device_record *rec;
6445
6446         rec = container_of(node, struct device_record, node);
6447         free(rec);
6448 }
6449
6450 FREE_RB_BASED_TREE(device_cache, free_device_record);
6451
6452 int insert_block_group_record(struct block_group_tree *tree,
6453                               struct block_group_record *bg_rec)
6454 {
6455         int ret;
6456
6457         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6458         if (ret)
6459                 return ret;
6460
6461         list_add_tail(&bg_rec->list, &tree->block_groups);
6462         return 0;
6463 }
6464
6465 static void free_block_group_record(struct cache_extent *cache)
6466 {
6467         struct block_group_record *rec;
6468
6469         rec = container_of(cache, struct block_group_record, cache);
6470         list_del_init(&rec->list);
6471         free(rec);
6472 }
6473
6474 void free_block_group_tree(struct block_group_tree *tree)
6475 {
6476         cache_tree_free_extents(&tree->tree, free_block_group_record);
6477 }
6478
6479 int insert_device_extent_record(struct device_extent_tree *tree,
6480                                 struct device_extent_record *de_rec)
6481 {
6482         int ret;
6483
6484         /*
6485          * Device extent is a bit different from the other extents, because
6486          * the extents which belong to the different devices may have the
6487          * same start and size, so we need use the special extent cache
6488          * search/insert functions.
6489          */
6490         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6491         if (ret)
6492                 return ret;
6493
6494         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6495         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6496         return 0;
6497 }
6498
6499 static void free_device_extent_record(struct cache_extent *cache)
6500 {
6501         struct device_extent_record *rec;
6502
6503         rec = container_of(cache, struct device_extent_record, cache);
6504         if (!list_empty(&rec->chunk_list))
6505                 list_del_init(&rec->chunk_list);
6506         if (!list_empty(&rec->device_list))
6507                 list_del_init(&rec->device_list);
6508         free(rec);
6509 }
6510
6511 void free_device_extent_tree(struct device_extent_tree *tree)
6512 {
6513         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6514 }
6515
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * The key objectid is used as the extent bytenr and the key offset as the
 * parent.  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID become tree
 * backrefs, all others become data backrefs carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6536
6537 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6538                                             struct btrfs_key *key,
6539                                             int slot)
6540 {
6541         struct btrfs_chunk *ptr;
6542         struct chunk_record *rec;
6543         int num_stripes, i;
6544
6545         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6546         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6547
6548         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6549         if (!rec) {
6550                 fprintf(stderr, "memory allocation failed\n");
6551                 exit(-1);
6552         }
6553
6554         INIT_LIST_HEAD(&rec->list);
6555         INIT_LIST_HEAD(&rec->dextents);
6556         rec->bg_rec = NULL;
6557
6558         rec->cache.start = key->offset;
6559         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6560
6561         rec->generation = btrfs_header_generation(leaf);
6562
6563         rec->objectid = key->objectid;
6564         rec->type = key->type;
6565         rec->offset = key->offset;
6566
6567         rec->length = rec->cache.size;
6568         rec->owner = btrfs_chunk_owner(leaf, ptr);
6569         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6570         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6571         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6572         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6573         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6574         rec->num_stripes = num_stripes;
6575         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6576
6577         for (i = 0; i < rec->num_stripes; ++i) {
6578                 rec->stripes[i].devid =
6579                         btrfs_stripe_devid_nr(leaf, ptr, i);
6580                 rec->stripes[i].offset =
6581                         btrfs_stripe_offset_nr(leaf, ptr, i);
6582                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6583                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6584                                 BTRFS_UUID_SIZE);
6585         }
6586
6587         return rec;
6588 }
6589
6590 static int process_chunk_item(struct cache_tree *chunk_cache,
6591                               struct btrfs_key *key, struct extent_buffer *eb,
6592                               int slot)
6593 {
6594         struct chunk_record *rec;
6595         struct btrfs_chunk *chunk;
6596         int ret = 0;
6597
6598         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6599         /*
6600          * Do extra check for this chunk item,
6601          *
6602          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6603          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6604          * and owner<->key_type check.
6605          */
6606         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6607                                       key->offset);
6608         if (ret < 0) {
6609                 error("chunk(%llu, %llu) is not valid, ignore it",
6610                       key->offset, btrfs_chunk_length(eb, chunk));
6611                 return 0;
6612         }
6613         rec = btrfs_new_chunk_record(eb, key, slot);
6614         ret = insert_cache_extent(chunk_cache, &rec->cache);
6615         if (ret) {
6616                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6617                         rec->offset, rec->length);
6618                 free(rec);
6619         }
6620
6621         return ret;
6622 }
6623
6624 static int process_device_item(struct rb_root *dev_cache,
6625                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6626 {
6627         struct btrfs_dev_item *ptr;
6628         struct device_record *rec;
6629         int ret = 0;
6630
6631         ptr = btrfs_item_ptr(eb,
6632                 slot, struct btrfs_dev_item);
6633
6634         rec = malloc(sizeof(*rec));
6635         if (!rec) {
6636                 fprintf(stderr, "memory allocation failed\n");
6637                 return -ENOMEM;
6638         }
6639
6640         rec->devid = key->offset;
6641         rec->generation = btrfs_header_generation(eb);
6642
6643         rec->objectid = key->objectid;
6644         rec->type = key->type;
6645         rec->offset = key->offset;
6646
6647         rec->devid = btrfs_device_id(eb, ptr);
6648         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6649         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6650
6651         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6652         if (ret) {
6653                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6654                 free(rec);
6655         }
6656
6657         return ret;
6658 }
6659
6660 struct block_group_record *
6661 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6662                              int slot)
6663 {
6664         struct btrfs_block_group_item *ptr;
6665         struct block_group_record *rec;
6666
6667         rec = calloc(1, sizeof(*rec));
6668         if (!rec) {
6669                 fprintf(stderr, "memory allocation failed\n");
6670                 exit(-1);
6671         }
6672
6673         rec->cache.start = key->objectid;
6674         rec->cache.size = key->offset;
6675
6676         rec->generation = btrfs_header_generation(leaf);
6677
6678         rec->objectid = key->objectid;
6679         rec->type = key->type;
6680         rec->offset = key->offset;
6681
6682         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6683         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6684
6685         INIT_LIST_HEAD(&rec->list);
6686
6687         return rec;
6688 }
6689
6690 static int process_block_group_item(struct block_group_tree *block_group_cache,
6691                                     struct btrfs_key *key,
6692                                     struct extent_buffer *eb, int slot)
6693 {
6694         struct block_group_record *rec;
6695         int ret = 0;
6696
6697         rec = btrfs_new_block_group_record(eb, key, slot);
6698         ret = insert_block_group_record(block_group_cache, rec);
6699         if (ret) {
6700                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6701                         rec->objectid, rec->offset);
6702                 free(rec);
6703         }
6704
6705         return ret;
6706 }
6707
6708 struct device_extent_record *
6709 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6710                                struct btrfs_key *key, int slot)
6711 {
6712         struct device_extent_record *rec;
6713         struct btrfs_dev_extent *ptr;
6714
6715         rec = calloc(1, sizeof(*rec));
6716         if (!rec) {
6717                 fprintf(stderr, "memory allocation failed\n");
6718                 exit(-1);
6719         }
6720
6721         rec->cache.objectid = key->objectid;
6722         rec->cache.start = key->offset;
6723
6724         rec->generation = btrfs_header_generation(leaf);
6725
6726         rec->objectid = key->objectid;
6727         rec->type = key->type;
6728         rec->offset = key->offset;
6729
6730         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6731         rec->chunk_objecteid =
6732                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6733         rec->chunk_offset =
6734                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6735         rec->length = btrfs_dev_extent_length(leaf, ptr);
6736         rec->cache.size = rec->length;
6737
6738         INIT_LIST_HEAD(&rec->chunk_list);
6739         INIT_LIST_HEAD(&rec->device_list);
6740
6741         return rec;
6742 }
6743
6744 static int
6745 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6746                            struct btrfs_key *key, struct extent_buffer *eb,
6747                            int slot)
6748 {
6749         struct device_extent_record *rec;
6750         int ret;
6751
6752         rec = btrfs_new_device_extent_record(eb, key, slot);
6753         ret = insert_device_extent_record(dev_extent_cache, rec);
6754         if (ret) {
6755                 fprintf(stderr,
6756                         "Device extent[%llu, %llu, %llu] existed.\n",
6757                         rec->objectid, rec->offset, rec->length);
6758                 free(rec);
6759         }
6760
6761         return ret;
6762 }
6763
6764 static int process_extent_item(struct btrfs_root *root,
6765                                struct cache_tree *extent_cache,
6766                                struct extent_buffer *eb, int slot)
6767 {
6768         struct btrfs_extent_item *ei;
6769         struct btrfs_extent_inline_ref *iref;
6770         struct btrfs_extent_data_ref *dref;
6771         struct btrfs_shared_data_ref *sref;
6772         struct btrfs_key key;
6773         struct extent_record tmpl;
6774         unsigned long end;
6775         unsigned long ptr;
6776         int ret;
6777         int type;
6778         u32 item_size = btrfs_item_size_nr(eb, slot);
6779         u64 refs = 0;
6780         u64 offset;
6781         u64 num_bytes;
6782         int metadata = 0;
6783
6784         btrfs_item_key_to_cpu(eb, &key, slot);
6785
6786         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6787                 metadata = 1;
6788                 num_bytes = root->nodesize;
6789         } else {
6790                 num_bytes = key.offset;
6791         }
6792
6793         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6794                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6795                       key.objectid, root->sectorsize);
6796                 return -EIO;
6797         }
6798         if (item_size < sizeof(*ei)) {
6799 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6800                 struct btrfs_extent_item_v0 *ei0;
6801                 BUG_ON(item_size != sizeof(*ei0));
6802                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6803                 refs = btrfs_extent_refs_v0(eb, ei0);
6804 #else
6805                 BUG();
6806 #endif
6807                 memset(&tmpl, 0, sizeof(tmpl));
6808                 tmpl.start = key.objectid;
6809                 tmpl.nr = num_bytes;
6810                 tmpl.extent_item_refs = refs;
6811                 tmpl.metadata = metadata;
6812                 tmpl.found_rec = 1;
6813                 tmpl.max_size = num_bytes;
6814
6815                 return add_extent_rec(extent_cache, &tmpl);
6816         }
6817
6818         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6819         refs = btrfs_extent_refs(eb, ei);
6820         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6821                 metadata = 1;
6822         else
6823                 metadata = 0;
6824         if (metadata && num_bytes != root->nodesize) {
6825                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6826                       num_bytes, root->nodesize);
6827                 return -EIO;
6828         }
6829         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6830                 error("ignore invalid data extent, length %llu is not aligned to %u",
6831                       num_bytes, root->sectorsize);
6832                 return -EIO;
6833         }
6834
6835         memset(&tmpl, 0, sizeof(tmpl));
6836         tmpl.start = key.objectid;
6837         tmpl.nr = num_bytes;
6838         tmpl.extent_item_refs = refs;
6839         tmpl.metadata = metadata;
6840         tmpl.found_rec = 1;
6841         tmpl.max_size = num_bytes;
6842         add_extent_rec(extent_cache, &tmpl);
6843
6844         ptr = (unsigned long)(ei + 1);
6845         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6846             key.type == BTRFS_EXTENT_ITEM_KEY)
6847                 ptr += sizeof(struct btrfs_tree_block_info);
6848
6849         end = (unsigned long)ei + item_size;
6850         while (ptr < end) {
6851                 iref = (struct btrfs_extent_inline_ref *)ptr;
6852                 type = btrfs_extent_inline_ref_type(eb, iref);
6853                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6854                 switch (type) {
6855                 case BTRFS_TREE_BLOCK_REF_KEY:
6856                         ret = add_tree_backref(extent_cache, key.objectid,
6857                                         0, offset, 0);
6858                         if (ret < 0)
6859                                 error(
6860                         "add_tree_backref failed (extent items tree block): %s",
6861                                       strerror(-ret));
6862                         break;
6863                 case BTRFS_SHARED_BLOCK_REF_KEY:
6864                         ret = add_tree_backref(extent_cache, key.objectid,
6865                                         offset, 0, 0);
6866                         if (ret < 0)
6867                                 error(
6868                         "add_tree_backref failed (extent items shared block): %s",
6869                                       strerror(-ret));
6870                         break;
6871                 case BTRFS_EXTENT_DATA_REF_KEY:
6872                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6873                         add_data_backref(extent_cache, key.objectid, 0,
6874                                         btrfs_extent_data_ref_root(eb, dref),
6875                                         btrfs_extent_data_ref_objectid(eb,
6876                                                                        dref),
6877                                         btrfs_extent_data_ref_offset(eb, dref),
6878                                         btrfs_extent_data_ref_count(eb, dref),
6879                                         0, num_bytes);
6880                         break;
6881                 case BTRFS_SHARED_DATA_REF_KEY:
6882                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6883                         add_data_backref(extent_cache, key.objectid, offset,
6884                                         0, 0, 0,
6885                                         btrfs_shared_data_ref_count(eb, sref),
6886                                         0, num_bytes);
6887                         break;
6888                 default:
6889                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6890                                 key.objectid, key.type, num_bytes);
6891                         goto out;
6892                 }
6893                 ptr += btrfs_extent_inline_ref_size(type);
6894         }
6895         WARN_ON(ptr > end);
6896 out:
6897         return 0;
6898 }
6899
6900 static int check_cache_range(struct btrfs_root *root,
6901                              struct btrfs_block_group_cache *cache,
6902                              u64 offset, u64 bytes)
6903 {
6904         struct btrfs_free_space *entry;
6905         u64 *logical;
6906         u64 bytenr;
6907         int stripe_len;
6908         int i, nr, ret;
6909
6910         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6911                 bytenr = btrfs_sb_offset(i);
6912                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6913                                        cache->key.objectid, bytenr, 0,
6914                                        &logical, &nr, &stripe_len);
6915                 if (ret)
6916                         return ret;
6917
6918                 while (nr--) {
6919                         if (logical[nr] + stripe_len <= offset)
6920                                 continue;
6921                         if (offset + bytes <= logical[nr])
6922                                 continue;
6923                         if (logical[nr] == offset) {
6924                                 if (stripe_len >= bytes) {
6925                                         free(logical);
6926                                         return 0;
6927                                 }
6928                                 bytes -= stripe_len;
6929                                 offset += stripe_len;
6930                         } else if (logical[nr] < offset) {
6931                                 if (logical[nr] + stripe_len >=
6932                                     offset + bytes) {
6933                                         free(logical);
6934                                         return 0;
6935                                 }
6936                                 bytes = (offset + bytes) -
6937                                         (logical[nr] + stripe_len);
6938                                 offset = logical[nr] + stripe_len;
6939                         } else {
6940                                 /*
6941                                  * Could be tricky, the super may land in the
6942                                  * middle of the area we're checking.  First
6943                                  * check the easiest case, it's at the end.
6944                                  */
6945                                 if (logical[nr] + stripe_len >=
6946                                     bytes + offset) {
6947                                         bytes = logical[nr] - offset;
6948                                         continue;
6949                                 }
6950
6951                                 /* Check the left side */
6952                                 ret = check_cache_range(root, cache,
6953                                                         offset,
6954                                                         logical[nr] - offset);
6955                                 if (ret) {
6956                                         free(logical);
6957                                         return ret;
6958                                 }
6959
6960                                 /* Now we continue with the right side */
6961                                 bytes = (offset + bytes) -
6962                                         (logical[nr] + stripe_len);
6963                                 offset = logical[nr] + stripe_len;
6964                         }
6965                 }
6966
6967                 free(logical);
6968         }
6969
6970         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6971         if (!entry) {
6972                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6973                         offset, offset+bytes);
6974                 return -EINVAL;
6975         }
6976
6977         if (entry->offset != offset) {
6978                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6979                         entry->offset);
6980                 return -EINVAL;
6981         }
6982
6983         if (entry->bytes != bytes) {
6984                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6985                         bytes, entry->bytes, offset);
6986                 return -EINVAL;
6987         }
6988
6989         unlink_free_space(cache->free_space_ctl, entry);
6990         free(entry);
6991         return 0;
6992 }
6993
6994 static int verify_space_cache(struct btrfs_root *root,
6995                               struct btrfs_block_group_cache *cache)
6996 {
6997         struct btrfs_path path;
6998         struct extent_buffer *leaf;
6999         struct btrfs_key key;
7000         u64 last;
7001         int ret = 0;
7002
7003         root = root->fs_info->extent_root;
7004
7005         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7006
7007         btrfs_init_path(&path);
7008         key.objectid = last;
7009         key.offset = 0;
7010         key.type = BTRFS_EXTENT_ITEM_KEY;
7011         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7012         if (ret < 0)
7013                 goto out;
7014         ret = 0;
7015         while (1) {
7016                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7017                         ret = btrfs_next_leaf(root, &path);
7018                         if (ret < 0)
7019                                 goto out;
7020                         if (ret > 0) {
7021                                 ret = 0;
7022                                 break;
7023                         }
7024                 }
7025                 leaf = path.nodes[0];
7026                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7027                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7028                         break;
7029                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7030                     key.type != BTRFS_METADATA_ITEM_KEY) {
7031                         path.slots[0]++;
7032                         continue;
7033                 }
7034
7035                 if (last == key.objectid) {
7036                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7037                                 last = key.objectid + key.offset;
7038                         else
7039                                 last = key.objectid + root->nodesize;
7040                         path.slots[0]++;
7041                         continue;
7042                 }
7043
7044                 ret = check_cache_range(root, cache, last,
7045                                         key.objectid - last);
7046                 if (ret)
7047                         break;
7048                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7049                         last = key.objectid + key.offset;
7050                 else
7051                         last = key.objectid + root->nodesize;
7052                 path.slots[0]++;
7053         }
7054
7055         if (last < cache->key.objectid + cache->key.offset)
7056                 ret = check_cache_range(root, cache, last,
7057                                         cache->key.objectid +
7058                                         cache->key.offset - last);
7059
7060 out:
7061         btrfs_release_path(&path);
7062
7063         if (!ret &&
7064             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7065                 fprintf(stderr, "There are still entries left in the space "
7066                         "cache\n");
7067                 ret = -EINVAL;
7068         }
7069
7070         return ret;
7071 }
7072
7073 static int check_space_cache(struct btrfs_root *root)
7074 {
7075         struct btrfs_block_group_cache *cache;
7076         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7077         int ret;
7078         int error = 0;
7079
7080         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7081             btrfs_super_generation(root->fs_info->super_copy) !=
7082             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7083                 printf("cache and super generation don't match, space cache "
7084                        "will be invalidated\n");
7085                 return 0;
7086         }
7087
7088         if (ctx.progress_enabled) {
7089                 ctx.tp = TASK_FREE_SPACE;
7090                 task_start(ctx.info);
7091         }
7092
7093         while (1) {
7094                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7095                 if (!cache)
7096                         break;
7097
7098                 start = cache->key.objectid + cache->key.offset;
7099                 if (!cache->free_space_ctl) {
7100                         if (btrfs_init_free_space_ctl(cache,
7101                                                       root->sectorsize)) {
7102                                 ret = -ENOMEM;
7103                                 break;
7104                         }
7105                 } else {
7106                         btrfs_remove_free_space_cache(cache);
7107                 }
7108
7109                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7110                         ret = exclude_super_stripes(root, cache);
7111                         if (ret) {
7112                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7113                                         strerror(-ret));
7114                                 error++;
7115                                 continue;
7116                         }
7117                         ret = load_free_space_tree(root->fs_info, cache);
7118                         free_excluded_extents(root, cache);
7119                         if (ret < 0) {
7120                                 fprintf(stderr, "could not load free space tree: %s\n",
7121                                         strerror(-ret));
7122                                 error++;
7123                                 continue;
7124                         }
7125                         error += ret;
7126                 } else {
7127                         ret = load_free_space_cache(root->fs_info, cache);
7128                         if (!ret)
7129                                 continue;
7130                 }
7131
7132                 ret = verify_space_cache(root, cache);
7133                 if (ret) {
7134                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7135                                 cache->key.objectid);
7136                         error++;
7137                 }
7138         }
7139
7140         task_stop(ctx.info);
7141
7142         return error ? -EINVAL : 0;
7143 }
7144
7145 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7146                         u64 num_bytes, unsigned long leaf_offset,
7147                         struct extent_buffer *eb) {
7148
7149         u64 offset = 0;
7150         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7151         char *data;
7152         unsigned long csum_offset;
7153         u32 csum;
7154         u32 csum_expected;
7155         u64 read_len;
7156         u64 data_checked = 0;
7157         u64 tmp;
7158         int ret = 0;
7159         int mirror;
7160         int num_copies;
7161
7162         if (num_bytes % root->sectorsize)
7163                 return -EINVAL;
7164
7165         data = malloc(num_bytes);
7166         if (!data)
7167                 return -ENOMEM;
7168
7169         while (offset < num_bytes) {
7170                 mirror = 0;
7171 again:
7172                 read_len = num_bytes - offset;
7173                 /* read as much space once a time */
7174                 ret = read_extent_data(root, data + offset,
7175                                 bytenr + offset, &read_len, mirror);
7176                 if (ret)
7177                         goto out;
7178                 data_checked = 0;
7179                 /* verify every 4k data's checksum */
7180                 while (data_checked < read_len) {
7181                         csum = ~(u32)0;
7182                         tmp = offset + data_checked;
7183
7184                         csum = btrfs_csum_data((char *)data + tmp,
7185                                                csum, root->sectorsize);
7186                         btrfs_csum_final(csum, (u8 *)&csum);
7187
7188                         csum_offset = leaf_offset +
7189                                  tmp / root->sectorsize * csum_size;
7190                         read_extent_buffer(eb, (char *)&csum_expected,
7191                                            csum_offset, csum_size);
7192                         /* try another mirror */
7193                         if (csum != csum_expected) {
7194                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7195                                                 mirror, bytenr + tmp,
7196                                                 csum, csum_expected);
7197                                 num_copies = btrfs_num_copies(
7198                                                 &root->fs_info->mapping_tree,
7199                                                 bytenr, num_bytes);
7200                                 if (mirror < num_copies - 1) {
7201                                         mirror += 1;
7202                                         goto again;
7203                                 }
7204                         }
7205                         data_checked += root->sectorsize;
7206                 }
7207                 offset += read_len;
7208         }
7209 out:
7210         free(data);
7211         return ret;
7212 }
7213
7214 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7215                                u64 num_bytes)
7216 {
7217         struct btrfs_path path;
7218         struct extent_buffer *leaf;
7219         struct btrfs_key key;
7220         int ret;
7221
7222         btrfs_init_path(&path);
7223         key.objectid = bytenr;
7224         key.type = BTRFS_EXTENT_ITEM_KEY;
7225         key.offset = (u64)-1;
7226
7227 again:
7228         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7229                                 0, 0);
7230         if (ret < 0) {
7231                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7232                 btrfs_release_path(&path);
7233                 return ret;
7234         } else if (ret) {
7235                 if (path.slots[0] > 0) {
7236                         path.slots[0]--;
7237                 } else {
7238                         ret = btrfs_prev_leaf(root, &path);
7239                         if (ret < 0) {
7240                                 goto out;
7241                         } else if (ret > 0) {
7242                                 ret = 0;
7243                                 goto out;
7244                         }
7245                 }
7246         }
7247
7248         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7249
7250         /*
7251          * Block group items come before extent items if they have the same
7252          * bytenr, so walk back one more just in case.  Dear future traveller,
7253          * first congrats on mastering time travel.  Now if it's not too much
7254          * trouble could you go back to 2006 and tell Chris to make the
7255          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7256          * EXTENT_ITEM_KEY please?
7257          */
7258         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7259                 if (path.slots[0] > 0) {
7260                         path.slots[0]--;
7261                 } else {
7262                         ret = btrfs_prev_leaf(root, &path);
7263                         if (ret < 0) {
7264                                 goto out;
7265                         } else if (ret > 0) {
7266                                 ret = 0;
7267                                 goto out;
7268                         }
7269                 }
7270                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7271         }
7272
7273         while (num_bytes) {
7274                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7275                         ret = btrfs_next_leaf(root, &path);
7276                         if (ret < 0) {
7277                                 fprintf(stderr, "Error going to next leaf "
7278                                         "%d\n", ret);
7279                                 btrfs_release_path(&path);
7280                                 return ret;
7281                         } else if (ret) {
7282                                 break;
7283                         }
7284                 }
7285                 leaf = path.nodes[0];
7286                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7287                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7288                         path.slots[0]++;
7289                         continue;
7290                 }
7291                 if (key.objectid + key.offset < bytenr) {
7292                         path.slots[0]++;
7293                         continue;
7294                 }
7295                 if (key.objectid > bytenr + num_bytes)
7296                         break;
7297
7298                 if (key.objectid == bytenr) {
7299                         if (key.offset >= num_bytes) {
7300                                 num_bytes = 0;
7301                                 break;
7302                         }
7303                         num_bytes -= key.offset;
7304                         bytenr += key.offset;
7305                 } else if (key.objectid < bytenr) {
7306                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7307                                 num_bytes = 0;
7308                                 break;
7309                         }
7310                         num_bytes = (bytenr + num_bytes) -
7311                                 (key.objectid + key.offset);
7312                         bytenr = key.objectid + key.offset;
7313                 } else {
7314                         if (key.objectid + key.offset < bytenr + num_bytes) {
7315                                 u64 new_start = key.objectid + key.offset;
7316                                 u64 new_bytes = bytenr + num_bytes - new_start;
7317
7318                                 /*
7319                                  * Weird case, the extent is in the middle of
7320                                  * our range, we'll have to search one side
7321                                  * and then the other.  Not sure if this happens
7322                                  * in real life, but no harm in coding it up
7323                                  * anyway just in case.
7324                                  */
7325                                 btrfs_release_path(&path);
7326                                 ret = check_extent_exists(root, new_start,
7327                                                           new_bytes);
7328                                 if (ret) {
7329                                         fprintf(stderr, "Right section didn't "
7330                                                 "have a record\n");
7331                                         break;
7332                                 }
7333                                 num_bytes = key.objectid - bytenr;
7334                                 goto again;
7335                         }
7336                         num_bytes = key.objectid - bytenr;
7337                 }
7338                 path.slots[0]++;
7339         }
7340         ret = 0;
7341
7342 out:
7343         if (num_bytes && !ret) {
7344                 fprintf(stderr, "There are no extents for csum range "
7345                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7346                 ret = 1;
7347         }
7348
7349         btrfs_release_path(&path);
7350         return ret;
7351 }
7352
7353 static int check_csums(struct btrfs_root *root)
7354 {
7355         struct btrfs_path path;
7356         struct extent_buffer *leaf;
7357         struct btrfs_key key;
7358         u64 offset = 0, num_bytes = 0;
7359         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7360         int errors = 0;
7361         int ret;
7362         u64 data_len;
7363         unsigned long leaf_offset;
7364
7365         root = root->fs_info->csum_root;
7366         if (!extent_buffer_uptodate(root->node)) {
7367                 fprintf(stderr, "No valid csum tree found\n");
7368                 return -ENOENT;
7369         }
7370
7371         btrfs_init_path(&path);
7372         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7373         key.type = BTRFS_EXTENT_CSUM_KEY;
7374         key.offset = 0;
7375         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7376         if (ret < 0) {
7377                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7378                 btrfs_release_path(&path);
7379                 return ret;
7380         }
7381
7382         if (ret > 0 && path.slots[0])
7383                 path.slots[0]--;
7384         ret = 0;
7385
7386         while (1) {
7387                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7388                         ret = btrfs_next_leaf(root, &path);
7389                         if (ret < 0) {
7390                                 fprintf(stderr, "Error going to next leaf "
7391                                         "%d\n", ret);
7392                                 break;
7393                         }
7394                         if (ret)
7395                                 break;
7396                 }
7397                 leaf = path.nodes[0];
7398
7399                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7400                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7401                         path.slots[0]++;
7402                         continue;
7403                 }
7404
7405                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7406                               csum_size) * root->sectorsize;
7407                 if (!check_data_csum)
7408                         goto skip_csum_check;
7409                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7410                 ret = check_extent_csums(root, key.offset, data_len,
7411                                          leaf_offset, leaf);
7412                 if (ret)
7413                         break;
7414 skip_csum_check:
7415                 if (!num_bytes) {
7416                         offset = key.offset;
7417                 } else if (key.offset != offset + num_bytes) {
7418                         ret = check_extent_exists(root, offset, num_bytes);
7419                         if (ret) {
7420                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7421                                         "there is no extent record\n",
7422                                         offset, offset+num_bytes);
7423                                 errors++;
7424                         }
7425                         offset = key.offset;
7426                         num_bytes = 0;
7427                 }
7428                 num_bytes += data_len;
7429                 path.slots[0]++;
7430         }
7431
7432         btrfs_release_path(&path);
7433         return errors;
7434 }
7435
7436 static int is_dropped_key(struct btrfs_key *key,
7437                           struct btrfs_key *drop_key) {
7438         if (key->objectid < drop_key->objectid)
7439                 return 1;
7440         else if (key->objectid == drop_key->objectid) {
7441                 if (key->type < drop_key->type)
7442                         return 1;
7443                 else if (key->type == drop_key->type) {
7444                         if (key->offset < drop_key->offset)
7445                                 return 1;
7446                 }
7447         }
7448         return 0;
7449 }
7450
7451 /*
7452  * Here are the rules for FULL_BACKREF.
7453  *
7454  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7455  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7456  *      FULL_BACKREF set.
7457  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7458  *    if it happened after the relocation occurred since we'll have dropped the
7459  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7460  *    have no real way to know for sure.
7461  *
7462  * We process the blocks one root at a time, and we start from the lowest root
7463  * objectid and go to the highest.  So we can just lookup the owner backref for
7464  * the record and if we don't find it then we know it doesn't exist and we have
7465  * a FULL BACKREF.
7466  *
7467  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7468  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7469  * be set or not and then we can check later once we've gathered all the refs.
7470  */
7471 static int calc_extent_flag(struct cache_tree *extent_cache,
7472                            struct extent_buffer *buf,
7473                            struct root_item_record *ri,
7474                            u64 *flags)
7475 {
7476         struct extent_record *rec;
7477         struct cache_extent *cache;
7478         struct tree_backref *tback;
7479         u64 owner = 0;
7480
7481         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7482         /* we have added this extent before */
7483         if (!cache)
7484                 return -ENOENT;
7485
7486         rec = container_of(cache, struct extent_record, cache);
7487
7488         /*
7489          * Except file/reloc tree, we can not have
7490          * FULL BACKREF MODE
7491          */
7492         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7493                 goto normal;
7494         /*
7495          * root node
7496          */
7497         if (buf->start == ri->bytenr)
7498                 goto normal;
7499
7500         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7501                 goto full_backref;
7502
7503         owner = btrfs_header_owner(buf);
7504         if (owner == ri->objectid)
7505                 goto normal;
7506
7507         tback = find_tree_backref(rec, 0, owner);
7508         if (!tback)
7509                 goto full_backref;
7510 normal:
7511         *flags = 0;
7512         if (rec->flag_block_full_backref != FLAG_UNSET &&
7513             rec->flag_block_full_backref != 0)
7514                 rec->bad_full_backref = 1;
7515         return 0;
7516 full_backref:
7517         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7518         if (rec->flag_block_full_backref != FLAG_UNSET &&
7519             rec->flag_block_full_backref != 1)
7520                 rec->bad_full_backref = 1;
7521         return 0;
7522 }
7523
7524 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7525 {
7526         fprintf(stderr, "Invalid key type(");
7527         print_key_type(stderr, 0, key_type);
7528         fprintf(stderr, ") found in root(");
7529         print_objectid(stderr, rootid, 0);
7530         fprintf(stderr, ")\n");
7531 }
7532
7533 /*
7534  * Check if the key is valid with its extent buffer.
7535  *
7536  * This is a early check in case invalid key exists in a extent buffer
7537  * This is not comprehensive yet, but should prevent wrong key/item passed
7538  * further
7539  */
7540 static int check_type_with_root(u64 rootid, u8 key_type)
7541 {
7542         switch (key_type) {
7543         /* Only valid in chunk tree */
7544         case BTRFS_DEV_ITEM_KEY:
7545         case BTRFS_CHUNK_ITEM_KEY:
7546                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7547                         goto err;
7548                 break;
7549         /* valid in csum and log tree */
7550         case BTRFS_CSUM_TREE_OBJECTID:
7551                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7552                       is_fstree(rootid)))
7553                         goto err;
7554                 break;
7555         case BTRFS_EXTENT_ITEM_KEY:
7556         case BTRFS_METADATA_ITEM_KEY:
7557         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7558                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7559                         goto err;
7560                 break;
7561         case BTRFS_ROOT_ITEM_KEY:
7562                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7563                         goto err;
7564                 break;
7565         case BTRFS_DEV_EXTENT_KEY:
7566                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7567                         goto err;
7568                 break;
7569         }
7570         return 0;
7571 err:
7572         report_mismatch_key_root(key_type, rootid);
7573         return -EINVAL;
7574 }
7575
7576 static int run_next_block(struct btrfs_root *root,
7577                           struct block_info *bits,
7578                           int bits_nr,
7579                           u64 *last,
7580                           struct cache_tree *pending,
7581                           struct cache_tree *seen,
7582                           struct cache_tree *reada,
7583                           struct cache_tree *nodes,
7584                           struct cache_tree *extent_cache,
7585                           struct cache_tree *chunk_cache,
7586                           struct rb_root *dev_cache,
7587                           struct block_group_tree *block_group_cache,
7588                           struct device_extent_tree *dev_extent_cache,
7589                           struct root_item_record *ri)
7590 {
7591         struct extent_buffer *buf;
7592         struct extent_record *rec = NULL;
7593         u64 bytenr;
7594         u32 size;
7595         u64 parent;
7596         u64 owner;
7597         u64 flags;
7598         u64 ptr;
7599         u64 gen = 0;
7600         int ret = 0;
7601         int i;
7602         int nritems;
7603         struct btrfs_key key;
7604         struct cache_extent *cache;
7605         int reada_bits;
7606
7607         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7608                                     bits_nr, &reada_bits);
7609         if (nritems == 0)
7610                 return 1;
7611
7612         if (!reada_bits) {
7613                 for(i = 0; i < nritems; i++) {
7614                         ret = add_cache_extent(reada, bits[i].start,
7615                                                bits[i].size);
7616                         if (ret == -EEXIST)
7617                                 continue;
7618
7619                         /* fixme, get the parent transid */
7620                         readahead_tree_block(root, bits[i].start,
7621                                              bits[i].size, 0);
7622                 }
7623         }
7624         *last = bits[0].start;
7625         bytenr = bits[0].start;
7626         size = bits[0].size;
7627
7628         cache = lookup_cache_extent(pending, bytenr, size);
7629         if (cache) {
7630                 remove_cache_extent(pending, cache);
7631                 free(cache);
7632         }
7633         cache = lookup_cache_extent(reada, bytenr, size);
7634         if (cache) {
7635                 remove_cache_extent(reada, cache);
7636                 free(cache);
7637         }
7638         cache = lookup_cache_extent(nodes, bytenr, size);
7639         if (cache) {
7640                 remove_cache_extent(nodes, cache);
7641                 free(cache);
7642         }
7643         cache = lookup_cache_extent(extent_cache, bytenr, size);
7644         if (cache) {
7645                 rec = container_of(cache, struct extent_record, cache);
7646                 gen = rec->parent_generation;
7647         }
7648
7649         /* fixme, get the real parent transid */
7650         buf = read_tree_block(root, bytenr, size, gen);
7651         if (!extent_buffer_uptodate(buf)) {
7652                 record_bad_block_io(root->fs_info,
7653                                     extent_cache, bytenr, size);
7654                 goto out;
7655         }
7656
7657         nritems = btrfs_header_nritems(buf);
7658
7659         flags = 0;
7660         if (!init_extent_tree) {
7661                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7662                                        btrfs_header_level(buf), 1, NULL,
7663                                        &flags);
7664                 if (ret < 0) {
7665                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7666                         if (ret < 0) {
7667                                 fprintf(stderr, "Couldn't calc extent flags\n");
7668                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7669                         }
7670                 }
7671         } else {
7672                 flags = 0;
7673                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7674                 if (ret < 0) {
7675                         fprintf(stderr, "Couldn't calc extent flags\n");
7676                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7677                 }
7678         }
7679
7680         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7681                 if (ri != NULL &&
7682                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7683                     ri->objectid == btrfs_header_owner(buf)) {
7684                         /*
7685                          * Ok we got to this block from its original owner and
7686                          * we have FULL_BACKREF set.  Relocation can leave
7687                          * converted blocks over so this is altogether possible,
7688                          * however it's not possible if the generation > the
7689                          * last snapshot, so check for this case.
7690                          */
7691                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7692                             btrfs_header_generation(buf) > ri->last_snapshot) {
7693                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7694                                 rec->bad_full_backref = 1;
7695                         }
7696                 }
7697         } else {
7698                 if (ri != NULL &&
7699                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7700                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7701                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7702                         rec->bad_full_backref = 1;
7703                 }
7704         }
7705
7706         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7707                 rec->flag_block_full_backref = 1;
7708                 parent = bytenr;
7709                 owner = 0;
7710         } else {
7711                 rec->flag_block_full_backref = 0;
7712                 parent = 0;
7713                 owner = btrfs_header_owner(buf);
7714         }
7715
7716         ret = check_block(root, extent_cache, buf, flags);
7717         if (ret)
7718                 goto out;
7719
7720         if (btrfs_is_leaf(buf)) {
7721                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7722                 for (i = 0; i < nritems; i++) {
7723                         struct btrfs_file_extent_item *fi;
7724                         btrfs_item_key_to_cpu(buf, &key, i);
7725                         /*
7726                          * Check key type against the leaf owner.
7727                          * Could filter quite a lot of early error if
7728                          * owner is correct
7729                          */
7730                         if (check_type_with_root(btrfs_header_owner(buf),
7731                                                  key.type)) {
7732                                 fprintf(stderr, "ignoring invalid key\n");
7733                                 continue;
7734                         }
7735                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7736                                 process_extent_item(root, extent_cache, buf,
7737                                                     i);
7738                                 continue;
7739                         }
7740                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7741                                 process_extent_item(root, extent_cache, buf,
7742                                                     i);
7743                                 continue;
7744                         }
7745                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7746                                 total_csum_bytes +=
7747                                         btrfs_item_size_nr(buf, i);
7748                                 continue;
7749                         }
7750                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7751                                 process_chunk_item(chunk_cache, &key, buf, i);
7752                                 continue;
7753                         }
7754                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7755                                 process_device_item(dev_cache, &key, buf, i);
7756                                 continue;
7757                         }
7758                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7759                                 process_block_group_item(block_group_cache,
7760                                         &key, buf, i);
7761                                 continue;
7762                         }
7763                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7764                                 process_device_extent_item(dev_extent_cache,
7765                                         &key, buf, i);
7766                                 continue;
7767
7768                         }
7769                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7770 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7771                                 process_extent_ref_v0(extent_cache, buf, i);
7772 #else
7773                                 BUG();
7774 #endif
7775                                 continue;
7776                         }
7777
7778                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7779                                 ret = add_tree_backref(extent_cache,
7780                                                 key.objectid, 0, key.offset, 0);
7781                                 if (ret < 0)
7782                                         error(
7783                                 "add_tree_backref failed (leaf tree block): %s",
7784                                               strerror(-ret));
7785                                 continue;
7786                         }
7787                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7788                                 ret = add_tree_backref(extent_cache,
7789                                                 key.objectid, key.offset, 0, 0);
7790                                 if (ret < 0)
7791                                         error(
7792                                 "add_tree_backref failed (leaf shared block): %s",
7793                                               strerror(-ret));
7794                                 continue;
7795                         }
7796                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7797                                 struct btrfs_extent_data_ref *ref;
7798                                 ref = btrfs_item_ptr(buf, i,
7799                                                 struct btrfs_extent_data_ref);
7800                                 add_data_backref(extent_cache,
7801                                         key.objectid, 0,
7802                                         btrfs_extent_data_ref_root(buf, ref),
7803                                         btrfs_extent_data_ref_objectid(buf,
7804                                                                        ref),
7805                                         btrfs_extent_data_ref_offset(buf, ref),
7806                                         btrfs_extent_data_ref_count(buf, ref),
7807                                         0, root->sectorsize);
7808                                 continue;
7809                         }
7810                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7811                                 struct btrfs_shared_data_ref *ref;
7812                                 ref = btrfs_item_ptr(buf, i,
7813                                                 struct btrfs_shared_data_ref);
7814                                 add_data_backref(extent_cache,
7815                                         key.objectid, key.offset, 0, 0, 0,
7816                                         btrfs_shared_data_ref_count(buf, ref),
7817                                         0, root->sectorsize);
7818                                 continue;
7819                         }
7820                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7821                                 struct bad_item *bad;
7822
7823                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7824                                         continue;
7825                                 if (!owner)
7826                                         continue;
7827                                 bad = malloc(sizeof(struct bad_item));
7828                                 if (!bad)
7829                                         continue;
7830                                 INIT_LIST_HEAD(&bad->list);
7831                                 memcpy(&bad->key, &key,
7832                                        sizeof(struct btrfs_key));
7833                                 bad->root_id = owner;
7834                                 list_add_tail(&bad->list, &delete_items);
7835                                 continue;
7836                         }
7837                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7838                                 continue;
7839                         fi = btrfs_item_ptr(buf, i,
7840                                             struct btrfs_file_extent_item);
7841                         if (btrfs_file_extent_type(buf, fi) ==
7842                             BTRFS_FILE_EXTENT_INLINE)
7843                                 continue;
7844                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7845                                 continue;
7846
7847                         data_bytes_allocated +=
7848                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7849                         if (data_bytes_allocated < root->sectorsize) {
7850                                 abort();
7851                         }
7852                         data_bytes_referenced +=
7853                                 btrfs_file_extent_num_bytes(buf, fi);
7854                         add_data_backref(extent_cache,
7855                                 btrfs_file_extent_disk_bytenr(buf, fi),
7856                                 parent, owner, key.objectid, key.offset -
7857                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7858                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7859                 }
7860         } else {
7861                 int level;
7862                 struct btrfs_key first_key;
7863
7864                 first_key.objectid = 0;
7865
7866                 if (nritems > 0)
7867                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7868                 level = btrfs_header_level(buf);
7869                 for (i = 0; i < nritems; i++) {
7870                         struct extent_record tmpl;
7871
7872                         ptr = btrfs_node_blockptr(buf, i);
7873                         size = root->nodesize;
7874                         btrfs_node_key_to_cpu(buf, &key, i);
7875                         if (ri != NULL) {
7876                                 if ((level == ri->drop_level)
7877                                     && is_dropped_key(&key, &ri->drop_key)) {
7878                                         continue;
7879                                 }
7880                         }
7881
7882                         memset(&tmpl, 0, sizeof(tmpl));
7883                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7884                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7885                         tmpl.start = ptr;
7886                         tmpl.nr = size;
7887                         tmpl.refs = 1;
7888                         tmpl.metadata = 1;
7889                         tmpl.max_size = size;
7890                         ret = add_extent_rec(extent_cache, &tmpl);
7891                         if (ret < 0)
7892                                 goto out;
7893
7894                         ret = add_tree_backref(extent_cache, ptr, parent,
7895                                         owner, 1);
7896                         if (ret < 0) {
7897                                 error(
7898                                 "add_tree_backref failed (non-leaf block): %s",
7899                                       strerror(-ret));
7900                                 continue;
7901                         }
7902
7903                         if (level > 1) {
7904                                 add_pending(nodes, seen, ptr, size);
7905                         } else {
7906                                 add_pending(pending, seen, ptr, size);
7907                         }
7908                 }
7909                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7910                                       nritems) * sizeof(struct btrfs_key_ptr);
7911         }
7912         total_btree_bytes += buf->len;
7913         if (fs_root_objectid(btrfs_header_owner(buf)))
7914                 total_fs_tree_bytes += buf->len;
7915         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7916                 total_extent_tree_bytes += buf->len;
7917         if (!found_old_backref &&
7918             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7919             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7920             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7921                 found_old_backref = 1;
7922 out:
7923         free_extent_buffer(buf);
7924         return ret;
7925 }
7926
7927 static int add_root_to_pending(struct extent_buffer *buf,
7928                                struct cache_tree *extent_cache,
7929                                struct cache_tree *pending,
7930                                struct cache_tree *seen,
7931                                struct cache_tree *nodes,
7932                                u64 objectid)
7933 {
7934         struct extent_record tmpl;
7935         int ret;
7936
7937         if (btrfs_header_level(buf) > 0)
7938                 add_pending(nodes, seen, buf->start, buf->len);
7939         else
7940                 add_pending(pending, seen, buf->start, buf->len);
7941
7942         memset(&tmpl, 0, sizeof(tmpl));
7943         tmpl.start = buf->start;
7944         tmpl.nr = buf->len;
7945         tmpl.is_root = 1;
7946         tmpl.refs = 1;
7947         tmpl.metadata = 1;
7948         tmpl.max_size = buf->len;
7949         add_extent_rec(extent_cache, &tmpl);
7950
7951         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7952             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7953                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7954                                 0, 1);
7955         else
7956                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7957                                 1);
7958         return ret;
7959 }
7960
7961 /* as we fix the tree, we might be deleting blocks that
7962  * we're tracking for repair.  This hook makes sure we
7963  * remove any backrefs for blocks as we are fixing them.
7964  */
7965 static int free_extent_hook(struct btrfs_trans_handle *trans,
7966                             struct btrfs_root *root,
7967                             u64 bytenr, u64 num_bytes, u64 parent,
7968                             u64 root_objectid, u64 owner, u64 offset,
7969                             int refs_to_drop)
7970 {
7971         struct extent_record *rec;
7972         struct cache_extent *cache;
7973         int is_data;
7974         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7975
7976         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7977         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7978         if (!cache)
7979                 return 0;
7980
7981         rec = container_of(cache, struct extent_record, cache);
7982         if (is_data) {
7983                 struct data_backref *back;
7984                 back = find_data_backref(rec, parent, root_objectid, owner,
7985                                          offset, 1, bytenr, num_bytes);
7986                 if (!back)
7987                         goto out;
7988                 if (back->node.found_ref) {
7989                         back->found_ref -= refs_to_drop;
7990                         if (rec->refs)
7991                                 rec->refs -= refs_to_drop;
7992                 }
7993                 if (back->node.found_extent_tree) {
7994                         back->num_refs -= refs_to_drop;
7995                         if (rec->extent_item_refs)
7996                                 rec->extent_item_refs -= refs_to_drop;
7997                 }
7998                 if (back->found_ref == 0)
7999                         back->node.found_ref = 0;
8000                 if (back->num_refs == 0)
8001                         back->node.found_extent_tree = 0;
8002
8003                 if (!back->node.found_extent_tree && back->node.found_ref) {
8004                         list_del(&back->node.list);
8005                         free(back);
8006                 }
8007         } else {
8008                 struct tree_backref *back;
8009                 back = find_tree_backref(rec, parent, root_objectid);
8010                 if (!back)
8011                         goto out;
8012                 if (back->node.found_ref) {
8013                         if (rec->refs)
8014                                 rec->refs--;
8015                         back->node.found_ref = 0;
8016                 }
8017                 if (back->node.found_extent_tree) {
8018                         if (rec->extent_item_refs)
8019                                 rec->extent_item_refs--;
8020                         back->node.found_extent_tree = 0;
8021                 }
8022                 if (!back->node.found_extent_tree && back->node.found_ref) {
8023                         list_del(&back->node.list);
8024                         free(back);
8025                 }
8026         }
8027         maybe_free_extent_rec(extent_cache, rec);
8028 out:
8029         return 0;
8030 }
8031
8032 static int delete_extent_records(struct btrfs_trans_handle *trans,
8033                                  struct btrfs_root *root,
8034                                  struct btrfs_path *path,
8035                                  u64 bytenr)
8036 {
8037         struct btrfs_key key;
8038         struct btrfs_key found_key;
8039         struct extent_buffer *leaf;
8040         int ret;
8041         int slot;
8042
8043
8044         key.objectid = bytenr;
8045         key.type = (u8)-1;
8046         key.offset = (u64)-1;
8047
8048         while(1) {
8049                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8050                                         &key, path, 0, 1);
8051                 if (ret < 0)
8052                         break;
8053
8054                 if (ret > 0) {
8055                         ret = 0;
8056                         if (path->slots[0] == 0)
8057                                 break;
8058                         path->slots[0]--;
8059                 }
8060                 ret = 0;
8061
8062                 leaf = path->nodes[0];
8063                 slot = path->slots[0];
8064
8065                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8066                 if (found_key.objectid != bytenr)
8067                         break;
8068
8069                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8070                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8071                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8072                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8073                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8074                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8075                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8076                         btrfs_release_path(path);
8077                         if (found_key.type == 0) {
8078                                 if (found_key.offset == 0)
8079                                         break;
8080                                 key.offset = found_key.offset - 1;
8081                                 key.type = found_key.type;
8082                         }
8083                         key.type = found_key.type - 1;
8084                         key.offset = (u64)-1;
8085                         continue;
8086                 }
8087
8088                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8089                         found_key.objectid, found_key.type, found_key.offset);
8090
8091                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8092                 if (ret)
8093                         break;
8094                 btrfs_release_path(path);
8095
8096                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8097                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8098                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8099                                 found_key.offset : root->nodesize;
8100
8101                         ret = btrfs_update_block_group(trans, root, bytenr,
8102                                                        bytes, 0, 0);
8103                         if (ret)
8104                                 break;
8105                 }
8106         }
8107
8108         btrfs_release_path(path);
8109         return ret;
8110 }
8111
8112 /*
8113  * for a single backref, this will allocate a new extent
8114  * and add the backref to it.
8115  */
8116 static int record_extent(struct btrfs_trans_handle *trans,
8117                          struct btrfs_fs_info *info,
8118                          struct btrfs_path *path,
8119                          struct extent_record *rec,
8120                          struct extent_backref *back,
8121                          int allocated, u64 flags)
8122 {
8123         int ret = 0;
8124         struct btrfs_root *extent_root = info->extent_root;
8125         struct extent_buffer *leaf;
8126         struct btrfs_key ins_key;
8127         struct btrfs_extent_item *ei;
8128         struct data_backref *dback;
8129         struct btrfs_tree_block_info *bi;
8130
8131         if (!back->is_data)
8132                 rec->max_size = max_t(u64, rec->max_size,
8133                                     info->extent_root->nodesize);
8134
8135         if (!allocated) {
8136                 u32 item_size = sizeof(*ei);
8137
8138                 if (!back->is_data)
8139                         item_size += sizeof(*bi);
8140
8141                 ins_key.objectid = rec->start;
8142                 ins_key.offset = rec->max_size;
8143                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8144
8145                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8146                                         &ins_key, item_size);
8147                 if (ret)
8148                         goto fail;
8149
8150                 leaf = path->nodes[0];
8151                 ei = btrfs_item_ptr(leaf, path->slots[0],
8152                                     struct btrfs_extent_item);
8153
8154                 btrfs_set_extent_refs(leaf, ei, 0);
8155                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8156
8157                 if (back->is_data) {
8158                         btrfs_set_extent_flags(leaf, ei,
8159                                                BTRFS_EXTENT_FLAG_DATA);
8160                 } else {
8161                         struct btrfs_disk_key copy_key;;
8162
8163                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8164                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8165                                              sizeof(*bi));
8166
8167                         btrfs_set_disk_key_objectid(&copy_key,
8168                                                     rec->info_objectid);
8169                         btrfs_set_disk_key_type(&copy_key, 0);
8170                         btrfs_set_disk_key_offset(&copy_key, 0);
8171
8172                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8173                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8174
8175                         btrfs_set_extent_flags(leaf, ei,
8176                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8177                 }
8178
8179                 btrfs_mark_buffer_dirty(leaf);
8180                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8181                                                rec->max_size, 1, 0);
8182                 if (ret)
8183                         goto fail;
8184                 btrfs_release_path(path);
8185         }
8186
8187         if (back->is_data) {
8188                 u64 parent;
8189                 int i;
8190
8191                 dback = to_data_backref(back);
8192                 if (back->full_backref)
8193                         parent = dback->parent;
8194                 else
8195                         parent = 0;
8196
8197                 for (i = 0; i < dback->found_ref; i++) {
8198                         /* if parent != 0, we're doing a full backref
8199                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8200                          * just makes the backref allocator create a data
8201                          * backref
8202                          */
8203                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8204                                                    rec->start, rec->max_size,
8205                                                    parent,
8206                                                    dback->root,
8207                                                    parent ?
8208                                                    BTRFS_FIRST_FREE_OBJECTID :
8209                                                    dback->owner,
8210                                                    dback->offset);
8211                         if (ret)
8212                                 break;
8213                 }
8214                 fprintf(stderr, "adding new data backref"
8215                                 " on %llu %s %llu owner %llu"
8216                                 " offset %llu found %d\n",
8217                                 (unsigned long long)rec->start,
8218                                 back->full_backref ?
8219                                 "parent" : "root",
8220                                 back->full_backref ?
8221                                 (unsigned long long)parent :
8222                                 (unsigned long long)dback->root,
8223                                 (unsigned long long)dback->owner,
8224                                 (unsigned long long)dback->offset,
8225                                 dback->found_ref);
8226         } else {
8227                 u64 parent;
8228                 struct tree_backref *tback;
8229
8230                 tback = to_tree_backref(back);
8231                 if (back->full_backref)
8232                         parent = tback->parent;
8233                 else
8234                         parent = 0;
8235
8236                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8237                                            rec->start, rec->max_size,
8238                                            parent, tback->root, 0, 0);
8239                 fprintf(stderr, "adding new tree backref on "
8240                         "start %llu len %llu parent %llu root %llu\n",
8241                         rec->start, rec->max_size, parent, tback->root);
8242         }
8243 fail:
8244         btrfs_release_path(path);
8245         return ret;
8246 }
8247
8248 static struct extent_entry *find_entry(struct list_head *entries,
8249                                        u64 bytenr, u64 bytes)
8250 {
8251         struct extent_entry *entry = NULL;
8252
8253         list_for_each_entry(entry, entries, list) {
8254                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8255                         return entry;
8256         }
8257
8258         return NULL;
8259 }
8260
8261 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8262 {
8263         struct extent_entry *entry, *best = NULL, *prev = NULL;
8264
8265         list_for_each_entry(entry, entries, list) {
8266                 /*
8267                  * If there are as many broken entries as entries then we know
8268                  * not to trust this particular entry.
8269                  */
8270                 if (entry->broken == entry->count)
8271                         continue;
8272
8273                 /*
8274                  * Special case, when there are only two entries and 'best' is
8275                  * the first one
8276                  */
8277                 if (!prev) {
8278                         best = entry;
8279                         prev = entry;
8280                         continue;
8281                 }
8282
8283                 /*
8284                  * If our current entry == best then we can't be sure our best
8285                  * is really the best, so we need to keep searching.
8286                  */
8287                 if (best && best->count == entry->count) {
8288                         prev = entry;
8289                         best = NULL;
8290                         continue;
8291                 }
8292
8293                 /* Prev == entry, not good enough, have to keep searching */
8294                 if (!prev->broken && prev->count == entry->count)
8295                         continue;
8296
8297                 if (!best)
8298                         best = (prev->count > entry->count) ? prev : entry;
8299                 else if (best->count < entry->count)
8300                         best = entry;
8301                 prev = entry;
8302         }
8303
8304         return best;
8305 }
8306
8307 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8308                       struct data_backref *dback, struct extent_entry *entry)
8309 {
8310         struct btrfs_trans_handle *trans;
8311         struct btrfs_root *root;
8312         struct btrfs_file_extent_item *fi;
8313         struct extent_buffer *leaf;
8314         struct btrfs_key key;
8315         u64 bytenr, bytes;
8316         int ret, err;
8317
8318         key.objectid = dback->root;
8319         key.type = BTRFS_ROOT_ITEM_KEY;
8320         key.offset = (u64)-1;
8321         root = btrfs_read_fs_root(info, &key);
8322         if (IS_ERR(root)) {
8323                 fprintf(stderr, "Couldn't find root for our ref\n");
8324                 return -EINVAL;
8325         }
8326
8327         /*
8328          * The backref points to the original offset of the extent if it was
8329          * split, so we need to search down to the offset we have and then walk
8330          * forward until we find the backref we're looking for.
8331          */
8332         key.objectid = dback->owner;
8333         key.type = BTRFS_EXTENT_DATA_KEY;
8334         key.offset = dback->offset;
8335         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8336         if (ret < 0) {
8337                 fprintf(stderr, "Error looking up ref %d\n", ret);
8338                 return ret;
8339         }
8340
8341         while (1) {
8342                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8343                         ret = btrfs_next_leaf(root, path);
8344                         if (ret) {
8345                                 fprintf(stderr, "Couldn't find our ref, next\n");
8346                                 return -EINVAL;
8347                         }
8348                 }
8349                 leaf = path->nodes[0];
8350                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8351                 if (key.objectid != dback->owner ||
8352                     key.type != BTRFS_EXTENT_DATA_KEY) {
8353                         fprintf(stderr, "Couldn't find our ref, search\n");
8354                         return -EINVAL;
8355                 }
8356                 fi = btrfs_item_ptr(leaf, path->slots[0],
8357                                     struct btrfs_file_extent_item);
8358                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8359                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8360
8361                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8362                         break;
8363                 path->slots[0]++;
8364         }
8365
8366         btrfs_release_path(path);
8367
8368         trans = btrfs_start_transaction(root, 1);
8369         if (IS_ERR(trans))
8370                 return PTR_ERR(trans);
8371
8372         /*
8373          * Ok we have the key of the file extent we want to fix, now we can cow
8374          * down to the thing and fix it.
8375          */
8376         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8377         if (ret < 0) {
8378                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8379                         key.objectid, key.type, key.offset, ret);
8380                 goto out;
8381         }
8382         if (ret > 0) {
8383                 fprintf(stderr, "Well that's odd, we just found this key "
8384                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8385                         key.offset);
8386                 ret = -EINVAL;
8387                 goto out;
8388         }
8389         leaf = path->nodes[0];
8390         fi = btrfs_item_ptr(leaf, path->slots[0],
8391                             struct btrfs_file_extent_item);
8392
8393         if (btrfs_file_extent_compression(leaf, fi) &&
8394             dback->disk_bytenr != entry->bytenr) {
8395                 fprintf(stderr, "Ref doesn't match the record start and is "
8396                         "compressed, please take a btrfs-image of this file "
8397                         "system and send it to a btrfs developer so they can "
8398                         "complete this functionality for bytenr %Lu\n",
8399                         dback->disk_bytenr);
8400                 ret = -EINVAL;
8401                 goto out;
8402         }
8403
8404         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8405                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8406         } else if (dback->disk_bytenr > entry->bytenr) {
8407                 u64 off_diff, offset;
8408
8409                 off_diff = dback->disk_bytenr - entry->bytenr;
8410                 offset = btrfs_file_extent_offset(leaf, fi);
8411                 if (dback->disk_bytenr + offset +
8412                     btrfs_file_extent_num_bytes(leaf, fi) >
8413                     entry->bytenr + entry->bytes) {
8414                         fprintf(stderr, "Ref is past the entry end, please "
8415                                 "take a btrfs-image of this file system and "
8416                                 "send it to a btrfs developer, ref %Lu\n",
8417                                 dback->disk_bytenr);
8418                         ret = -EINVAL;
8419                         goto out;
8420                 }
8421                 offset += off_diff;
8422                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8423                 btrfs_set_file_extent_offset(leaf, fi, offset);
8424         } else if (dback->disk_bytenr < entry->bytenr) {
8425                 u64 offset;
8426
8427                 offset = btrfs_file_extent_offset(leaf, fi);
8428                 if (dback->disk_bytenr + offset < entry->bytenr) {
8429                         fprintf(stderr, "Ref is before the entry start, please"
8430                                 " take a btrfs-image of this file system and "
8431                                 "send it to a btrfs developer, ref %Lu\n",
8432                                 dback->disk_bytenr);
8433                         ret = -EINVAL;
8434                         goto out;
8435                 }
8436
8437                 offset += dback->disk_bytenr;
8438                 offset -= entry->bytenr;
8439                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8440                 btrfs_set_file_extent_offset(leaf, fi, offset);
8441         }
8442
8443         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8444
8445         /*
8446          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8447          * only do this if we aren't using compression, otherwise it's a
8448          * trickier case.
8449          */
8450         if (!btrfs_file_extent_compression(leaf, fi))
8451                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8452         else
8453                 printf("ram bytes may be wrong?\n");
8454         btrfs_mark_buffer_dirty(leaf);
8455 out:
8456         err = btrfs_commit_transaction(trans, root);
8457         btrfs_release_path(path);
8458         return ret ? ret : err;
8459 }
8460
8461 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8462                            struct extent_record *rec)
8463 {
8464         struct extent_backref *back;
8465         struct data_backref *dback;
8466         struct extent_entry *entry, *best = NULL;
8467         LIST_HEAD(entries);
8468         int nr_entries = 0;
8469         int broken_entries = 0;
8470         int ret = 0;
8471         short mismatch = 0;
8472
8473         /*
8474          * Metadata is easy and the backrefs should always agree on bytenr and
8475          * size, if not we've got bigger issues.
8476          */
8477         if (rec->metadata)
8478                 return 0;
8479
8480         list_for_each_entry(back, &rec->backrefs, list) {
8481                 if (back->full_backref || !back->is_data)
8482                         continue;
8483
8484                 dback = to_data_backref(back);
8485
8486                 /*
8487                  * We only pay attention to backrefs that we found a real
8488                  * backref for.
8489                  */
8490                 if (dback->found_ref == 0)
8491                         continue;
8492
8493                 /*
8494                  * For now we only catch when the bytes don't match, not the
8495                  * bytenr.  We can easily do this at the same time, but I want
8496                  * to have a fs image to test on before we just add repair
8497                  * functionality willy-nilly so we know we won't screw up the
8498                  * repair.
8499                  */
8500
8501                 entry = find_entry(&entries, dback->disk_bytenr,
8502                                    dback->bytes);
8503                 if (!entry) {
8504                         entry = malloc(sizeof(struct extent_entry));
8505                         if (!entry) {
8506                                 ret = -ENOMEM;
8507                                 goto out;
8508                         }
8509                         memset(entry, 0, sizeof(*entry));
8510                         entry->bytenr = dback->disk_bytenr;
8511                         entry->bytes = dback->bytes;
8512                         list_add_tail(&entry->list, &entries);
8513                         nr_entries++;
8514                 }
8515
8516                 /*
8517                  * If we only have on entry we may think the entries agree when
8518                  * in reality they don't so we have to do some extra checking.
8519                  */
8520                 if (dback->disk_bytenr != rec->start ||
8521                     dback->bytes != rec->nr || back->broken)
8522                         mismatch = 1;
8523
8524                 if (back->broken) {
8525                         entry->broken++;
8526                         broken_entries++;
8527                 }
8528
8529                 entry->count++;
8530         }
8531
8532         /* Yay all the backrefs agree, carry on good sir */
8533         if (nr_entries <= 1 && !mismatch)
8534                 goto out;
8535
8536         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8537                 "%Lu\n", rec->start);
8538
8539         /*
8540          * First we want to see if the backrefs can agree amongst themselves who
8541          * is right, so figure out which one of the entries has the highest
8542          * count.
8543          */
8544         best = find_most_right_entry(&entries);
8545
8546         /*
8547          * Ok so we may have an even split between what the backrefs think, so
8548          * this is where we use the extent ref to see what it thinks.
8549          */
8550         if (!best) {
8551                 entry = find_entry(&entries, rec->start, rec->nr);
8552                 if (!entry && (!broken_entries || !rec->found_rec)) {
8553                         fprintf(stderr, "Backrefs don't agree with each other "
8554                                 "and extent record doesn't agree with anybody,"
8555                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8556                                 rec->start, rec->nr);
8557                         ret = -EINVAL;
8558                         goto out;
8559                 } else if (!entry) {
8560                         /*
8561                          * Ok our backrefs were broken, we'll assume this is the
8562                          * correct value and add an entry for this range.
8563                          */
8564                         entry = malloc(sizeof(struct extent_entry));
8565                         if (!entry) {
8566                                 ret = -ENOMEM;
8567                                 goto out;
8568                         }
8569                         memset(entry, 0, sizeof(*entry));
8570                         entry->bytenr = rec->start;
8571                         entry->bytes = rec->nr;
8572                         list_add_tail(&entry->list, &entries);
8573                         nr_entries++;
8574                 }
8575                 entry->count++;
8576                 best = find_most_right_entry(&entries);
8577                 if (!best) {
8578                         fprintf(stderr, "Backrefs and extent record evenly "
8579                                 "split on who is right, this is going to "
8580                                 "require user input to fix bytenr %Lu bytes "
8581                                 "%Lu\n", rec->start, rec->nr);
8582                         ret = -EINVAL;
8583                         goto out;
8584                 }
8585         }
8586
8587         /*
8588          * I don't think this can happen currently as we'll abort() if we catch
8589          * this case higher up, but in case somebody removes that we still can't
8590          * deal with it properly here yet, so just bail out of that's the case.
8591          */
8592         if (best->bytenr != rec->start) {
8593                 fprintf(stderr, "Extent start and backref starts don't match, "
8594                         "please use btrfs-image on this file system and send "
8595                         "it to a btrfs developer so they can make fsck fix "
8596                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8597                         rec->start, rec->nr);
8598                 ret = -EINVAL;
8599                 goto out;
8600         }
8601
8602         /*
8603          * Ok great we all agreed on an extent record, let's go find the real
8604          * references and fix up the ones that don't match.
8605          */
8606         list_for_each_entry(back, &rec->backrefs, list) {
8607                 if (back->full_backref || !back->is_data)
8608                         continue;
8609
8610                 dback = to_data_backref(back);
8611
8612                 /*
8613                  * Still ignoring backrefs that don't have a real ref attached
8614                  * to them.
8615                  */
8616                 if (dback->found_ref == 0)
8617                         continue;
8618
8619                 if (dback->bytes == best->bytes &&
8620                     dback->disk_bytenr == best->bytenr)
8621                         continue;
8622
8623                 ret = repair_ref(info, path, dback, best);
8624                 if (ret)
8625                         goto out;
8626         }
8627
8628         /*
8629          * Ok we messed with the actual refs, which means we need to drop our
8630          * entire cache and go back and rescan.  I know this is a huge pain and
8631          * adds a lot of extra work, but it's the only way to be safe.  Once all
8632          * the backrefs agree we may not need to do anything to the extent
8633          * record itself.
8634          */
8635         ret = -EAGAIN;
8636 out:
8637         while (!list_empty(&entries)) {
8638                 entry = list_entry(entries.next, struct extent_entry, list);
8639                 list_del_init(&entry->list);
8640                 free(entry);
8641         }
8642         return ret;
8643 }
8644
8645 static int process_duplicates(struct cache_tree *extent_cache,
8646                               struct extent_record *rec)
8647 {
8648         struct extent_record *good, *tmp;
8649         struct cache_extent *cache;
8650         int ret;
8651
8652         /*
8653          * If we found a extent record for this extent then return, or if we
8654          * have more than one duplicate we are likely going to need to delete
8655          * something.
8656          */
8657         if (rec->found_rec || rec->num_duplicates > 1)
8658                 return 0;
8659
8660         /* Shouldn't happen but just in case */
8661         BUG_ON(!rec->num_duplicates);
8662
8663         /*
8664          * So this happens if we end up with a backref that doesn't match the
8665          * actual extent entry.  So either the backref is bad or the extent
8666          * entry is bad.  Either way we want to have the extent_record actually
8667          * reflect what we found in the extent_tree, so we need to take the
8668          * duplicate out and use that as the extent_record since the only way we
8669          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8670          */
8671         remove_cache_extent(extent_cache, &rec->cache);
8672
8673         good = to_extent_record(rec->dups.next);
8674         list_del_init(&good->list);
8675         INIT_LIST_HEAD(&good->backrefs);
8676         INIT_LIST_HEAD(&good->dups);
8677         good->cache.start = good->start;
8678         good->cache.size = good->nr;
8679         good->content_checked = 0;
8680         good->owner_ref_checked = 0;
8681         good->num_duplicates = 0;
8682         good->refs = rec->refs;
8683         list_splice_init(&rec->backrefs, &good->backrefs);
8684         while (1) {
8685                 cache = lookup_cache_extent(extent_cache, good->start,
8686                                             good->nr);
8687                 if (!cache)
8688                         break;
8689                 tmp = container_of(cache, struct extent_record, cache);
8690
8691                 /*
8692                  * If we find another overlapping extent and it's found_rec is
8693                  * set then it's a duplicate and we need to try and delete
8694                  * something.
8695                  */
8696                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8697                         if (list_empty(&good->list))
8698                                 list_add_tail(&good->list,
8699                                               &duplicate_extents);
8700                         good->num_duplicates += tmp->num_duplicates + 1;
8701                         list_splice_init(&tmp->dups, &good->dups);
8702                         list_del_init(&tmp->list);
8703                         list_add_tail(&tmp->list, &good->dups);
8704                         remove_cache_extent(extent_cache, &tmp->cache);
8705                         continue;
8706                 }
8707
8708                 /*
8709                  * Ok we have another non extent item backed extent rec, so lets
8710                  * just add it to this extent and carry on like we did above.
8711                  */
8712                 good->refs += tmp->refs;
8713                 list_splice_init(&tmp->backrefs, &good->backrefs);
8714                 remove_cache_extent(extent_cache, &tmp->cache);
8715                 free(tmp);
8716         }
8717         ret = insert_cache_extent(extent_cache, &good->cache);
8718         BUG_ON(ret);
8719         free(rec);
8720         return good->num_duplicates ? 0 : 1;
8721 }
8722
8723 static int delete_duplicate_records(struct btrfs_root *root,
8724                                     struct extent_record *rec)
8725 {
8726         struct btrfs_trans_handle *trans;
8727         LIST_HEAD(delete_list);
8728         struct btrfs_path path;
8729         struct extent_record *tmp, *good, *n;
8730         int nr_del = 0;
8731         int ret = 0, err;
8732         struct btrfs_key key;
8733
8734         btrfs_init_path(&path);
8735
8736         good = rec;
8737         /* Find the record that covers all of the duplicates. */
8738         list_for_each_entry(tmp, &rec->dups, list) {
8739                 if (good->start < tmp->start)
8740                         continue;
8741                 if (good->nr > tmp->nr)
8742                         continue;
8743
8744                 if (tmp->start + tmp->nr < good->start + good->nr) {
8745                         fprintf(stderr, "Ok we have overlapping extents that "
8746                                 "aren't completely covered by each other, this "
8747                                 "is going to require more careful thought.  "
8748                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8749                                 tmp->start, tmp->nr, good->start, good->nr);
8750                         abort();
8751                 }
8752                 good = tmp;
8753         }
8754
8755         if (good != rec)
8756                 list_add_tail(&rec->list, &delete_list);
8757
8758         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8759                 if (tmp == good)
8760                         continue;
8761                 list_move_tail(&tmp->list, &delete_list);
8762         }
8763
8764         root = root->fs_info->extent_root;
8765         trans = btrfs_start_transaction(root, 1);
8766         if (IS_ERR(trans)) {
8767                 ret = PTR_ERR(trans);
8768                 goto out;
8769         }
8770
8771         list_for_each_entry(tmp, &delete_list, list) {
8772                 if (tmp->found_rec == 0)
8773                         continue;
8774                 key.objectid = tmp->start;
8775                 key.type = BTRFS_EXTENT_ITEM_KEY;
8776                 key.offset = tmp->nr;
8777
8778                 /* Shouldn't happen but just in case */
8779                 if (tmp->metadata) {
8780                         fprintf(stderr, "Well this shouldn't happen, extent "
8781                                 "record overlaps but is metadata? "
8782                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8783                         abort();
8784                 }
8785
8786                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8787                 if (ret) {
8788                         if (ret > 0)
8789                                 ret = -EINVAL;
8790                         break;
8791                 }
8792                 ret = btrfs_del_item(trans, root, &path);
8793                 if (ret)
8794                         break;
8795                 btrfs_release_path(&path);
8796                 nr_del++;
8797         }
8798         err = btrfs_commit_transaction(trans, root);
8799         if (err && !ret)
8800                 ret = err;
8801 out:
8802         while (!list_empty(&delete_list)) {
8803                 tmp = to_extent_record(delete_list.next);
8804                 list_del_init(&tmp->list);
8805                 if (tmp == rec)
8806                         continue;
8807                 free(tmp);
8808         }
8809
8810         while (!list_empty(&rec->dups)) {
8811                 tmp = to_extent_record(rec->dups.next);
8812                 list_del_init(&tmp->list);
8813                 free(tmp);
8814         }
8815
8816         btrfs_release_path(&path);
8817
8818         if (!ret && !nr_del)
8819                 rec->num_duplicates = 0;
8820
8821         return ret ? ret : nr_del;
8822 }
8823
8824 static int find_possible_backrefs(struct btrfs_fs_info *info,
8825                                   struct btrfs_path *path,
8826                                   struct cache_tree *extent_cache,
8827                                   struct extent_record *rec)
8828 {
8829         struct btrfs_root *root;
8830         struct extent_backref *back;
8831         struct data_backref *dback;
8832         struct cache_extent *cache;
8833         struct btrfs_file_extent_item *fi;
8834         struct btrfs_key key;
8835         u64 bytenr, bytes;
8836         int ret;
8837
8838         list_for_each_entry(back, &rec->backrefs, list) {
8839                 /* Don't care about full backrefs (poor unloved backrefs) */
8840                 if (back->full_backref || !back->is_data)
8841                         continue;
8842
8843                 dback = to_data_backref(back);
8844
8845                 /* We found this one, we don't need to do a lookup */
8846                 if (dback->found_ref)
8847                         continue;
8848
8849                 key.objectid = dback->root;
8850                 key.type = BTRFS_ROOT_ITEM_KEY;
8851                 key.offset = (u64)-1;
8852
8853                 root = btrfs_read_fs_root(info, &key);
8854
8855                 /* No root, definitely a bad ref, skip */
8856                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8857                         continue;
8858                 /* Other err, exit */
8859                 if (IS_ERR(root))
8860                         return PTR_ERR(root);
8861
8862                 key.objectid = dback->owner;
8863                 key.type = BTRFS_EXTENT_DATA_KEY;
8864                 key.offset = dback->offset;
8865                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8866                 if (ret) {
8867                         btrfs_release_path(path);
8868                         if (ret < 0)
8869                                 return ret;
8870                         /* Didn't find it, we can carry on */
8871                         ret = 0;
8872                         continue;
8873                 }
8874
8875                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8876                                     struct btrfs_file_extent_item);
8877                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8878                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8879                 btrfs_release_path(path);
8880                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8881                 if (cache) {
8882                         struct extent_record *tmp;
8883                         tmp = container_of(cache, struct extent_record, cache);
8884
8885                         /*
8886                          * If we found an extent record for the bytenr for this
8887                          * particular backref then we can't add it to our
8888                          * current extent record.  We only want to add backrefs
8889                          * that don't have a corresponding extent item in the
8890                          * extent tree since they likely belong to this record
8891                          * and we need to fix it if it doesn't match bytenrs.
8892                          */
8893                         if  (tmp->found_rec)
8894                                 continue;
8895                 }
8896
8897                 dback->found_ref += 1;
8898                 dback->disk_bytenr = bytenr;
8899                 dback->bytes = bytes;
8900
8901                 /*
8902                  * Set this so the verify backref code knows not to trust the
8903                  * values in this backref.
8904                  */
8905                 back->broken = 1;
8906         }
8907
8908         return 0;
8909 }
8910
8911 /*
8912  * Record orphan data ref into corresponding root.
8913  *
8914  * Return 0 if the extent item contains data ref and recorded.
8915  * Return 1 if the extent item contains no useful data ref
8916  *   On that case, it may contains only shared_dataref or metadata backref
8917  *   or the file extent exists(this should be handled by the extent bytenr
8918  *   recovery routine)
8919  * Return <0 if something goes wrong.
8920  */
8921 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8922                                       struct extent_record *rec)
8923 {
8924         struct btrfs_key key;
8925         struct btrfs_root *dest_root;
8926         struct extent_backref *back;
8927         struct data_backref *dback;
8928         struct orphan_data_extent *orphan;
8929         struct btrfs_path path;
8930         int recorded_data_ref = 0;
8931         int ret = 0;
8932
8933         if (rec->metadata)
8934                 return 1;
8935         btrfs_init_path(&path);
8936         list_for_each_entry(back, &rec->backrefs, list) {
8937                 if (back->full_backref || !back->is_data ||
8938                     !back->found_extent_tree)
8939                         continue;
8940                 dback = to_data_backref(back);
8941                 if (dback->found_ref)
8942                         continue;
8943                 key.objectid = dback->root;
8944                 key.type = BTRFS_ROOT_ITEM_KEY;
8945                 key.offset = (u64)-1;
8946
8947                 dest_root = btrfs_read_fs_root(fs_info, &key);
8948
8949                 /* For non-exist root we just skip it */
8950                 if (IS_ERR(dest_root) || !dest_root)
8951                         continue;
8952
8953                 key.objectid = dback->owner;
8954                 key.type = BTRFS_EXTENT_DATA_KEY;
8955                 key.offset = dback->offset;
8956
8957                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8958                 btrfs_release_path(&path);
8959                 /*
8960                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8961                  * we need to record it for inode/file extent rebuild.
8962                  * For ret > 0, we record it only for file extent rebuild.
8963                  * For ret == 0, the file extent exists but only bytenr
8964                  * mismatch, let the original bytenr fix routine to handle,
8965                  * don't record it.
8966                  */
8967                 if (ret == 0)
8968                         continue;
8969                 ret = 0;
8970                 orphan = malloc(sizeof(*orphan));
8971                 if (!orphan) {
8972                         ret = -ENOMEM;
8973                         goto out;
8974                 }
8975                 INIT_LIST_HEAD(&orphan->list);
8976                 orphan->root = dback->root;
8977                 orphan->objectid = dback->owner;
8978                 orphan->offset = dback->offset;
8979                 orphan->disk_bytenr = rec->cache.start;
8980                 orphan->disk_len = rec->cache.size;
8981                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8982                 recorded_data_ref = 1;
8983         }
8984 out:
8985         btrfs_release_path(&path);
8986         if (!ret)
8987                 return !recorded_data_ref;
8988         else
8989                 return ret;
8990 }
8991
8992 /*
8993  * when an incorrect extent item is found, this will delete
8994  * all of the existing entries for it and recreate them
8995  * based on what the tree scan found.
8996  */
8997 static int fixup_extent_refs(struct btrfs_fs_info *info,
8998                              struct cache_tree *extent_cache,
8999                              struct extent_record *rec)
9000 {
9001         struct btrfs_trans_handle *trans = NULL;
9002         int ret;
9003         struct btrfs_path path;
9004         struct list_head *cur = rec->backrefs.next;
9005         struct cache_extent *cache;
9006         struct extent_backref *back;
9007         int allocated = 0;
9008         u64 flags = 0;
9009
9010         if (rec->flag_block_full_backref)
9011                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9012
9013         btrfs_init_path(&path);
9014         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9015                 /*
9016                  * Sometimes the backrefs themselves are so broken they don't
9017                  * get attached to any meaningful rec, so first go back and
9018                  * check any of our backrefs that we couldn't find and throw
9019                  * them into the list if we find the backref so that
9020                  * verify_backrefs can figure out what to do.
9021                  */
9022                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9023                 if (ret < 0)
9024                         goto out;
9025         }
9026
9027         /* step one, make sure all of the backrefs agree */
9028         ret = verify_backrefs(info, &path, rec);
9029         if (ret < 0)
9030                 goto out;
9031
9032         trans = btrfs_start_transaction(info->extent_root, 1);
9033         if (IS_ERR(trans)) {
9034                 ret = PTR_ERR(trans);
9035                 goto out;
9036         }
9037
9038         /* step two, delete all the existing records */
9039         ret = delete_extent_records(trans, info->extent_root, &path,
9040                                     rec->start);
9041
9042         if (ret < 0)
9043                 goto out;
9044
9045         /* was this block corrupt?  If so, don't add references to it */
9046         cache = lookup_cache_extent(info->corrupt_blocks,
9047                                     rec->start, rec->max_size);
9048         if (cache) {
9049                 ret = 0;
9050                 goto out;
9051         }
9052
9053         /* step three, recreate all the refs we did find */
9054         while(cur != &rec->backrefs) {
9055                 back = to_extent_backref(cur);
9056                 cur = cur->next;
9057
9058                 /*
9059                  * if we didn't find any references, don't create a
9060                  * new extent record
9061                  */
9062                 if (!back->found_ref)
9063                         continue;
9064
9065                 rec->bad_full_backref = 0;
9066                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9067                 allocated = 1;
9068
9069                 if (ret)
9070                         goto out;
9071         }
9072 out:
9073         if (trans) {
9074                 int err = btrfs_commit_transaction(trans, info->extent_root);
9075                 if (!ret)
9076                         ret = err;
9077         }
9078
9079         if (!ret)
9080                 fprintf(stderr, "Repaired extent references for %llu\n",
9081                                 (unsigned long long)rec->start);
9082
9083         btrfs_release_path(&path);
9084         return ret;
9085 }
9086
9087 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9088                               struct extent_record *rec)
9089 {
9090         struct btrfs_trans_handle *trans;
9091         struct btrfs_root *root = fs_info->extent_root;
9092         struct btrfs_path path;
9093         struct btrfs_extent_item *ei;
9094         struct btrfs_key key;
9095         u64 flags;
9096         int ret = 0;
9097
9098         key.objectid = rec->start;
9099         if (rec->metadata) {
9100                 key.type = BTRFS_METADATA_ITEM_KEY;
9101                 key.offset = rec->info_level;
9102         } else {
9103                 key.type = BTRFS_EXTENT_ITEM_KEY;
9104                 key.offset = rec->max_size;
9105         }
9106
9107         trans = btrfs_start_transaction(root, 0);
9108         if (IS_ERR(trans))
9109                 return PTR_ERR(trans);
9110
9111         btrfs_init_path(&path);
9112         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9113         if (ret < 0) {
9114                 btrfs_release_path(&path);
9115                 btrfs_commit_transaction(trans, root);
9116                 return ret;
9117         } else if (ret) {
9118                 fprintf(stderr, "Didn't find extent for %llu\n",
9119                         (unsigned long long)rec->start);
9120                 btrfs_release_path(&path);
9121                 btrfs_commit_transaction(trans, root);
9122                 return -ENOENT;
9123         }
9124
9125         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9126                             struct btrfs_extent_item);
9127         flags = btrfs_extent_flags(path.nodes[0], ei);
9128         if (rec->flag_block_full_backref) {
9129                 fprintf(stderr, "setting full backref on %llu\n",
9130                         (unsigned long long)key.objectid);
9131                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9132         } else {
9133                 fprintf(stderr, "clearing full backref on %llu\n",
9134                         (unsigned long long)key.objectid);
9135                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9136         }
9137         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9138         btrfs_mark_buffer_dirty(path.nodes[0]);
9139         btrfs_release_path(&path);
9140         ret = btrfs_commit_transaction(trans, root);
9141         if (!ret)
9142                 fprintf(stderr, "Repaired extent flags for %llu\n",
9143                                 (unsigned long long)rec->start);
9144
9145         return ret;
9146 }
9147
9148 /* right now we only prune from the extent allocation tree */
9149 static int prune_one_block(struct btrfs_trans_handle *trans,
9150                            struct btrfs_fs_info *info,
9151                            struct btrfs_corrupt_block *corrupt)
9152 {
9153         int ret;
9154         struct btrfs_path path;
9155         struct extent_buffer *eb;
9156         u64 found;
9157         int slot;
9158         int nritems;
9159         int level = corrupt->level + 1;
9160
9161         btrfs_init_path(&path);
9162 again:
9163         /* we want to stop at the parent to our busted block */
9164         path.lowest_level = level;
9165
9166         ret = btrfs_search_slot(trans, info->extent_root,
9167                                 &corrupt->key, &path, -1, 1);
9168
9169         if (ret < 0)
9170                 goto out;
9171
9172         eb = path.nodes[level];
9173         if (!eb) {
9174                 ret = -ENOENT;
9175                 goto out;
9176         }
9177
9178         /*
9179          * hopefully the search gave us the block we want to prune,
9180          * lets try that first
9181          */
9182         slot = path.slots[level];
9183         found =  btrfs_node_blockptr(eb, slot);
9184         if (found == corrupt->cache.start)
9185                 goto del_ptr;
9186
9187         nritems = btrfs_header_nritems(eb);
9188
9189         /* the search failed, lets scan this node and hope we find it */
9190         for (slot = 0; slot < nritems; slot++) {
9191                 found =  btrfs_node_blockptr(eb, slot);
9192                 if (found == corrupt->cache.start)
9193                         goto del_ptr;
9194         }
9195         /*
9196          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9197          * to this block
9198          */
9199         if (eb == info->extent_root->node) {
9200                 ret = -ENOENT;
9201                 goto out;
9202         } else {
9203                 level++;
9204                 btrfs_release_path(&path);
9205                 goto again;
9206         }
9207
9208 del_ptr:
9209         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9210         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9211
9212 out:
9213         btrfs_release_path(&path);
9214         return ret;
9215 }
9216
9217 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9218 {
9219         struct btrfs_trans_handle *trans = NULL;
9220         struct cache_extent *cache;
9221         struct btrfs_corrupt_block *corrupt;
9222
9223         while (1) {
9224                 cache = search_cache_extent(info->corrupt_blocks, 0);
9225                 if (!cache)
9226                         break;
9227                 if (!trans) {
9228                         trans = btrfs_start_transaction(info->extent_root, 1);
9229                         if (IS_ERR(trans))
9230                                 return PTR_ERR(trans);
9231                 }
9232                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9233                 prune_one_block(trans, info, corrupt);
9234                 remove_cache_extent(info->corrupt_blocks, cache);
9235         }
9236         if (trans)
9237                 return btrfs_commit_transaction(trans, info->extent_root);
9238         return 0;
9239 }
9240
9241 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9242 {
9243         struct btrfs_block_group_cache *cache;
9244         u64 start, end;
9245         int ret;
9246
9247         while (1) {
9248                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9249                                             &start, &end, EXTENT_DIRTY);
9250                 if (ret)
9251                         break;
9252                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9253         }
9254
9255         start = 0;
9256         while (1) {
9257                 cache = btrfs_lookup_first_block_group(fs_info, start);
9258                 if (!cache)
9259                         break;
9260                 if (cache->cached)
9261                         cache->cached = 0;
9262                 start = cache->key.objectid + cache->key.offset;
9263         }
9264 }
9265
9266 static int check_extent_refs(struct btrfs_root *root,
9267                              struct cache_tree *extent_cache)
9268 {
9269         struct extent_record *rec;
9270         struct cache_extent *cache;
9271         int ret = 0;
9272         int had_dups = 0;
9273
9274         if (repair) {
9275                 /*
9276                  * if we're doing a repair, we have to make sure
9277                  * we don't allocate from the problem extents.
9278                  * In the worst case, this will be all the
9279                  * extents in the FS
9280                  */
9281                 cache = search_cache_extent(extent_cache, 0);
9282                 while(cache) {
9283                         rec = container_of(cache, struct extent_record, cache);
9284                         set_extent_dirty(root->fs_info->excluded_extents,
9285                                          rec->start,
9286                                          rec->start + rec->max_size - 1);
9287                         cache = next_cache_extent(cache);
9288                 }
9289
9290                 /* pin down all the corrupted blocks too */
9291                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9292                 while(cache) {
9293                         set_extent_dirty(root->fs_info->excluded_extents,
9294                                          cache->start,
9295                                          cache->start + cache->size - 1);
9296                         cache = next_cache_extent(cache);
9297                 }
9298                 prune_corrupt_blocks(root->fs_info);
9299                 reset_cached_block_groups(root->fs_info);
9300         }
9301
9302         reset_cached_block_groups(root->fs_info);
9303
9304         /*
9305          * We need to delete any duplicate entries we find first otherwise we
9306          * could mess up the extent tree when we have backrefs that actually
9307          * belong to a different extent item and not the weird duplicate one.
9308          */
9309         while (repair && !list_empty(&duplicate_extents)) {
9310                 rec = to_extent_record(duplicate_extents.next);
9311                 list_del_init(&rec->list);
9312
9313                 /* Sometimes we can find a backref before we find an actual
9314                  * extent, so we need to process it a little bit to see if there
9315                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9316                  * if this is a backref screwup.  If we need to delete stuff
9317                  * process_duplicates() will return 0, otherwise it will return
9318                  * 1 and we
9319                  */
9320                 if (process_duplicates(extent_cache, rec))
9321                         continue;
9322                 ret = delete_duplicate_records(root, rec);
9323                 if (ret < 0)
9324                         return ret;
9325                 /*
9326                  * delete_duplicate_records will return the number of entries
9327                  * deleted, so if it's greater than 0 then we know we actually
9328                  * did something and we need to remove.
9329                  */
9330                 if (ret)
9331                         had_dups = 1;
9332         }
9333
9334         if (had_dups)
9335                 return -EAGAIN;
9336
9337         while(1) {
9338                 int cur_err = 0;
9339                 int fix = 0;
9340
9341                 cache = search_cache_extent(extent_cache, 0);
9342                 if (!cache)
9343                         break;
9344                 rec = container_of(cache, struct extent_record, cache);
9345                 if (rec->num_duplicates) {
9346                         fprintf(stderr, "extent item %llu has multiple extent "
9347                                 "items\n", (unsigned long long)rec->start);
9348                         cur_err = 1;
9349                 }
9350
9351                 if (rec->refs != rec->extent_item_refs) {
9352                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9353                                 (unsigned long long)rec->start,
9354                                 (unsigned long long)rec->nr);
9355                         fprintf(stderr, "extent item %llu, found %llu\n",
9356                                 (unsigned long long)rec->extent_item_refs,
9357                                 (unsigned long long)rec->refs);
9358                         ret = record_orphan_data_extents(root->fs_info, rec);
9359                         if (ret < 0)
9360                                 goto repair_abort;
9361                         fix = ret;
9362                         cur_err = 1;
9363                 }
9364                 if (all_backpointers_checked(rec, 1)) {
9365                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9366                                 (unsigned long long)rec->start,
9367                                 (unsigned long long)rec->nr);
9368                         fix = 1;
9369                         cur_err = 1;
9370                 }
9371                 if (!rec->owner_ref_checked) {
9372                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9373                                 (unsigned long long)rec->start,
9374                                 (unsigned long long)rec->nr);
9375                         fix = 1;
9376                         cur_err = 1;
9377                 }
9378
9379                 if (repair && fix) {
9380                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9381                         if (ret)
9382                                 goto repair_abort;
9383                 }
9384
9385
9386                 if (rec->bad_full_backref) {
9387                         fprintf(stderr, "bad full backref, on [%llu]\n",
9388                                 (unsigned long long)rec->start);
9389                         if (repair) {
9390                                 ret = fixup_extent_flags(root->fs_info, rec);
9391                                 if (ret)
9392                                         goto repair_abort;
9393                                 fix = 1;
9394                         }
9395                         cur_err = 1;
9396                 }
9397                 /*
9398                  * Although it's not a extent ref's problem, we reuse this
9399                  * routine for error reporting.
9400                  * No repair function yet.
9401                  */
9402                 if (rec->crossing_stripes) {
9403                         fprintf(stderr,
9404                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9405                                 rec->start, rec->start + rec->max_size);
9406                         cur_err = 1;
9407                 }
9408
9409                 if (rec->wrong_chunk_type) {
9410                         fprintf(stderr,
9411                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9412                                 rec->start, rec->start + rec->max_size);
9413                         cur_err = 1;
9414                 }
9415
9416                 remove_cache_extent(extent_cache, cache);
9417                 free_all_extent_backrefs(rec);
9418                 if (!init_extent_tree && repair && (!cur_err || fix))
9419                         clear_extent_dirty(root->fs_info->excluded_extents,
9420                                            rec->start,
9421                                            rec->start + rec->max_size - 1);
9422                 free(rec);
9423         }
9424 repair_abort:
9425         if (repair) {
9426                 if (ret && ret != -EAGAIN) {
9427                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9428                         exit(1);
9429                 } else if (!ret) {
9430                         struct btrfs_trans_handle *trans;
9431
9432                         root = root->fs_info->extent_root;
9433                         trans = btrfs_start_transaction(root, 1);
9434                         if (IS_ERR(trans)) {
9435                                 ret = PTR_ERR(trans);
9436                                 goto repair_abort;
9437                         }
9438
9439                         btrfs_fix_block_accounting(trans, root);
9440                         ret = btrfs_commit_transaction(trans, root);
9441                         if (ret)
9442                                 goto repair_abort;
9443                 }
9444                 return ret;
9445         }
9446         return 0;
9447 }
9448
9449 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9450 {
9451         u64 stripe_size;
9452
9453         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9454                 stripe_size = length;
9455                 stripe_size /= num_stripes;
9456         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9457                 stripe_size = length * 2;
9458                 stripe_size /= num_stripes;
9459         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9460                 stripe_size = length;
9461                 stripe_size /= (num_stripes - 1);
9462         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9463                 stripe_size = length;
9464                 stripe_size /= (num_stripes - 2);
9465         } else {
9466                 stripe_size = length;
9467         }
9468         return stripe_size;
9469 }
9470
9471 /*
9472  * Check the chunk with its block group/dev list ref:
9473  * Return 0 if all refs seems valid.
9474  * Return 1 if part of refs seems valid, need later check for rebuild ref
9475  * like missing block group and needs to search extent tree to rebuild them.
9476  * Return -1 if essential refs are missing and unable to rebuild.
9477  */
9478 static int check_chunk_refs(struct chunk_record *chunk_rec,
9479                             struct block_group_tree *block_group_cache,
9480                             struct device_extent_tree *dev_extent_cache,
9481                             int silent)
9482 {
9483         struct cache_extent *block_group_item;
9484         struct block_group_record *block_group_rec;
9485         struct cache_extent *dev_extent_item;
9486         struct device_extent_record *dev_extent_rec;
9487         u64 devid;
9488         u64 offset;
9489         u64 length;
9490         int metadump_v2 = 0;
9491         int i;
9492         int ret = 0;
9493
9494         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9495                                                chunk_rec->offset,
9496                                                chunk_rec->length);
9497         if (block_group_item) {
9498                 block_group_rec = container_of(block_group_item,
9499                                                struct block_group_record,
9500                                                cache);
9501                 if (chunk_rec->length != block_group_rec->offset ||
9502                     chunk_rec->offset != block_group_rec->objectid ||
9503                     (!metadump_v2 &&
9504                      chunk_rec->type_flags != block_group_rec->flags)) {
9505                         if (!silent)
9506                                 fprintf(stderr,
9507                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9508                                         chunk_rec->objectid,
9509                                         chunk_rec->type,
9510                                         chunk_rec->offset,
9511                                         chunk_rec->length,
9512                                         chunk_rec->offset,
9513                                         chunk_rec->type_flags,
9514                                         block_group_rec->objectid,
9515                                         block_group_rec->type,
9516                                         block_group_rec->offset,
9517                                         block_group_rec->offset,
9518                                         block_group_rec->objectid,
9519                                         block_group_rec->flags);
9520                         ret = -1;
9521                 } else {
9522                         list_del_init(&block_group_rec->list);
9523                         chunk_rec->bg_rec = block_group_rec;
9524                 }
9525         } else {
9526                 if (!silent)
9527                         fprintf(stderr,
9528                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9529                                 chunk_rec->objectid,
9530                                 chunk_rec->type,
9531                                 chunk_rec->offset,
9532                                 chunk_rec->length,
9533                                 chunk_rec->offset,
9534                                 chunk_rec->type_flags);
9535                 ret = 1;
9536         }
9537
9538         if (metadump_v2)
9539                 return ret;
9540
9541         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9542                                     chunk_rec->num_stripes);
9543         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9544                 devid = chunk_rec->stripes[i].devid;
9545                 offset = chunk_rec->stripes[i].offset;
9546                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9547                                                        devid, offset, length);
9548                 if (dev_extent_item) {
9549                         dev_extent_rec = container_of(dev_extent_item,
9550                                                 struct device_extent_record,
9551                                                 cache);
9552                         if (dev_extent_rec->objectid != devid ||
9553                             dev_extent_rec->offset != offset ||
9554                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9555                             dev_extent_rec->length != length) {
9556                                 if (!silent)
9557                                         fprintf(stderr,
9558                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9559                                                 chunk_rec->objectid,
9560                                                 chunk_rec->type,
9561                                                 chunk_rec->offset,
9562                                                 chunk_rec->stripes[i].devid,
9563                                                 chunk_rec->stripes[i].offset,
9564                                                 dev_extent_rec->objectid,
9565                                                 dev_extent_rec->offset,
9566                                                 dev_extent_rec->length);
9567                                 ret = -1;
9568                         } else {
9569                                 list_move(&dev_extent_rec->chunk_list,
9570                                           &chunk_rec->dextents);
9571                         }
9572                 } else {
9573                         if (!silent)
9574                                 fprintf(stderr,
9575                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9576                                         chunk_rec->objectid,
9577                                         chunk_rec->type,
9578                                         chunk_rec->offset,
9579                                         chunk_rec->stripes[i].devid,
9580                                         chunk_rec->stripes[i].offset);
9581                         ret = -1;
9582                 }
9583         }
9584         return ret;
9585 }
9586
9587 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9588 int check_chunks(struct cache_tree *chunk_cache,
9589                  struct block_group_tree *block_group_cache,
9590                  struct device_extent_tree *dev_extent_cache,
9591                  struct list_head *good, struct list_head *bad,
9592                  struct list_head *rebuild, int silent)
9593 {
9594         struct cache_extent *chunk_item;
9595         struct chunk_record *chunk_rec;
9596         struct block_group_record *bg_rec;
9597         struct device_extent_record *dext_rec;
9598         int err;
9599         int ret = 0;
9600
9601         chunk_item = first_cache_extent(chunk_cache);
9602         while (chunk_item) {
9603                 chunk_rec = container_of(chunk_item, struct chunk_record,
9604                                          cache);
9605                 err = check_chunk_refs(chunk_rec, block_group_cache,
9606                                        dev_extent_cache, silent);
9607                 if (err < 0)
9608                         ret = err;
9609                 if (err == 0 && good)
9610                         list_add_tail(&chunk_rec->list, good);
9611                 if (err > 0 && rebuild)
9612                         list_add_tail(&chunk_rec->list, rebuild);
9613                 if (err < 0 && bad)
9614                         list_add_tail(&chunk_rec->list, bad);
9615                 chunk_item = next_cache_extent(chunk_item);
9616         }
9617
9618         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9619                 if (!silent)
9620                         fprintf(stderr,
9621                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9622                                 bg_rec->objectid,
9623                                 bg_rec->offset,
9624                                 bg_rec->flags);
9625                 if (!ret)
9626                         ret = 1;
9627         }
9628
9629         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9630                             chunk_list) {
9631                 if (!silent)
9632                         fprintf(stderr,
9633                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9634                                 dext_rec->objectid,
9635                                 dext_rec->offset,
9636                                 dext_rec->length);
9637                 if (!ret)
9638                         ret = 1;
9639         }
9640         return ret;
9641 }
9642
9643
9644 static int check_device_used(struct device_record *dev_rec,
9645                              struct device_extent_tree *dext_cache)
9646 {
9647         struct cache_extent *cache;
9648         struct device_extent_record *dev_extent_rec;
9649         u64 total_byte = 0;
9650
9651         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9652         while (cache) {
9653                 dev_extent_rec = container_of(cache,
9654                                               struct device_extent_record,
9655                                               cache);
9656                 if (dev_extent_rec->objectid != dev_rec->devid)
9657                         break;
9658
9659                 list_del_init(&dev_extent_rec->device_list);
9660                 total_byte += dev_extent_rec->length;
9661                 cache = next_cache_extent(cache);
9662         }
9663
9664         if (total_byte != dev_rec->byte_used) {
9665                 fprintf(stderr,
9666                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9667                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9668                         dev_rec->type, dev_rec->offset);
9669                 return -1;
9670         } else {
9671                 return 0;
9672         }
9673 }
9674
9675 /* check btrfs_dev_item -> btrfs_dev_extent */
9676 static int check_devices(struct rb_root *dev_cache,
9677                          struct device_extent_tree *dev_extent_cache)
9678 {
9679         struct rb_node *dev_node;
9680         struct device_record *dev_rec;
9681         struct device_extent_record *dext_rec;
9682         int err;
9683         int ret = 0;
9684
9685         dev_node = rb_first(dev_cache);
9686         while (dev_node) {
9687                 dev_rec = container_of(dev_node, struct device_record, node);
9688                 err = check_device_used(dev_rec, dev_extent_cache);
9689                 if (err)
9690                         ret = err;
9691
9692                 dev_node = rb_next(dev_node);
9693         }
9694         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9695                             device_list) {
9696                 fprintf(stderr,
9697                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9698                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9699                 if (!ret)
9700                         ret = 1;
9701         }
9702         return ret;
9703 }
9704
9705 static int add_root_item_to_list(struct list_head *head,
9706                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9707                                   u8 level, u8 drop_level,
9708                                   int level_size, struct btrfs_key *drop_key)
9709 {
9710
9711         struct root_item_record *ri_rec;
9712         ri_rec = malloc(sizeof(*ri_rec));
9713         if (!ri_rec)
9714                 return -ENOMEM;
9715         ri_rec->bytenr = bytenr;
9716         ri_rec->objectid = objectid;
9717         ri_rec->level = level;
9718         ri_rec->level_size = level_size;
9719         ri_rec->drop_level = drop_level;
9720         ri_rec->last_snapshot = last_snapshot;
9721         if (drop_key)
9722                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9723         list_add_tail(&ri_rec->list, head);
9724
9725         return 0;
9726 }
9727
9728 static void free_root_item_list(struct list_head *list)
9729 {
9730         struct root_item_record *ri_rec;
9731
9732         while (!list_empty(list)) {
9733                 ri_rec = list_first_entry(list, struct root_item_record,
9734                                           list);
9735                 list_del_init(&ri_rec->list);
9736                 free(ri_rec);
9737         }
9738 }
9739
9740 static int deal_root_from_list(struct list_head *list,
9741                                struct btrfs_root *root,
9742                                struct block_info *bits,
9743                                int bits_nr,
9744                                struct cache_tree *pending,
9745                                struct cache_tree *seen,
9746                                struct cache_tree *reada,
9747                                struct cache_tree *nodes,
9748                                struct cache_tree *extent_cache,
9749                                struct cache_tree *chunk_cache,
9750                                struct rb_root *dev_cache,
9751                                struct block_group_tree *block_group_cache,
9752                                struct device_extent_tree *dev_extent_cache)
9753 {
9754         int ret = 0;
9755         u64 last;
9756
9757         while (!list_empty(list)) {
9758                 struct root_item_record *rec;
9759                 struct extent_buffer *buf;
9760                 rec = list_entry(list->next,
9761                                  struct root_item_record, list);
9762                 last = 0;
9763                 buf = read_tree_block(root->fs_info->tree_root,
9764                                       rec->bytenr, rec->level_size, 0);
9765                 if (!extent_buffer_uptodate(buf)) {
9766                         free_extent_buffer(buf);
9767                         ret = -EIO;
9768                         break;
9769                 }
9770                 ret = add_root_to_pending(buf, extent_cache, pending,
9771                                     seen, nodes, rec->objectid);
9772                 if (ret < 0)
9773                         break;
9774                 /*
9775                  * To rebuild extent tree, we need deal with snapshot
9776                  * one by one, otherwise we deal with node firstly which
9777                  * can maximize readahead.
9778                  */
9779                 while (1) {
9780                         ret = run_next_block(root, bits, bits_nr, &last,
9781                                              pending, seen, reada, nodes,
9782                                              extent_cache, chunk_cache,
9783                                              dev_cache, block_group_cache,
9784                                              dev_extent_cache, rec);
9785                         if (ret != 0)
9786                                 break;
9787                 }
9788                 free_extent_buffer(buf);
9789                 list_del(&rec->list);
9790                 free(rec);
9791                 if (ret < 0)
9792                         break;
9793         }
9794         while (ret >= 0) {
9795                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9796                                      reada, nodes, extent_cache, chunk_cache,
9797                                      dev_cache, block_group_cache,
9798                                      dev_extent_cache, NULL);
9799                 if (ret != 0) {
9800                         if (ret > 0)
9801                                 ret = 0;
9802                         break;
9803                 }
9804         }
9805         return ret;
9806 }
9807
9808 static int check_chunks_and_extents(struct btrfs_root *root)
9809 {
9810         struct rb_root dev_cache;
9811         struct cache_tree chunk_cache;
9812         struct block_group_tree block_group_cache;
9813         struct device_extent_tree dev_extent_cache;
9814         struct cache_tree extent_cache;
9815         struct cache_tree seen;
9816         struct cache_tree pending;
9817         struct cache_tree reada;
9818         struct cache_tree nodes;
9819         struct extent_io_tree excluded_extents;
9820         struct cache_tree corrupt_blocks;
9821         struct btrfs_path path;
9822         struct btrfs_key key;
9823         struct btrfs_key found_key;
9824         int ret, err = 0;
9825         struct block_info *bits;
9826         int bits_nr;
9827         struct extent_buffer *leaf;
9828         int slot;
9829         struct btrfs_root_item ri;
9830         struct list_head dropping_trees;
9831         struct list_head normal_trees;
9832         struct btrfs_root *root1;
9833         u64 objectid;
9834         u32 level_size;
9835         u8 level;
9836
9837         dev_cache = RB_ROOT;
9838         cache_tree_init(&chunk_cache);
9839         block_group_tree_init(&block_group_cache);
9840         device_extent_tree_init(&dev_extent_cache);
9841
9842         cache_tree_init(&extent_cache);
9843         cache_tree_init(&seen);
9844         cache_tree_init(&pending);
9845         cache_tree_init(&nodes);
9846         cache_tree_init(&reada);
9847         cache_tree_init(&corrupt_blocks);
9848         extent_io_tree_init(&excluded_extents);
9849         INIT_LIST_HEAD(&dropping_trees);
9850         INIT_LIST_HEAD(&normal_trees);
9851
9852         if (repair) {
9853                 root->fs_info->excluded_extents = &excluded_extents;
9854                 root->fs_info->fsck_extent_cache = &extent_cache;
9855                 root->fs_info->free_extent_hook = free_extent_hook;
9856                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9857         }
9858
9859         bits_nr = 1024;
9860         bits = malloc(bits_nr * sizeof(struct block_info));
9861         if (!bits) {
9862                 perror("malloc");
9863                 exit(1);
9864         }
9865
9866         if (ctx.progress_enabled) {
9867                 ctx.tp = TASK_EXTENTS;
9868                 task_start(ctx.info);
9869         }
9870
9871 again:
9872         root1 = root->fs_info->tree_root;
9873         level = btrfs_header_level(root1->node);
9874         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9875                                     root1->node->start, 0, level, 0,
9876                                     root1->nodesize, NULL);
9877         if (ret < 0)
9878                 goto out;
9879         root1 = root->fs_info->chunk_root;
9880         level = btrfs_header_level(root1->node);
9881         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9882                                     root1->node->start, 0, level, 0,
9883                                     root1->nodesize, NULL);
9884         if (ret < 0)
9885                 goto out;
9886         btrfs_init_path(&path);
9887         key.offset = 0;
9888         key.objectid = 0;
9889         key.type = BTRFS_ROOT_ITEM_KEY;
9890         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9891                                         &key, &path, 0, 0);
9892         if (ret < 0)
9893                 goto out;
9894         while(1) {
9895                 leaf = path.nodes[0];
9896                 slot = path.slots[0];
9897                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9898                         ret = btrfs_next_leaf(root, &path);
9899                         if (ret != 0)
9900                                 break;
9901                         leaf = path.nodes[0];
9902                         slot = path.slots[0];
9903                 }
9904                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9905                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9906                         unsigned long offset;
9907                         u64 last_snapshot;
9908
9909                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9910                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9911                         last_snapshot = btrfs_root_last_snapshot(&ri);
9912                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9913                                 level = btrfs_root_level(&ri);
9914                                 level_size = root->nodesize;
9915                                 ret = add_root_item_to_list(&normal_trees,
9916                                                 found_key.objectid,
9917                                                 btrfs_root_bytenr(&ri),
9918                                                 last_snapshot, level,
9919                                                 0, level_size, NULL);
9920                                 if (ret < 0)
9921                                         goto out;
9922                         } else {
9923                                 level = btrfs_root_level(&ri);
9924                                 level_size = root->nodesize;
9925                                 objectid = found_key.objectid;
9926                                 btrfs_disk_key_to_cpu(&found_key,
9927                                                       &ri.drop_progress);
9928                                 ret = add_root_item_to_list(&dropping_trees,
9929                                                 objectid,
9930                                                 btrfs_root_bytenr(&ri),
9931                                                 last_snapshot, level,
9932                                                 ri.drop_level,
9933                                                 level_size, &found_key);
9934                                 if (ret < 0)
9935                                         goto out;
9936                         }
9937                 }
9938                 path.slots[0]++;
9939         }
9940         btrfs_release_path(&path);
9941
9942         /*
9943          * check_block can return -EAGAIN if it fixes something, please keep
9944          * this in mind when dealing with return values from these functions, if
9945          * we get -EAGAIN we want to fall through and restart the loop.
9946          */
9947         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9948                                   &seen, &reada, &nodes, &extent_cache,
9949                                   &chunk_cache, &dev_cache, &block_group_cache,
9950                                   &dev_extent_cache);
9951         if (ret < 0) {
9952                 if (ret == -EAGAIN)
9953                         goto loop;
9954                 goto out;
9955         }
9956         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9957                                   &pending, &seen, &reada, &nodes,
9958                                   &extent_cache, &chunk_cache, &dev_cache,
9959                                   &block_group_cache, &dev_extent_cache);
9960         if (ret < 0) {
9961                 if (ret == -EAGAIN)
9962                         goto loop;
9963                 goto out;
9964         }
9965
9966         ret = check_chunks(&chunk_cache, &block_group_cache,
9967                            &dev_extent_cache, NULL, NULL, NULL, 0);
9968         if (ret) {
9969                 if (ret == -EAGAIN)
9970                         goto loop;
9971                 err = ret;
9972         }
9973
9974         ret = check_extent_refs(root, &extent_cache);
9975         if (ret < 0) {
9976                 if (ret == -EAGAIN)
9977                         goto loop;
9978                 goto out;
9979         }
9980
9981         ret = check_devices(&dev_cache, &dev_extent_cache);
9982         if (ret && err)
9983                 ret = err;
9984
9985 out:
9986         task_stop(ctx.info);
9987         if (repair) {
9988                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9989                 extent_io_tree_cleanup(&excluded_extents);
9990                 root->fs_info->fsck_extent_cache = NULL;
9991                 root->fs_info->free_extent_hook = NULL;
9992                 root->fs_info->corrupt_blocks = NULL;
9993                 root->fs_info->excluded_extents = NULL;
9994         }
9995         free(bits);
9996         free_chunk_cache_tree(&chunk_cache);
9997         free_device_cache_tree(&dev_cache);
9998         free_block_group_tree(&block_group_cache);
9999         free_device_extent_tree(&dev_extent_cache);
10000         free_extent_cache_tree(&seen);
10001         free_extent_cache_tree(&pending);
10002         free_extent_cache_tree(&reada);
10003         free_extent_cache_tree(&nodes);
10004         free_root_item_list(&normal_trees);
10005         free_root_item_list(&dropping_trees);
10006         return ret;
10007 loop:
10008         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10009         free_extent_cache_tree(&seen);
10010         free_extent_cache_tree(&pending);
10011         free_extent_cache_tree(&reada);
10012         free_extent_cache_tree(&nodes);
10013         free_chunk_cache_tree(&chunk_cache);
10014         free_block_group_tree(&block_group_cache);
10015         free_device_cache_tree(&dev_cache);
10016         free_device_extent_tree(&dev_extent_cache);
10017         free_extent_record_cache(&extent_cache);
10018         free_root_item_list(&normal_trees);
10019         free_root_item_list(&dropping_trees);
10020         extent_io_tree_cleanup(&excluded_extents);
10021         goto again;
10022 }
10023
10024 /*
10025  * Check backrefs of a tree block given by @bytenr or @eb.
10026  *
10027  * @root:       the root containing the @bytenr or @eb
10028  * @eb:         tree block extent buffer, can be NULL
10029  * @bytenr:     bytenr of the tree block to search
10030  * @level:      tree level of the tree block
10031  * @owner:      owner of the tree block
10032  *
10033  * Return >0 for any error found and output error message
10034  * Return 0 for no error found
10035  */
10036 static int check_tree_block_ref(struct btrfs_root *root,
10037                                 struct extent_buffer *eb, u64 bytenr,
10038                                 int level, u64 owner)
10039 {
10040         struct btrfs_key key;
10041         struct btrfs_root *extent_root = root->fs_info->extent_root;
10042         struct btrfs_path path;
10043         struct btrfs_extent_item *ei;
10044         struct btrfs_extent_inline_ref *iref;
10045         struct extent_buffer *leaf;
10046         unsigned long end;
10047         unsigned long ptr;
10048         int slot;
10049         int skinny_level;
10050         int type;
10051         u32 nodesize = root->nodesize;
10052         u32 item_size;
10053         u64 offset;
10054         int tree_reloc_root = 0;
10055         int found_ref = 0;
10056         int err = 0;
10057         int ret;
10058
10059         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10060             btrfs_header_bytenr(root->node) == bytenr)
10061                 tree_reloc_root = 1;
10062
10063         btrfs_init_path(&path);
10064         key.objectid = bytenr;
10065         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10066                 key.type = BTRFS_METADATA_ITEM_KEY;
10067         else
10068                 key.type = BTRFS_EXTENT_ITEM_KEY;
10069         key.offset = (u64)-1;
10070
10071         /* Search for the backref in extent tree */
10072         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10073         if (ret < 0) {
10074                 err |= BACKREF_MISSING;
10075                 goto out;
10076         }
10077         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10078         if (ret) {
10079                 err |= BACKREF_MISSING;
10080                 goto out;
10081         }
10082
10083         leaf = path.nodes[0];
10084         slot = path.slots[0];
10085         btrfs_item_key_to_cpu(leaf, &key, slot);
10086
10087         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10088
10089         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10090                 skinny_level = (int)key.offset;
10091                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10092         } else {
10093                 struct btrfs_tree_block_info *info;
10094
10095                 info = (struct btrfs_tree_block_info *)(ei + 1);
10096                 skinny_level = btrfs_tree_block_level(leaf, info);
10097                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10098         }
10099
10100         if (eb) {
10101                 u64 header_gen;
10102                 u64 extent_gen;
10103
10104                 if (!(btrfs_extent_flags(leaf, ei) &
10105                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10106                         error(
10107                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10108                                 key.objectid, nodesize,
10109                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10110                         err = BACKREF_MISMATCH;
10111                 }
10112                 header_gen = btrfs_header_generation(eb);
10113                 extent_gen = btrfs_extent_generation(leaf, ei);
10114                 if (header_gen != extent_gen) {
10115                         error(
10116         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10117                                 key.objectid, nodesize, header_gen,
10118                                 extent_gen);
10119                         err = BACKREF_MISMATCH;
10120                 }
10121                 if (level != skinny_level) {
10122                         error(
10123                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10124                                 key.objectid, nodesize, level, skinny_level);
10125                         err = BACKREF_MISMATCH;
10126                 }
10127                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10128                         error(
10129                         "extent[%llu %u] is referred by other roots than %llu",
10130                                 key.objectid, nodesize, root->objectid);
10131                         err = BACKREF_MISMATCH;
10132                 }
10133         }
10134
10135         /*
10136          * Iterate the extent/metadata item to find the exact backref
10137          */
10138         item_size = btrfs_item_size_nr(leaf, slot);
10139         ptr = (unsigned long)iref;
10140         end = (unsigned long)ei + item_size;
10141         while (ptr < end) {
10142                 iref = (struct btrfs_extent_inline_ref *)ptr;
10143                 type = btrfs_extent_inline_ref_type(leaf, iref);
10144                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10145
10146                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10147                         (offset == root->objectid || offset == owner)) {
10148                         found_ref = 1;
10149                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10150                         /*
10151                          * Backref of tree reloc root points to itself, no need
10152                          * to check backref any more.
10153                          */
10154                         if (tree_reloc_root)
10155                                 found_ref = 1;
10156                         else
10157                         /* Check if the backref points to valid referencer */
10158                                 found_ref = !check_tree_block_ref(root, NULL,
10159                                                 offset, level + 1, owner);
10160                 }
10161
10162                 if (found_ref)
10163                         break;
10164                 ptr += btrfs_extent_inline_ref_size(type);
10165         }
10166
10167         /*
10168          * Inlined extent item doesn't have what we need, check
10169          * TREE_BLOCK_REF_KEY
10170          */
10171         if (!found_ref) {
10172                 btrfs_release_path(&path);
10173                 key.objectid = bytenr;
10174                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10175                 key.offset = root->objectid;
10176
10177                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10178                 if (!ret)
10179                         found_ref = 1;
10180         }
10181         if (!found_ref)
10182                 err |= BACKREF_MISSING;
10183 out:
10184         btrfs_release_path(&path);
10185         if (eb && (err & BACKREF_MISSING))
10186                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10187                         bytenr, nodesize, owner, level);
10188         return err;
10189 }
10190
10191 /*
10192  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10193  *
10194  * Return >0 any error found and output error message
10195  * Return 0 for no error found
10196  */
10197 static int check_extent_data_item(struct btrfs_root *root,
10198                                   struct extent_buffer *eb, int slot)
10199 {
10200         struct btrfs_file_extent_item *fi;
10201         struct btrfs_path path;
10202         struct btrfs_root *extent_root = root->fs_info->extent_root;
10203         struct btrfs_key fi_key;
10204         struct btrfs_key dbref_key;
10205         struct extent_buffer *leaf;
10206         struct btrfs_extent_item *ei;
10207         struct btrfs_extent_inline_ref *iref;
10208         struct btrfs_extent_data_ref *dref;
10209         u64 owner;
10210         u64 disk_bytenr;
10211         u64 disk_num_bytes;
10212         u64 extent_num_bytes;
10213         u64 extent_flags;
10214         u32 item_size;
10215         unsigned long end;
10216         unsigned long ptr;
10217         int type;
10218         u64 ref_root;
10219         int found_dbackref = 0;
10220         int err = 0;
10221         int ret;
10222
10223         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10224         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10225
10226         /* Nothing to check for hole and inline data extents */
10227         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10228             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10229                 return 0;
10230
10231         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10232         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10233         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10234
10235         /* Check unaligned disk_num_bytes and num_bytes */
10236         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10237                 error(
10238 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10239                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10240                         root->sectorsize);
10241                 err |= BYTES_UNALIGNED;
10242         } else {
10243                 data_bytes_allocated += disk_num_bytes;
10244         }
10245         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10246                 error(
10247 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10248                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10249                         root->sectorsize);
10250                 err |= BYTES_UNALIGNED;
10251         } else {
10252                 data_bytes_referenced += extent_num_bytes;
10253         }
10254         owner = btrfs_header_owner(eb);
10255
10256         /* Check the extent item of the file extent in extent tree */
10257         btrfs_init_path(&path);
10258         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10259         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10260         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10261
10262         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10263         if (ret)
10264                 goto out;
10265
10266         leaf = path.nodes[0];
10267         slot = path.slots[0];
10268         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10269
10270         extent_flags = btrfs_extent_flags(leaf, ei);
10271
10272         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10273                 error(
10274                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10275                     disk_bytenr, disk_num_bytes,
10276                     BTRFS_EXTENT_FLAG_DATA);
10277                 err |= BACKREF_MISMATCH;
10278         }
10279
10280         /* Check data backref inside that extent item */
10281         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10282         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10283         ptr = (unsigned long)iref;
10284         end = (unsigned long)ei + item_size;
10285         while (ptr < end) {
10286                 iref = (struct btrfs_extent_inline_ref *)ptr;
10287                 type = btrfs_extent_inline_ref_type(leaf, iref);
10288                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10289
10290                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10291                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10292                         if (ref_root == owner || ref_root == root->objectid)
10293                                 found_dbackref = 1;
10294                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10295                         found_dbackref = !check_tree_block_ref(root, NULL,
10296                                 btrfs_extent_inline_ref_offset(leaf, iref),
10297                                 0, owner);
10298                 }
10299
10300                 if (found_dbackref)
10301                         break;
10302                 ptr += btrfs_extent_inline_ref_size(type);
10303         }
10304
10305         if (!found_dbackref) {
10306                 btrfs_release_path(&path);
10307
10308                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10309                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10310                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10311                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10312                                 fi_key.objectid, fi_key.offset);
10313
10314                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10315                                         &dbref_key, &path, 0, 0);
10316                 if (!ret) {
10317                         found_dbackref = 1;
10318                         goto out;
10319                 }
10320
10321                 btrfs_release_path(&path);
10322
10323                 /*
10324                  * Neither inlined nor EXTENT_DATA_REF found, try
10325                  * SHARED_DATA_REF as last chance.
10326                  */
10327                 dbref_key.objectid = disk_bytenr;
10328                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10329                 dbref_key.offset = eb->start;
10330
10331                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10332                                         &dbref_key, &path, 0, 0);
10333                 if (!ret) {
10334                         found_dbackref = 1;
10335                         goto out;
10336                 }
10337         }
10338
10339 out:
10340         if (!found_dbackref)
10341                 err |= BACKREF_MISSING;
10342         btrfs_release_path(&path);
10343         if (err & BACKREF_MISSING) {
10344                 error("data extent[%llu %llu] backref lost",
10345                       disk_bytenr, disk_num_bytes);
10346         }
10347         return err;
10348 }
10349
10350 /*
10351  * Get real tree block level for the case like shared block
10352  * Return >= 0 as tree level
10353  * Return <0 for error
10354  */
10355 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10356 {
10357         struct extent_buffer *eb;
10358         struct btrfs_path path;
10359         struct btrfs_key key;
10360         struct btrfs_extent_item *ei;
10361         u64 flags;
10362         u64 transid;
10363         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10364         u8 backref_level;
10365         u8 header_level;
10366         int ret;
10367
10368         /* Search extent tree for extent generation and level */
10369         key.objectid = bytenr;
10370         key.type = BTRFS_METADATA_ITEM_KEY;
10371         key.offset = (u64)-1;
10372
10373         btrfs_init_path(&path);
10374         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10375         if (ret < 0)
10376                 goto release_out;
10377         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10378         if (ret < 0)
10379                 goto release_out;
10380         if (ret > 0) {
10381                 ret = -ENOENT;
10382                 goto release_out;
10383         }
10384
10385         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10386         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10387                             struct btrfs_extent_item);
10388         flags = btrfs_extent_flags(path.nodes[0], ei);
10389         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10390                 ret = -ENOENT;
10391                 goto release_out;
10392         }
10393
10394         /* Get transid for later read_tree_block() check */
10395         transid = btrfs_extent_generation(path.nodes[0], ei);
10396
10397         /* Get backref level as one source */
10398         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10399                 backref_level = key.offset;
10400         } else {
10401                 struct btrfs_tree_block_info *info;
10402
10403                 info = (struct btrfs_tree_block_info *)(ei + 1);
10404                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10405         }
10406         btrfs_release_path(&path);
10407
10408         /* Get level from tree block as an alternative source */
10409         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10410         if (!extent_buffer_uptodate(eb)) {
10411                 free_extent_buffer(eb);
10412                 return -EIO;
10413         }
10414         header_level = btrfs_header_level(eb);
10415         free_extent_buffer(eb);
10416
10417         if (header_level != backref_level)
10418                 return -EIO;
10419         return header_level;
10420
10421 release_out:
10422         btrfs_release_path(&path);
10423         return ret;
10424 }
10425
10426 /*
10427  * Check if a tree block backref is valid (points to a valid tree block)
10428  * if level == -1, level will be resolved
10429  * Return >0 for any error found and print error message
10430  */
10431 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10432                                     u64 bytenr, int level)
10433 {
10434         struct btrfs_root *root;
10435         struct btrfs_key key;
10436         struct btrfs_path path;
10437         struct extent_buffer *eb;
10438         struct extent_buffer *node;
10439         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10440         int err = 0;
10441         int ret;
10442
10443         /* Query level for level == -1 special case */
10444         if (level == -1)
10445                 level = query_tree_block_level(fs_info, bytenr);
10446         if (level < 0) {
10447                 err |= REFERENCER_MISSING;
10448                 goto out;
10449         }
10450
10451         key.objectid = root_id;
10452         key.type = BTRFS_ROOT_ITEM_KEY;
10453         key.offset = (u64)-1;
10454
10455         root = btrfs_read_fs_root(fs_info, &key);
10456         if (IS_ERR(root)) {
10457                 err |= REFERENCER_MISSING;
10458                 goto out;
10459         }
10460
10461         /* Read out the tree block to get item/node key */
10462         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10463         if (!extent_buffer_uptodate(eb)) {
10464                 err |= REFERENCER_MISSING;
10465                 free_extent_buffer(eb);
10466                 goto out;
10467         }
10468
10469         /* Empty tree, no need to check key */
10470         if (!btrfs_header_nritems(eb) && !level) {
10471                 free_extent_buffer(eb);
10472                 goto out;
10473         }
10474
10475         if (level)
10476                 btrfs_node_key_to_cpu(eb, &key, 0);
10477         else
10478                 btrfs_item_key_to_cpu(eb, &key, 0);
10479
10480         free_extent_buffer(eb);
10481
10482         btrfs_init_path(&path);
10483         path.lowest_level = level;
10484         /* Search with the first key, to ensure we can reach it */
10485         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10486         if (ret < 0) {
10487                 err |= REFERENCER_MISSING;
10488                 goto release_out;
10489         }
10490
10491         node = path.nodes[level];
10492         if (btrfs_header_bytenr(node) != bytenr) {
10493                 error(
10494         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10495                         bytenr, nodesize, bytenr,
10496                         btrfs_header_bytenr(node));
10497                 err |= REFERENCER_MISMATCH;
10498         }
10499         if (btrfs_header_level(node) != level) {
10500                 error(
10501         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10502                         bytenr, nodesize, level,
10503                         btrfs_header_level(node));
10504                 err |= REFERENCER_MISMATCH;
10505         }
10506
10507 release_out:
10508         btrfs_release_path(&path);
10509 out:
10510         if (err & REFERENCER_MISSING) {
10511                 if (level < 0)
10512                         error("extent [%llu %d] lost referencer (owner: %llu)",
10513                                 bytenr, nodesize, root_id);
10514                 else
10515                         error(
10516                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10517                                 bytenr, nodesize, root_id, level);
10518         }
10519
10520         return err;
10521 }
10522
10523 /*
10524  * Check if tree block @eb is tree reloc root.
10525  * Return 0 if it's not or any problem happens
10526  * Return 1 if it's a tree reloc root
10527  */
10528 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10529                                  struct extent_buffer *eb)
10530 {
10531         struct btrfs_root *tree_reloc_root;
10532         struct btrfs_key key;
10533         u64 bytenr = btrfs_header_bytenr(eb);
10534         u64 owner = btrfs_header_owner(eb);
10535         int ret = 0;
10536
10537         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10538         key.offset = owner;
10539         key.type = BTRFS_ROOT_ITEM_KEY;
10540
10541         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10542         if (IS_ERR(tree_reloc_root))
10543                 return 0;
10544
10545         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10546                 ret = 1;
10547         btrfs_free_fs_root(tree_reloc_root);
10548         return ret;
10549 }
10550
10551 /*
10552  * Check referencer for shared block backref
10553  * If level == -1, this function will resolve the level.
10554  */
10555 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10556                                      u64 parent, u64 bytenr, int level)
10557 {
10558         struct extent_buffer *eb;
10559         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10560         u32 nr;
10561         int found_parent = 0;
10562         int i;
10563
10564         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10565         if (!extent_buffer_uptodate(eb))
10566                 goto out;
10567
10568         if (level == -1)
10569                 level = query_tree_block_level(fs_info, bytenr);
10570         if (level < 0)
10571                 goto out;
10572
10573         /* It's possible it's a tree reloc root */
10574         if (parent == bytenr) {
10575                 if (is_tree_reloc_root(fs_info, eb))
10576                         found_parent = 1;
10577                 goto out;
10578         }
10579
10580         if (level + 1 != btrfs_header_level(eb))
10581                 goto out;
10582
10583         nr = btrfs_header_nritems(eb);
10584         for (i = 0; i < nr; i++) {
10585                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10586                         found_parent = 1;
10587                         break;
10588                 }
10589         }
10590 out:
10591         free_extent_buffer(eb);
10592         if (!found_parent) {
10593                 error(
10594         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10595                         bytenr, nodesize, parent, level);
10596                 return REFERENCER_MISSING;
10597         }
10598         return 0;
10599 }
10600
10601 /*
10602  * Check referencer for normal (inlined) data ref
10603  * If len == 0, it will be resolved by searching in extent tree
10604  */
10605 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10606                                      u64 root_id, u64 objectid, u64 offset,
10607                                      u64 bytenr, u64 len, u32 count)
10608 {
10609         struct btrfs_root *root;
10610         struct btrfs_root *extent_root = fs_info->extent_root;
10611         struct btrfs_key key;
10612         struct btrfs_path path;
10613         struct extent_buffer *leaf;
10614         struct btrfs_file_extent_item *fi;
10615         u32 found_count = 0;
10616         int slot;
10617         int ret = 0;
10618
10619         if (!len) {
10620                 key.objectid = bytenr;
10621                 key.type = BTRFS_EXTENT_ITEM_KEY;
10622                 key.offset = (u64)-1;
10623
10624                 btrfs_init_path(&path);
10625                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10626                 if (ret < 0)
10627                         goto out;
10628                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10629                 if (ret)
10630                         goto out;
10631                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10632                 if (key.objectid != bytenr ||
10633                     key.type != BTRFS_EXTENT_ITEM_KEY)
10634                         goto out;
10635                 len = key.offset;
10636                 btrfs_release_path(&path);
10637         }
10638         key.objectid = root_id;
10639         key.type = BTRFS_ROOT_ITEM_KEY;
10640         key.offset = (u64)-1;
10641         btrfs_init_path(&path);
10642
10643         root = btrfs_read_fs_root(fs_info, &key);
10644         if (IS_ERR(root))
10645                 goto out;
10646
10647         key.objectid = objectid;
10648         key.type = BTRFS_EXTENT_DATA_KEY;
10649         /*
10650          * It can be nasty as data backref offset is
10651          * file offset - file extent offset, which is smaller or
10652          * equal to original backref offset.  The only special case is
10653          * overflow.  So we need to special check and do further search.
10654          */
10655         key.offset = offset & (1ULL << 63) ? 0 : offset;
10656
10657         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10658         if (ret < 0)
10659                 goto out;
10660
10661         /*
10662          * Search afterwards to get correct one
10663          * NOTE: As we must do a comprehensive check on the data backref to
10664          * make sure the dref count also matches, we must iterate all file
10665          * extents for that inode.
10666          */
10667         while (1) {
10668                 leaf = path.nodes[0];
10669                 slot = path.slots[0];
10670
10671                 if (slot >= btrfs_header_nritems(leaf))
10672                         goto next;
10673                 btrfs_item_key_to_cpu(leaf, &key, slot);
10674                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10675                         break;
10676                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10677                 /*
10678                  * Except normal disk bytenr and disk num bytes, we still
10679                  * need to do extra check on dbackref offset as
10680                  * dbackref offset = file_offset - file_extent_offset
10681                  */
10682                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10683                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10684                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10685                     offset)
10686                         found_count++;
10687
10688 next:
10689                 ret = btrfs_next_item(root, &path);
10690                 if (ret)
10691                         break;
10692         }
10693 out:
10694         btrfs_release_path(&path);
10695         if (found_count != count) {
10696                 error(
10697 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10698                         bytenr, len, root_id, objectid, offset, count, found_count);
10699                 return REFERENCER_MISSING;
10700         }
10701         return 0;
10702 }
10703
10704 /*
10705  * Check if the referencer of a shared data backref exists
10706  */
10707 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10708                                      u64 parent, u64 bytenr)
10709 {
10710         struct extent_buffer *eb;
10711         struct btrfs_key key;
10712         struct btrfs_file_extent_item *fi;
10713         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10714         u32 nr;
10715         int found_parent = 0;
10716         int i;
10717
10718         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10719         if (!extent_buffer_uptodate(eb))
10720                 goto out;
10721
10722         nr = btrfs_header_nritems(eb);
10723         for (i = 0; i < nr; i++) {
10724                 btrfs_item_key_to_cpu(eb, &key, i);
10725                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10726                         continue;
10727
10728                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10729                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10730                         continue;
10731
10732                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10733                         found_parent = 1;
10734                         break;
10735                 }
10736         }
10737
10738 out:
10739         free_extent_buffer(eb);
10740         if (!found_parent) {
10741                 error("shared extent %llu referencer lost (parent: %llu)",
10742                         bytenr, parent);
10743                 return REFERENCER_MISSING;
10744         }
10745         return 0;
10746 }
10747
10748 /*
10749  * This function will check a given extent item, including its backref and
10750  * itself (like crossing stripe boundary and type)
10751  *
10752  * Since we don't use extent_record anymore, introduce new error bit
10753  */
10754 static int check_extent_item(struct btrfs_fs_info *fs_info,
10755                              struct extent_buffer *eb, int slot)
10756 {
10757         struct btrfs_extent_item *ei;
10758         struct btrfs_extent_inline_ref *iref;
10759         struct btrfs_extent_data_ref *dref;
10760         unsigned long end;
10761         unsigned long ptr;
10762         int type;
10763         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10764         u32 item_size = btrfs_item_size_nr(eb, slot);
10765         u64 flags;
10766         u64 offset;
10767         int metadata = 0;
10768         int level;
10769         struct btrfs_key key;
10770         int ret;
10771         int err = 0;
10772
10773         btrfs_item_key_to_cpu(eb, &key, slot);
10774         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10775                 bytes_used += key.offset;
10776         else
10777                 bytes_used += nodesize;
10778
10779         if (item_size < sizeof(*ei)) {
10780                 /*
10781                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10782                  * old thing when on disk format is still un-determined.
10783                  * No need to care about it anymore
10784                  */
10785                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10786                 return -ENOTTY;
10787         }
10788
10789         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10790         flags = btrfs_extent_flags(eb, ei);
10791
10792         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10793                 metadata = 1;
10794         if (metadata && check_crossing_stripes(global_info, key.objectid,
10795                                                eb->len)) {
10796                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10797                       key.objectid, key.objectid + nodesize);
10798                 err |= CROSSING_STRIPE_BOUNDARY;
10799         }
10800
10801         ptr = (unsigned long)(ei + 1);
10802
10803         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10804                 /* Old EXTENT_ITEM metadata */
10805                 struct btrfs_tree_block_info *info;
10806
10807                 info = (struct btrfs_tree_block_info *)ptr;
10808                 level = btrfs_tree_block_level(eb, info);
10809                 ptr += sizeof(struct btrfs_tree_block_info);
10810         } else {
10811                 /* New METADATA_ITEM */
10812                 level = key.offset;
10813         }
10814         end = (unsigned long)ei + item_size;
10815
10816 next:
10817         /* Reached extent item end normally */
10818         if (ptr == end)
10819                 goto out;
10820
10821         /* Beyond extent item end, wrong item size */
10822         if (ptr > end) {
10823                 err |= ITEM_SIZE_MISMATCH;
10824                 error("extent item at bytenr %llu slot %d has wrong size",
10825                         eb->start, slot);
10826                 goto out;
10827         }
10828
10829         /* Now check every backref in this extent item */
10830         iref = (struct btrfs_extent_inline_ref *)ptr;
10831         type = btrfs_extent_inline_ref_type(eb, iref);
10832         offset = btrfs_extent_inline_ref_offset(eb, iref);
10833         switch (type) {
10834         case BTRFS_TREE_BLOCK_REF_KEY:
10835                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10836                                                level);
10837                 err |= ret;
10838                 break;
10839         case BTRFS_SHARED_BLOCK_REF_KEY:
10840                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10841                                                  level);
10842                 err |= ret;
10843                 break;
10844         case BTRFS_EXTENT_DATA_REF_KEY:
10845                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10846                 ret = check_extent_data_backref(fs_info,
10847                                 btrfs_extent_data_ref_root(eb, dref),
10848                                 btrfs_extent_data_ref_objectid(eb, dref),
10849                                 btrfs_extent_data_ref_offset(eb, dref),
10850                                 key.objectid, key.offset,
10851                                 btrfs_extent_data_ref_count(eb, dref));
10852                 err |= ret;
10853                 break;
10854         case BTRFS_SHARED_DATA_REF_KEY:
10855                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10856                 err |= ret;
10857                 break;
10858         default:
10859                 error("extent[%llu %d %llu] has unknown ref type: %d",
10860                         key.objectid, key.type, key.offset, type);
10861                 err |= UNKNOWN_TYPE;
10862                 goto out;
10863         }
10864
10865         ptr += btrfs_extent_inline_ref_size(type);
10866         goto next;
10867
10868 out:
10869         return err;
10870 }
10871
10872 /*
10873  * Check if a dev extent item is referred correctly by its chunk
10874  */
10875 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10876                                  struct extent_buffer *eb, int slot)
10877 {
10878         struct btrfs_root *chunk_root = fs_info->chunk_root;
10879         struct btrfs_dev_extent *ptr;
10880         struct btrfs_path path;
10881         struct btrfs_key chunk_key;
10882         struct btrfs_key devext_key;
10883         struct btrfs_chunk *chunk;
10884         struct extent_buffer *l;
10885         int num_stripes;
10886         u64 length;
10887         int i;
10888         int found_chunk = 0;
10889         int ret;
10890
10891         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10892         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10893         length = btrfs_dev_extent_length(eb, ptr);
10894
10895         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10896         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10897         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10898
10899         btrfs_init_path(&path);
10900         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10901         if (ret)
10902                 goto out;
10903
10904         l = path.nodes[0];
10905         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10906         if (btrfs_chunk_length(l, chunk) != length)
10907                 goto out;
10908
10909         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10910         for (i = 0; i < num_stripes; i++) {
10911                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10912                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10913
10914                 if (devid == devext_key.objectid &&
10915                     offset == devext_key.offset) {
10916                         found_chunk = 1;
10917                         break;
10918                 }
10919         }
10920 out:
10921         btrfs_release_path(&path);
10922         if (!found_chunk) {
10923                 error(
10924                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10925                         devext_key.objectid, devext_key.offset, length);
10926                 return REFERENCER_MISSING;
10927         }
10928         return 0;
10929 }
10930
10931 /*
10932  * Check if the used space is correct with the dev item
10933  */
10934 static int check_dev_item(struct btrfs_fs_info *fs_info,
10935                           struct extent_buffer *eb, int slot)
10936 {
10937         struct btrfs_root *dev_root = fs_info->dev_root;
10938         struct btrfs_dev_item *dev_item;
10939         struct btrfs_path path;
10940         struct btrfs_key key;
10941         struct btrfs_dev_extent *ptr;
10942         u64 dev_id;
10943         u64 used;
10944         u64 total = 0;
10945         int ret;
10946
10947         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10948         dev_id = btrfs_device_id(eb, dev_item);
10949         used = btrfs_device_bytes_used(eb, dev_item);
10950
10951         key.objectid = dev_id;
10952         key.type = BTRFS_DEV_EXTENT_KEY;
10953         key.offset = 0;
10954
10955         btrfs_init_path(&path);
10956         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10957         if (ret < 0) {
10958                 btrfs_item_key_to_cpu(eb, &key, slot);
10959                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10960                         key.objectid, key.type, key.offset);
10961                 btrfs_release_path(&path);
10962                 return REFERENCER_MISSING;
10963         }
10964
10965         /* Iterate dev_extents to calculate the used space of a device */
10966         while (1) {
10967                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10968                         goto next;
10969
10970                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10971                 if (key.objectid > dev_id)
10972                         break;
10973                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10974                         goto next;
10975
10976                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10977                                      struct btrfs_dev_extent);
10978                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10979 next:
10980                 ret = btrfs_next_item(dev_root, &path);
10981                 if (ret)
10982                         break;
10983         }
10984         btrfs_release_path(&path);
10985
10986         if (used != total) {
10987                 btrfs_item_key_to_cpu(eb, &key, slot);
10988                 error(
10989 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10990                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10991                         BTRFS_DEV_EXTENT_KEY, dev_id);
10992                 return ACCOUNTING_MISMATCH;
10993         }
10994         return 0;
10995 }
10996
10997 /*
10998  * Check a block group item with its referener (chunk) and its used space
10999  * with extent/metadata item
11000  */
11001 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11002                                   struct extent_buffer *eb, int slot)
11003 {
11004         struct btrfs_root *extent_root = fs_info->extent_root;
11005         struct btrfs_root *chunk_root = fs_info->chunk_root;
11006         struct btrfs_block_group_item *bi;
11007         struct btrfs_block_group_item bg_item;
11008         struct btrfs_path path;
11009         struct btrfs_key bg_key;
11010         struct btrfs_key chunk_key;
11011         struct btrfs_key extent_key;
11012         struct btrfs_chunk *chunk;
11013         struct extent_buffer *leaf;
11014         struct btrfs_extent_item *ei;
11015         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11016         u64 flags;
11017         u64 bg_flags;
11018         u64 used;
11019         u64 total = 0;
11020         int ret;
11021         int err = 0;
11022
11023         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11024         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11025         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11026         used = btrfs_block_group_used(&bg_item);
11027         bg_flags = btrfs_block_group_flags(&bg_item);
11028
11029         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11030         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11031         chunk_key.offset = bg_key.objectid;
11032
11033         btrfs_init_path(&path);
11034         /* Search for the referencer chunk */
11035         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11036         if (ret) {
11037                 error(
11038                 "block group[%llu %llu] did not find the related chunk item",
11039                         bg_key.objectid, bg_key.offset);
11040                 err |= REFERENCER_MISSING;
11041         } else {
11042                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11043                                         struct btrfs_chunk);
11044                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11045                                                 bg_key.offset) {
11046                         error(
11047         "block group[%llu %llu] related chunk item length does not match",
11048                                 bg_key.objectid, bg_key.offset);
11049                         err |= REFERENCER_MISMATCH;
11050                 }
11051         }
11052         btrfs_release_path(&path);
11053
11054         /* Search from the block group bytenr */
11055         extent_key.objectid = bg_key.objectid;
11056         extent_key.type = 0;
11057         extent_key.offset = 0;
11058
11059         btrfs_init_path(&path);
11060         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11061         if (ret < 0)
11062                 goto out;
11063
11064         /* Iterate extent tree to account used space */
11065         while (1) {
11066                 leaf = path.nodes[0];
11067
11068                 /* Search slot can point to the last item beyond leaf nritems */
11069                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11070                         goto next;
11071
11072                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11073                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11074                         break;
11075
11076                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11077                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11078                         goto next;
11079                 if (extent_key.objectid < bg_key.objectid)
11080                         goto next;
11081
11082                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11083                         total += nodesize;
11084                 else
11085                         total += extent_key.offset;
11086
11087                 ei = btrfs_item_ptr(leaf, path.slots[0],
11088                                     struct btrfs_extent_item);
11089                 flags = btrfs_extent_flags(leaf, ei);
11090                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11091                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11092                                 error(
11093                         "bad extent[%llu, %llu) type mismatch with chunk",
11094                                         extent_key.objectid,
11095                                         extent_key.objectid + extent_key.offset);
11096                                 err |= CHUNK_TYPE_MISMATCH;
11097                         }
11098                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11099                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11100                                     BTRFS_BLOCK_GROUP_METADATA))) {
11101                                 error(
11102                         "bad extent[%llu, %llu) type mismatch with chunk",
11103                                         extent_key.objectid,
11104                                         extent_key.objectid + nodesize);
11105                                 err |= CHUNK_TYPE_MISMATCH;
11106                         }
11107                 }
11108 next:
11109                 ret = btrfs_next_item(extent_root, &path);
11110                 if (ret)
11111                         break;
11112         }
11113
11114 out:
11115         btrfs_release_path(&path);
11116
11117         if (total != used) {
11118                 error(
11119                 "block group[%llu %llu] used %llu but extent items used %llu",
11120                         bg_key.objectid, bg_key.offset, used, total);
11121                 err |= ACCOUNTING_MISMATCH;
11122         }
11123         return err;
11124 }
11125
11126 /*
11127  * Check a chunk item.
11128  * Including checking all referred dev_extents and block group
11129  */
11130 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11131                             struct extent_buffer *eb, int slot)
11132 {
11133         struct btrfs_root *extent_root = fs_info->extent_root;
11134         struct btrfs_root *dev_root = fs_info->dev_root;
11135         struct btrfs_path path;
11136         struct btrfs_key chunk_key;
11137         struct btrfs_key bg_key;
11138         struct btrfs_key devext_key;
11139         struct btrfs_chunk *chunk;
11140         struct extent_buffer *leaf;
11141         struct btrfs_block_group_item *bi;
11142         struct btrfs_block_group_item bg_item;
11143         struct btrfs_dev_extent *ptr;
11144         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11145         u64 length;
11146         u64 chunk_end;
11147         u64 type;
11148         u64 profile;
11149         int num_stripes;
11150         u64 offset;
11151         u64 objectid;
11152         int i;
11153         int ret;
11154         int err = 0;
11155
11156         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11157         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11158         length = btrfs_chunk_length(eb, chunk);
11159         chunk_end = chunk_key.offset + length;
11160         if (!IS_ALIGNED(length, sectorsize)) {
11161                 error("chunk[%llu %llu) not aligned to %u",
11162                         chunk_key.offset, chunk_end, sectorsize);
11163                 err |= BYTES_UNALIGNED;
11164                 goto out;
11165         }
11166
11167         type = btrfs_chunk_type(eb, chunk);
11168         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11169         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11170                 error("chunk[%llu %llu) has no chunk type",
11171                         chunk_key.offset, chunk_end);
11172                 err |= UNKNOWN_TYPE;
11173         }
11174         if (profile && (profile & (profile - 1))) {
11175                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11176                         chunk_key.offset, chunk_end, profile);
11177                 err |= UNKNOWN_TYPE;
11178         }
11179
11180         bg_key.objectid = chunk_key.offset;
11181         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11182         bg_key.offset = length;
11183
11184         btrfs_init_path(&path);
11185         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11186         if (ret) {
11187                 error(
11188                 "chunk[%llu %llu) did not find the related block group item",
11189                         chunk_key.offset, chunk_end);
11190                 err |= REFERENCER_MISSING;
11191         } else{
11192                 leaf = path.nodes[0];
11193                 bi = btrfs_item_ptr(leaf, path.slots[0],
11194                                     struct btrfs_block_group_item);
11195                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11196                                    sizeof(bg_item));
11197                 if (btrfs_block_group_flags(&bg_item) != type) {
11198                         error(
11199 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11200                                 chunk_key.offset, chunk_end, type,
11201                                 btrfs_block_group_flags(&bg_item));
11202                         err |= REFERENCER_MISSING;
11203                 }
11204         }
11205
11206         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11207         for (i = 0; i < num_stripes; i++) {
11208                 btrfs_release_path(&path);
11209                 btrfs_init_path(&path);
11210                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11211                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11212                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11213
11214                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11215                                         0, 0);
11216                 if (ret)
11217                         goto not_match_dev;
11218
11219                 leaf = path.nodes[0];
11220                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11221                                      struct btrfs_dev_extent);
11222                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11223                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11224                 if (objectid != chunk_key.objectid ||
11225                     offset != chunk_key.offset ||
11226                     btrfs_dev_extent_length(leaf, ptr) != length)
11227                         goto not_match_dev;
11228                 continue;
11229 not_match_dev:
11230                 err |= BACKREF_MISSING;
11231                 error(
11232                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11233                         chunk_key.objectid, chunk_end, i);
11234                 continue;
11235         }
11236         btrfs_release_path(&path);
11237 out:
11238         return err;
11239 }
11240
11241 /*
11242  * Main entry function to check known items and update related accounting info
11243  */
11244 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11245 {
11246         struct btrfs_fs_info *fs_info = root->fs_info;
11247         struct btrfs_key key;
11248         int slot = 0;
11249         int type;
11250         struct btrfs_extent_data_ref *dref;
11251         int ret;
11252         int err = 0;
11253
11254 next:
11255         btrfs_item_key_to_cpu(eb, &key, slot);
11256         type = key.type;
11257
11258         switch (type) {
11259         case BTRFS_EXTENT_DATA_KEY:
11260                 ret = check_extent_data_item(root, eb, slot);
11261                 err |= ret;
11262                 break;
11263         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11264                 ret = check_block_group_item(fs_info, eb, slot);
11265                 err |= ret;
11266                 break;
11267         case BTRFS_DEV_ITEM_KEY:
11268                 ret = check_dev_item(fs_info, eb, slot);
11269                 err |= ret;
11270                 break;
11271         case BTRFS_CHUNK_ITEM_KEY:
11272                 ret = check_chunk_item(fs_info, eb, slot);
11273                 err |= ret;
11274                 break;
11275         case BTRFS_DEV_EXTENT_KEY:
11276                 ret = check_dev_extent_item(fs_info, eb, slot);
11277                 err |= ret;
11278                 break;
11279         case BTRFS_EXTENT_ITEM_KEY:
11280         case BTRFS_METADATA_ITEM_KEY:
11281                 ret = check_extent_item(fs_info, eb, slot);
11282                 err |= ret;
11283                 break;
11284         case BTRFS_EXTENT_CSUM_KEY:
11285                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11286                 break;
11287         case BTRFS_TREE_BLOCK_REF_KEY:
11288                 ret = check_tree_block_backref(fs_info, key.offset,
11289                                                key.objectid, -1);
11290                 err |= ret;
11291                 break;
11292         case BTRFS_EXTENT_DATA_REF_KEY:
11293                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11294                 ret = check_extent_data_backref(fs_info,
11295                                 btrfs_extent_data_ref_root(eb, dref),
11296                                 btrfs_extent_data_ref_objectid(eb, dref),
11297                                 btrfs_extent_data_ref_offset(eb, dref),
11298                                 key.objectid, 0,
11299                                 btrfs_extent_data_ref_count(eb, dref));
11300                 err |= ret;
11301                 break;
11302         case BTRFS_SHARED_BLOCK_REF_KEY:
11303                 ret = check_shared_block_backref(fs_info, key.offset,
11304                                                  key.objectid, -1);
11305                 err |= ret;
11306                 break;
11307         case BTRFS_SHARED_DATA_REF_KEY:
11308                 ret = check_shared_data_backref(fs_info, key.offset,
11309                                                 key.objectid);
11310                 err |= ret;
11311                 break;
11312         default:
11313                 break;
11314         }
11315
11316         if (++slot < btrfs_header_nritems(eb))
11317                 goto next;
11318
11319         return err;
11320 }
11321
11322 /*
11323  * Helper function for later fs/subvol tree check.  To determine if a tree
11324  * block should be checked.
11325  * This function will ensure only the direct referencer with lowest rootid to
11326  * check a fs/subvolume tree block.
11327  *
11328  * Backref check at extent tree would detect errors like missing subvolume
11329  * tree, so we can do aggressive check to reduce duplicated checks.
11330  */
11331 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11332 {
11333         struct btrfs_root *extent_root = root->fs_info->extent_root;
11334         struct btrfs_key key;
11335         struct btrfs_path path;
11336         struct extent_buffer *leaf;
11337         int slot;
11338         struct btrfs_extent_item *ei;
11339         unsigned long ptr;
11340         unsigned long end;
11341         int type;
11342         u32 item_size;
11343         u64 offset;
11344         struct btrfs_extent_inline_ref *iref;
11345         int ret;
11346
11347         btrfs_init_path(&path);
11348         key.objectid = btrfs_header_bytenr(eb);
11349         key.type = BTRFS_METADATA_ITEM_KEY;
11350         key.offset = (u64)-1;
11351
11352         /*
11353          * Any failure in backref resolving means we can't determine
11354          * whom the tree block belongs to.
11355          * So in that case, we need to check that tree block
11356          */
11357         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11358         if (ret < 0)
11359                 goto need_check;
11360
11361         ret = btrfs_previous_extent_item(extent_root, &path,
11362                                          btrfs_header_bytenr(eb));
11363         if (ret)
11364                 goto need_check;
11365
11366         leaf = path.nodes[0];
11367         slot = path.slots[0];
11368         btrfs_item_key_to_cpu(leaf, &key, slot);
11369         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11370
11371         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11372                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11373         } else {
11374                 struct btrfs_tree_block_info *info;
11375
11376                 info = (struct btrfs_tree_block_info *)(ei + 1);
11377                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11378         }
11379
11380         item_size = btrfs_item_size_nr(leaf, slot);
11381         ptr = (unsigned long)iref;
11382         end = (unsigned long)ei + item_size;
11383         while (ptr < end) {
11384                 iref = (struct btrfs_extent_inline_ref *)ptr;
11385                 type = btrfs_extent_inline_ref_type(leaf, iref);
11386                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11387
11388                 /*
11389                  * We only check the tree block if current root is
11390                  * the lowest referencer of it.
11391                  */
11392                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11393                     offset < root->objectid) {
11394                         btrfs_release_path(&path);
11395                         return 0;
11396                 }
11397
11398                 ptr += btrfs_extent_inline_ref_size(type);
11399         }
11400         /*
11401          * Normally we should also check keyed tree block ref, but that may be
11402          * very time consuming.  Inlined ref should already make us skip a lot
11403          * of refs now.  So skip search keyed tree block ref.
11404          */
11405
11406 need_check:
11407         btrfs_release_path(&path);
11408         return 1;
11409 }
11410
11411 /*
11412  * Traversal function for tree block. We will do:
11413  * 1) Skip shared fs/subvolume tree blocks
11414  * 2) Update related bytes accounting
11415  * 3) Pre-order traversal
11416  */
11417 static int traverse_tree_block(struct btrfs_root *root,
11418                                 struct extent_buffer *node)
11419 {
11420         struct extent_buffer *eb;
11421         struct btrfs_key key;
11422         struct btrfs_key drop_key;
11423         int level;
11424         u64 nr;
11425         int i;
11426         int err = 0;
11427         int ret;
11428
11429         /*
11430          * Skip shared fs/subvolume tree block, in that case they will
11431          * be checked by referencer with lowest rootid
11432          */
11433         if (is_fstree(root->objectid) && !should_check(root, node))
11434                 return 0;
11435
11436         /* Update bytes accounting */
11437         total_btree_bytes += node->len;
11438         if (fs_root_objectid(btrfs_header_owner(node)))
11439                 total_fs_tree_bytes += node->len;
11440         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11441                 total_extent_tree_bytes += node->len;
11442         if (!found_old_backref &&
11443             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11444             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11445             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11446                 found_old_backref = 1;
11447
11448         /* pre-order tranversal, check itself first */
11449         level = btrfs_header_level(node);
11450         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11451                                    btrfs_header_level(node),
11452                                    btrfs_header_owner(node));
11453         err |= ret;
11454         if (err)
11455                 error(
11456         "check %s failed root %llu bytenr %llu level %d, force continue check",
11457                         level ? "node":"leaf", root->objectid,
11458                         btrfs_header_bytenr(node), btrfs_header_level(node));
11459
11460         if (!level) {
11461                 btree_space_waste += btrfs_leaf_free_space(root, node);
11462                 ret = check_leaf_items(root, node);
11463                 err |= ret;
11464                 return err;
11465         }
11466
11467         nr = btrfs_header_nritems(node);
11468         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11469         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11470                 sizeof(struct btrfs_key_ptr);
11471
11472         /* Then check all its children */
11473         for (i = 0; i < nr; i++) {
11474                 u64 blocknr = btrfs_node_blockptr(node, i);
11475
11476                 btrfs_node_key_to_cpu(node, &key, i);
11477                 if (level == root->root_item.drop_level &&
11478                     is_dropped_key(&key, &drop_key))
11479                         continue;
11480
11481                 /*
11482                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11483                  * to call the function itself.
11484                  */
11485                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11486                 if (extent_buffer_uptodate(eb)) {
11487                         ret = traverse_tree_block(root, eb);
11488                         err |= ret;
11489                 }
11490                 free_extent_buffer(eb);
11491         }
11492
11493         return err;
11494 }
11495
11496 /*
11497  * Low memory usage version check_chunks_and_extents.
11498  */
11499 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11500 {
11501         struct btrfs_path path;
11502         struct btrfs_key key;
11503         struct btrfs_root *root1;
11504         struct btrfs_root *cur_root;
11505         int err = 0;
11506         int ret;
11507
11508         root1 = root->fs_info->chunk_root;
11509         ret = traverse_tree_block(root1, root1->node);
11510         err |= ret;
11511
11512         root1 = root->fs_info->tree_root;
11513         ret = traverse_tree_block(root1, root1->node);
11514         err |= ret;
11515
11516         btrfs_init_path(&path);
11517         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11518         key.offset = 0;
11519         key.type = BTRFS_ROOT_ITEM_KEY;
11520
11521         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11522         if (ret) {
11523                 error("cannot find extent treet in tree_root");
11524                 goto out;
11525         }
11526
11527         while (1) {
11528                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11529                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11530                         goto next;
11531                 key.offset = (u64)-1;
11532
11533                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11534                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11535                                         &key);
11536                 else
11537                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11538                 if (IS_ERR(cur_root) || !cur_root) {
11539                         error("failed to read tree: %lld", key.objectid);
11540                         goto next;
11541                 }
11542
11543                 ret = traverse_tree_block(cur_root, cur_root->node);
11544                 err |= ret;
11545
11546                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11547                         btrfs_free_fs_root(cur_root);
11548 next:
11549                 ret = btrfs_next_item(root1, &path);
11550                 if (ret)
11551                         goto out;
11552         }
11553
11554 out:
11555         btrfs_release_path(&path);
11556         return err;
11557 }
11558
11559 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11560                            struct btrfs_root *root, int overwrite)
11561 {
11562         struct extent_buffer *c;
11563         struct extent_buffer *old = root->node;
11564         int level;
11565         int ret;
11566         struct btrfs_disk_key disk_key = {0,0,0};
11567
11568         level = 0;
11569
11570         if (overwrite) {
11571                 c = old;
11572                 extent_buffer_get(c);
11573                 goto init;
11574         }
11575         c = btrfs_alloc_free_block(trans, root,
11576                                    root->nodesize,
11577                                    root->root_key.objectid,
11578                                    &disk_key, level, 0, 0);
11579         if (IS_ERR(c)) {
11580                 c = old;
11581                 extent_buffer_get(c);
11582                 overwrite = 1;
11583         }
11584 init:
11585         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11586         btrfs_set_header_level(c, level);
11587         btrfs_set_header_bytenr(c, c->start);
11588         btrfs_set_header_generation(c, trans->transid);
11589         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11590         btrfs_set_header_owner(c, root->root_key.objectid);
11591
11592         write_extent_buffer(c, root->fs_info->fsid,
11593                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11594
11595         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11596                             btrfs_header_chunk_tree_uuid(c),
11597                             BTRFS_UUID_SIZE);
11598
11599         btrfs_mark_buffer_dirty(c);
11600         /*
11601          * this case can happen in the following case:
11602          *
11603          * 1.overwrite previous root.
11604          *
11605          * 2.reinit reloc data root, this is because we skip pin
11606          * down reloc data tree before which means we can allocate
11607          * same block bytenr here.
11608          */
11609         if (old->start == c->start) {
11610                 btrfs_set_root_generation(&root->root_item,
11611                                           trans->transid);
11612                 root->root_item.level = btrfs_header_level(root->node);
11613                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11614                                         &root->root_key, &root->root_item);
11615                 if (ret) {
11616                         free_extent_buffer(c);
11617                         return ret;
11618                 }
11619         }
11620         free_extent_buffer(old);
11621         root->node = c;
11622         add_root_to_dirty_list(root);
11623         return 0;
11624 }
11625
11626 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11627                                 struct extent_buffer *eb, int tree_root)
11628 {
11629         struct extent_buffer *tmp;
11630         struct btrfs_root_item *ri;
11631         struct btrfs_key key;
11632         u64 bytenr;
11633         u32 nodesize;
11634         int level = btrfs_header_level(eb);
11635         int nritems;
11636         int ret;
11637         int i;
11638
11639         /*
11640          * If we have pinned this block before, don't pin it again.
11641          * This can not only avoid forever loop with broken filesystem
11642          * but also give us some speedups.
11643          */
11644         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11645                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11646                 return 0;
11647
11648         btrfs_pin_extent(fs_info, eb->start, eb->len);
11649
11650         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11651         nritems = btrfs_header_nritems(eb);
11652         for (i = 0; i < nritems; i++) {
11653                 if (level == 0) {
11654                         btrfs_item_key_to_cpu(eb, &key, i);
11655                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11656                                 continue;
11657                         /* Skip the extent root and reloc roots */
11658                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11659                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11660                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11661                                 continue;
11662                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11663                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11664
11665                         /*
11666                          * If at any point we start needing the real root we
11667                          * will have to build a stump root for the root we are
11668                          * in, but for now this doesn't actually use the root so
11669                          * just pass in extent_root.
11670                          */
11671                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11672                                               nodesize, 0);
11673                         if (!extent_buffer_uptodate(tmp)) {
11674                                 fprintf(stderr, "Error reading root block\n");
11675                                 return -EIO;
11676                         }
11677                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11678                         free_extent_buffer(tmp);
11679                         if (ret)
11680                                 return ret;
11681                 } else {
11682                         bytenr = btrfs_node_blockptr(eb, i);
11683
11684                         /* If we aren't the tree root don't read the block */
11685                         if (level == 1 && !tree_root) {
11686                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11687                                 continue;
11688                         }
11689
11690                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11691                                               nodesize, 0);
11692                         if (!extent_buffer_uptodate(tmp)) {
11693                                 fprintf(stderr, "Error reading tree block\n");
11694                                 return -EIO;
11695                         }
11696                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11697                         free_extent_buffer(tmp);
11698                         if (ret)
11699                                 return ret;
11700                 }
11701         }
11702
11703         return 0;
11704 }
11705
11706 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11707 {
11708         int ret;
11709
11710         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11711         if (ret)
11712                 return ret;
11713
11714         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11715 }
11716
11717 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11718 {
11719         struct btrfs_block_group_cache *cache;
11720         struct btrfs_path path;
11721         struct extent_buffer *leaf;
11722         struct btrfs_chunk *chunk;
11723         struct btrfs_key key;
11724         int ret;
11725         u64 start;
11726
11727         btrfs_init_path(&path);
11728         key.objectid = 0;
11729         key.type = BTRFS_CHUNK_ITEM_KEY;
11730         key.offset = 0;
11731         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11732         if (ret < 0) {
11733                 btrfs_release_path(&path);
11734                 return ret;
11735         }
11736
11737         /*
11738          * We do this in case the block groups were screwed up and had alloc
11739          * bits that aren't actually set on the chunks.  This happens with
11740          * restored images every time and could happen in real life I guess.
11741          */
11742         fs_info->avail_data_alloc_bits = 0;
11743         fs_info->avail_metadata_alloc_bits = 0;
11744         fs_info->avail_system_alloc_bits = 0;
11745
11746         /* First we need to create the in-memory block groups */
11747         while (1) {
11748                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11749                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11750                         if (ret < 0) {
11751                                 btrfs_release_path(&path);
11752                                 return ret;
11753                         }
11754                         if (ret) {
11755                                 ret = 0;
11756                                 break;
11757                         }
11758                 }
11759                 leaf = path.nodes[0];
11760                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11761                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11762                         path.slots[0]++;
11763                         continue;
11764                 }
11765
11766                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11767                 btrfs_add_block_group(fs_info, 0,
11768                                       btrfs_chunk_type(leaf, chunk),
11769                                       key.objectid, key.offset,
11770                                       btrfs_chunk_length(leaf, chunk));
11771                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11772                                  key.offset + btrfs_chunk_length(leaf, chunk));
11773                 path.slots[0]++;
11774         }
11775         start = 0;
11776         while (1) {
11777                 cache = btrfs_lookup_first_block_group(fs_info, start);
11778                 if (!cache)
11779                         break;
11780                 cache->cached = 1;
11781                 start = cache->key.objectid + cache->key.offset;
11782         }
11783
11784         btrfs_release_path(&path);
11785         return 0;
11786 }
11787
11788 static int reset_balance(struct btrfs_trans_handle *trans,
11789                          struct btrfs_fs_info *fs_info)
11790 {
11791         struct btrfs_root *root = fs_info->tree_root;
11792         struct btrfs_path path;
11793         struct extent_buffer *leaf;
11794         struct btrfs_key key;
11795         int del_slot, del_nr = 0;
11796         int ret;
11797         int found = 0;
11798
11799         btrfs_init_path(&path);
11800         key.objectid = BTRFS_BALANCE_OBJECTID;
11801         key.type = BTRFS_BALANCE_ITEM_KEY;
11802         key.offset = 0;
11803         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11804         if (ret) {
11805                 if (ret > 0)
11806                         ret = 0;
11807                 if (!ret)
11808                         goto reinit_data_reloc;
11809                 else
11810                         goto out;
11811         }
11812
11813         ret = btrfs_del_item(trans, root, &path);
11814         if (ret)
11815                 goto out;
11816         btrfs_release_path(&path);
11817
11818         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11819         key.type = BTRFS_ROOT_ITEM_KEY;
11820         key.offset = 0;
11821         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11822         if (ret < 0)
11823                 goto out;
11824         while (1) {
11825                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11826                         if (!found)
11827                                 break;
11828
11829                         if (del_nr) {
11830                                 ret = btrfs_del_items(trans, root, &path,
11831                                                       del_slot, del_nr);
11832                                 del_nr = 0;
11833                                 if (ret)
11834                                         goto out;
11835                         }
11836                         key.offset++;
11837                         btrfs_release_path(&path);
11838
11839                         found = 0;
11840                         ret = btrfs_search_slot(trans, root, &key, &path,
11841                                                 -1, 1);
11842                         if (ret < 0)
11843                                 goto out;
11844                         continue;
11845                 }
11846                 found = 1;
11847                 leaf = path.nodes[0];
11848                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11849                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11850                         break;
11851                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11852                         path.slots[0]++;
11853                         continue;
11854                 }
11855                 if (!del_nr) {
11856                         del_slot = path.slots[0];
11857                         del_nr = 1;
11858                 } else {
11859                         del_nr++;
11860                 }
11861                 path.slots[0]++;
11862         }
11863
11864         if (del_nr) {
11865                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11866                 if (ret)
11867                         goto out;
11868         }
11869         btrfs_release_path(&path);
11870
11871 reinit_data_reloc:
11872         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11873         key.type = BTRFS_ROOT_ITEM_KEY;
11874         key.offset = (u64)-1;
11875         root = btrfs_read_fs_root(fs_info, &key);
11876         if (IS_ERR(root)) {
11877                 fprintf(stderr, "Error reading data reloc tree\n");
11878                 ret = PTR_ERR(root);
11879                 goto out;
11880         }
11881         record_root_in_trans(trans, root);
11882         ret = btrfs_fsck_reinit_root(trans, root, 0);
11883         if (ret)
11884                 goto out;
11885         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11886 out:
11887         btrfs_release_path(&path);
11888         return ret;
11889 }
11890
11891 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11892                               struct btrfs_fs_info *fs_info)
11893 {
11894         u64 start = 0;
11895         int ret;
11896
11897         /*
11898          * The only reason we don't do this is because right now we're just
11899          * walking the trees we find and pinning down their bytes, we don't look
11900          * at any of the leaves.  In order to do mixed groups we'd have to check
11901          * the leaves of any fs roots and pin down the bytes for any file
11902          * extents we find.  Not hard but why do it if we don't have to?
11903          */
11904         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11905                 fprintf(stderr, "We don't support re-initing the extent tree "
11906                         "for mixed block groups yet, please notify a btrfs "
11907                         "developer you want to do this so they can add this "
11908                         "functionality.\n");
11909                 return -EINVAL;
11910         }
11911
11912         /*
11913          * first we need to walk all of the trees except the extent tree and pin
11914          * down the bytes that are in use so we don't overwrite any existing
11915          * metadata.
11916          */
11917         ret = pin_metadata_blocks(fs_info);
11918         if (ret) {
11919                 fprintf(stderr, "error pinning down used bytes\n");
11920                 return ret;
11921         }
11922
11923         /*
11924          * Need to drop all the block groups since we're going to recreate all
11925          * of them again.
11926          */
11927         btrfs_free_block_groups(fs_info);
11928         ret = reset_block_groups(fs_info);
11929         if (ret) {
11930                 fprintf(stderr, "error resetting the block groups\n");
11931                 return ret;
11932         }
11933
11934         /* Ok we can allocate now, reinit the extent root */
11935         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11936         if (ret) {
11937                 fprintf(stderr, "extent root initialization failed\n");
11938                 /*
11939                  * When the transaction code is updated we should end the
11940                  * transaction, but for now progs only knows about commit so
11941                  * just return an error.
11942                  */
11943                 return ret;
11944         }
11945
11946         /*
11947          * Now we have all the in-memory block groups setup so we can make
11948          * allocations properly, and the metadata we care about is safe since we
11949          * pinned all of it above.
11950          */
11951         while (1) {
11952                 struct btrfs_block_group_cache *cache;
11953
11954                 cache = btrfs_lookup_first_block_group(fs_info, start);
11955                 if (!cache)
11956                         break;
11957                 start = cache->key.objectid + cache->key.offset;
11958                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11959                                         &cache->key, &cache->item,
11960                                         sizeof(cache->item));
11961                 if (ret) {
11962                         fprintf(stderr, "Error adding block group\n");
11963                         return ret;
11964                 }
11965                 btrfs_extent_post_op(trans, fs_info->extent_root);
11966         }
11967
11968         ret = reset_balance(trans, fs_info);
11969         if (ret)
11970                 fprintf(stderr, "error resetting the pending balance\n");
11971
11972         return ret;
11973 }
11974
11975 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11976 {
11977         struct btrfs_path path;
11978         struct btrfs_trans_handle *trans;
11979         struct btrfs_key key;
11980         int ret;
11981
11982         printf("Recowing metadata block %llu\n", eb->start);
11983         key.objectid = btrfs_header_owner(eb);
11984         key.type = BTRFS_ROOT_ITEM_KEY;
11985         key.offset = (u64)-1;
11986
11987         root = btrfs_read_fs_root(root->fs_info, &key);
11988         if (IS_ERR(root)) {
11989                 fprintf(stderr, "Couldn't find owner root %llu\n",
11990                         key.objectid);
11991                 return PTR_ERR(root);
11992         }
11993
11994         trans = btrfs_start_transaction(root, 1);
11995         if (IS_ERR(trans))
11996                 return PTR_ERR(trans);
11997
11998         btrfs_init_path(&path);
11999         path.lowest_level = btrfs_header_level(eb);
12000         if (path.lowest_level)
12001                 btrfs_node_key_to_cpu(eb, &key, 0);
12002         else
12003                 btrfs_item_key_to_cpu(eb, &key, 0);
12004
12005         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12006         btrfs_commit_transaction(trans, root);
12007         btrfs_release_path(&path);
12008         return ret;
12009 }
12010
12011 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12012 {
12013         struct btrfs_path path;
12014         struct btrfs_trans_handle *trans;
12015         struct btrfs_key key;
12016         int ret;
12017
12018         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12019                bad->key.type, bad->key.offset);
12020         key.objectid = bad->root_id;
12021         key.type = BTRFS_ROOT_ITEM_KEY;
12022         key.offset = (u64)-1;
12023
12024         root = btrfs_read_fs_root(root->fs_info, &key);
12025         if (IS_ERR(root)) {
12026                 fprintf(stderr, "Couldn't find owner root %llu\n",
12027                         key.objectid);
12028                 return PTR_ERR(root);
12029         }
12030
12031         trans = btrfs_start_transaction(root, 1);
12032         if (IS_ERR(trans))
12033                 return PTR_ERR(trans);
12034
12035         btrfs_init_path(&path);
12036         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12037         if (ret) {
12038                 if (ret > 0)
12039                         ret = 0;
12040                 goto out;
12041         }
12042         ret = btrfs_del_item(trans, root, &path);
12043 out:
12044         btrfs_commit_transaction(trans, root);
12045         btrfs_release_path(&path);
12046         return ret;
12047 }
12048
12049 static int zero_log_tree(struct btrfs_root *root)
12050 {
12051         struct btrfs_trans_handle *trans;
12052         int ret;
12053
12054         trans = btrfs_start_transaction(root, 1);
12055         if (IS_ERR(trans)) {
12056                 ret = PTR_ERR(trans);
12057                 return ret;
12058         }
12059         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12060         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12061         ret = btrfs_commit_transaction(trans, root);
12062         return ret;
12063 }
12064
12065 static int populate_csum(struct btrfs_trans_handle *trans,
12066                          struct btrfs_root *csum_root, char *buf, u64 start,
12067                          u64 len)
12068 {
12069         u64 offset = 0;
12070         u64 sectorsize;
12071         int ret = 0;
12072
12073         while (offset < len) {
12074                 sectorsize = csum_root->sectorsize;
12075                 ret = read_extent_data(csum_root, buf, start + offset,
12076                                        &sectorsize, 0);
12077                 if (ret)
12078                         break;
12079                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12080                                             start + offset, buf, sectorsize);
12081                 if (ret)
12082                         break;
12083                 offset += sectorsize;
12084         }
12085         return ret;
12086 }
12087
12088 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12089                                       struct btrfs_root *csum_root,
12090                                       struct btrfs_root *cur_root)
12091 {
12092         struct btrfs_path path;
12093         struct btrfs_key key;
12094         struct extent_buffer *node;
12095         struct btrfs_file_extent_item *fi;
12096         char *buf = NULL;
12097         u64 start = 0;
12098         u64 len = 0;
12099         int slot = 0;
12100         int ret = 0;
12101
12102         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12103         if (!buf)
12104                 return -ENOMEM;
12105
12106         btrfs_init_path(&path);
12107         key.objectid = 0;
12108         key.offset = 0;
12109         key.type = 0;
12110         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12111         if (ret < 0)
12112                 goto out;
12113         /* Iterate all regular file extents and fill its csum */
12114         while (1) {
12115                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12116
12117                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12118                         goto next;
12119                 node = path.nodes[0];
12120                 slot = path.slots[0];
12121                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12122                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12123                         goto next;
12124                 start = btrfs_file_extent_disk_bytenr(node, fi);
12125                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12126
12127                 ret = populate_csum(trans, csum_root, buf, start, len);
12128                 if (ret == -EEXIST)
12129                         ret = 0;
12130                 if (ret < 0)
12131                         goto out;
12132 next:
12133                 /*
12134                  * TODO: if next leaf is corrupted, jump to nearest next valid
12135                  * leaf.
12136                  */
12137                 ret = btrfs_next_item(cur_root, &path);
12138                 if (ret < 0)
12139                         goto out;
12140                 if (ret > 0) {
12141                         ret = 0;
12142                         goto out;
12143                 }
12144         }
12145
12146 out:
12147         btrfs_release_path(&path);
12148         free(buf);
12149         return ret;
12150 }
12151
12152 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12153                                   struct btrfs_root *csum_root)
12154 {
12155         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12156         struct btrfs_path path;
12157         struct btrfs_root *tree_root = fs_info->tree_root;
12158         struct btrfs_root *cur_root;
12159         struct extent_buffer *node;
12160         struct btrfs_key key;
12161         int slot = 0;
12162         int ret = 0;
12163
12164         btrfs_init_path(&path);
12165         key.objectid = BTRFS_FS_TREE_OBJECTID;
12166         key.offset = 0;
12167         key.type = BTRFS_ROOT_ITEM_KEY;
12168         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12169         if (ret < 0)
12170                 goto out;
12171         if (ret > 0) {
12172                 ret = -ENOENT;
12173                 goto out;
12174         }
12175
12176         while (1) {
12177                 node = path.nodes[0];
12178                 slot = path.slots[0];
12179                 btrfs_item_key_to_cpu(node, &key, slot);
12180                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12181                         goto out;
12182                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12183                         goto next;
12184                 if (!is_fstree(key.objectid))
12185                         goto next;
12186                 key.offset = (u64)-1;
12187
12188                 cur_root = btrfs_read_fs_root(fs_info, &key);
12189                 if (IS_ERR(cur_root) || !cur_root) {
12190                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12191                                 key.objectid);
12192                         goto out;
12193                 }
12194                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12195                                 cur_root);
12196                 if (ret < 0)
12197                         goto out;
12198 next:
12199                 ret = btrfs_next_item(tree_root, &path);
12200                 if (ret > 0) {
12201                         ret = 0;
12202                         goto out;
12203                 }
12204                 if (ret < 0)
12205                         goto out;
12206         }
12207
12208 out:
12209         btrfs_release_path(&path);
12210         return ret;
12211 }
12212
12213 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12214                                       struct btrfs_root *csum_root)
12215 {
12216         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12217         struct btrfs_path path;
12218         struct btrfs_extent_item *ei;
12219         struct extent_buffer *leaf;
12220         char *buf;
12221         struct btrfs_key key;
12222         int ret;
12223
12224         btrfs_init_path(&path);
12225         key.objectid = 0;
12226         key.type = BTRFS_EXTENT_ITEM_KEY;
12227         key.offset = 0;
12228         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12229         if (ret < 0) {
12230                 btrfs_release_path(&path);
12231                 return ret;
12232         }
12233
12234         buf = malloc(csum_root->sectorsize);
12235         if (!buf) {
12236                 btrfs_release_path(&path);
12237                 return -ENOMEM;
12238         }
12239
12240         while (1) {
12241                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12242                         ret = btrfs_next_leaf(extent_root, &path);
12243                         if (ret < 0)
12244                                 break;
12245                         if (ret) {
12246                                 ret = 0;
12247                                 break;
12248                         }
12249                 }
12250                 leaf = path.nodes[0];
12251
12252                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12253                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12254                         path.slots[0]++;
12255                         continue;
12256                 }
12257
12258                 ei = btrfs_item_ptr(leaf, path.slots[0],
12259                                     struct btrfs_extent_item);
12260                 if (!(btrfs_extent_flags(leaf, ei) &
12261                       BTRFS_EXTENT_FLAG_DATA)) {
12262                         path.slots[0]++;
12263                         continue;
12264                 }
12265
12266                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12267                                     key.offset);
12268                 if (ret)
12269                         break;
12270                 path.slots[0]++;
12271         }
12272
12273         btrfs_release_path(&path);
12274         free(buf);
12275         return ret;
12276 }
12277
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * The extent tree is the normal source of data extents, but an extent tree
 * init wipes out all the extent info.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
12294
12295 static void free_roots_info_cache(void)
12296 {
12297         if (!roots_info_cache)
12298                 return;
12299
12300         while (!cache_tree_empty(roots_info_cache)) {
12301                 struct cache_extent *entry;
12302                 struct root_item_info *rii;
12303
12304                 entry = first_cache_extent(roots_info_cache);
12305                 if (!entry)
12306                         break;
12307                 remove_cache_extent(roots_info_cache, entry);
12308                 rii = container_of(entry, struct root_item_info, cache_extent);
12309                 free(rii);
12310         }
12311
12312         free(roots_info_cache);
12313         roots_info_cache = NULL;
12314 }
12315
12316 static int build_roots_info_cache(struct btrfs_fs_info *info)
12317 {
12318         int ret = 0;
12319         struct btrfs_key key;
12320         struct extent_buffer *leaf;
12321         struct btrfs_path path;
12322
12323         if (!roots_info_cache) {
12324                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12325                 if (!roots_info_cache)
12326                         return -ENOMEM;
12327                 cache_tree_init(roots_info_cache);
12328         }
12329
12330         btrfs_init_path(&path);
12331         key.objectid = 0;
12332         key.type = BTRFS_EXTENT_ITEM_KEY;
12333         key.offset = 0;
12334         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12335         if (ret < 0)
12336                 goto out;
12337         leaf = path.nodes[0];
12338
12339         while (1) {
12340                 struct btrfs_key found_key;
12341                 struct btrfs_extent_item *ei;
12342                 struct btrfs_extent_inline_ref *iref;
12343                 int slot = path.slots[0];
12344                 int type;
12345                 u64 flags;
12346                 u64 root_id;
12347                 u8 level;
12348                 struct cache_extent *entry;
12349                 struct root_item_info *rii;
12350
12351                 if (slot >= btrfs_header_nritems(leaf)) {
12352                         ret = btrfs_next_leaf(info->extent_root, &path);
12353                         if (ret < 0) {
12354                                 break;
12355                         } else if (ret) {
12356                                 ret = 0;
12357                                 break;
12358                         }
12359                         leaf = path.nodes[0];
12360                         slot = path.slots[0];
12361                 }
12362
12363                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12364
12365                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12366                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12367                         goto next;
12368
12369                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12370                 flags = btrfs_extent_flags(leaf, ei);
12371
12372                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12373                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12374                         goto next;
12375
12376                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12377                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12378                         level = found_key.offset;
12379                 } else {
12380                         struct btrfs_tree_block_info *binfo;
12381
12382                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12383                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12384                         level = btrfs_tree_block_level(leaf, binfo);
12385                 }
12386
12387                 /*
12388                  * For a root extent, it must be of the following type and the
12389                  * first (and only one) iref in the item.
12390                  */
12391                 type = btrfs_extent_inline_ref_type(leaf, iref);
12392                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12393                         goto next;
12394
12395                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12396                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12397                 if (!entry) {
12398                         rii = malloc(sizeof(struct root_item_info));
12399                         if (!rii) {
12400                                 ret = -ENOMEM;
12401                                 goto out;
12402                         }
12403                         rii->cache_extent.start = root_id;
12404                         rii->cache_extent.size = 1;
12405                         rii->level = (u8)-1;
12406                         entry = &rii->cache_extent;
12407                         ret = insert_cache_extent(roots_info_cache, entry);
12408                         ASSERT(ret == 0);
12409                 } else {
12410                         rii = container_of(entry, struct root_item_info,
12411                                            cache_extent);
12412                 }
12413
12414                 ASSERT(rii->cache_extent.start == root_id);
12415                 ASSERT(rii->cache_extent.size == 1);
12416
12417                 if (level > rii->level || rii->level == (u8)-1) {
12418                         rii->level = level;
12419                         rii->bytenr = found_key.objectid;
12420                         rii->gen = btrfs_extent_generation(leaf, ei);
12421                         rii->node_count = 1;
12422                 } else if (level == rii->level) {
12423                         rii->node_count++;
12424                 }
12425 next:
12426                 path.slots[0]++;
12427         }
12428
12429 out:
12430         btrfs_release_path(&path);
12431
12432         return ret;
12433 }
12434
12435 static int maybe_repair_root_item(struct btrfs_path *path,
12436                                   const struct btrfs_key *root_key,
12437                                   const int read_only_mode)
12438 {
12439         const u64 root_id = root_key->objectid;
12440         struct cache_extent *entry;
12441         struct root_item_info *rii;
12442         struct btrfs_root_item ri;
12443         unsigned long offset;
12444
12445         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12446         if (!entry) {
12447                 fprintf(stderr,
12448                         "Error: could not find extent items for root %llu\n",
12449                         root_key->objectid);
12450                 return -ENOENT;
12451         }
12452
12453         rii = container_of(entry, struct root_item_info, cache_extent);
12454         ASSERT(rii->cache_extent.start == root_id);
12455         ASSERT(rii->cache_extent.size == 1);
12456
12457         if (rii->node_count != 1) {
12458                 fprintf(stderr,
12459                         "Error: could not find btree root extent for root %llu\n",
12460                         root_id);
12461                 return -ENOENT;
12462         }
12463
12464         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12465         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12466
12467         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12468             btrfs_root_level(&ri) != rii->level ||
12469             btrfs_root_generation(&ri) != rii->gen) {
12470
12471                 /*
12472                  * If we're in repair mode but our caller told us to not update
12473                  * the root item, i.e. just check if it needs to be updated, don't
12474                  * print this message, since the caller will call us again shortly
12475                  * for the same root item without read only mode (the caller will
12476                  * open a transaction first).
12477                  */
12478                 if (!(read_only_mode && repair))
12479                         fprintf(stderr,
12480                                 "%sroot item for root %llu,"
12481                                 " current bytenr %llu, current gen %llu, current level %u,"
12482                                 " new bytenr %llu, new gen %llu, new level %u\n",
12483                                 (read_only_mode ? "" : "fixing "),
12484                                 root_id,
12485                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12486                                 btrfs_root_level(&ri),
12487                                 rii->bytenr, rii->gen, rii->level);
12488
12489                 if (btrfs_root_generation(&ri) > rii->gen) {
12490                         fprintf(stderr,
12491                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12492                                 root_id, btrfs_root_generation(&ri), rii->gen);
12493                         return -EINVAL;
12494                 }
12495
12496                 if (!read_only_mode) {
12497                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12498                         btrfs_set_root_level(&ri, rii->level);
12499                         btrfs_set_root_generation(&ri, rii->gen);
12500                         write_extent_buffer(path->nodes[0], &ri,
12501                                             offset, sizeof(ri));
12502                 }
12503
12504                 return 1;
12505         }
12506
12507         return 0;
12508 }
12509
12510 /*
12511  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12512  * caused read-only snapshots to be corrupted if they were created at a moment
12513  * when the source subvolume/snapshot had orphan items. The issue was that the
12514  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12515  * node instead of the post orphan cleanup root node.
12516  * So this function, and its callees, just detects and fixes those cases. Even
12517  * though the regression was for read-only snapshots, this function applies to
12518  * any snapshot/subvolume root.
12519  * This must be run before any other repair code - not doing it so, makes other
12520  * repair code delete or modify backrefs in the extent tree for example, which
12521  * will result in an inconsistent fs after repairing the root items.
       *
       * Returns the number of root items found to differ from what the extent
       * tree describes (in repair mode they are rewritten before being counted),
       * or a negative errno on failure.  The roots info cache built here is
       * always freed before returning.
12522  */
12523 static int repair_root_items(struct btrfs_fs_info *info)
12524 {
12525         struct btrfs_path path;
12526         struct btrfs_key key;
12527         struct extent_buffer *leaf;
12528         struct btrfs_trans_handle *trans = NULL;
12529         int ret = 0;
12530         int bad_roots = 0;
12531         int need_trans = 0;
12532 
12533         btrfs_init_path(&path);
12534 
12535         ret = build_roots_info_cache(info);
12536         if (ret)
12537                 goto out;
12538 
12539         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12540         key.type = BTRFS_ROOT_ITEM_KEY;
12541         key.offset = 0;
12542 
12543 again:
12544         /*
12545          * Avoid opening and committing transactions if a leaf doesn't have
12546          * any root items that need to be fixed, so that we avoid rotating
12547          * backup roots unnecessarily.
12548          */
12549         if (need_trans) {
12550                 trans = btrfs_start_transaction(info->tree_root, 1);
12551                 if (IS_ERR(trans)) {
12552                         ret = PTR_ERR(trans);
12553                         goto out;
12554                 }
12555         }
12556 
              /* The cow argument is only set when a transaction is open */
12557         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12558                                 0, trans ? 1 : 0);
12559         if (ret < 0)
12560                 goto out;
12561         leaf = path.nodes[0];
12562 
12563         while (1) {
12564                 struct btrfs_key found_key;
12565 
                      /*
                       * End of this leaf: commit the transaction if one is open
                       * and restart the search.  NOTE(review): find_next_key()
                       * presumably stores the key following this leaf in @key
                       * and returns non-zero when there is none - confirm.
                       */
12566                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12567                         int no_more_keys = find_next_key(&path, &key);
12568 
12569                         btrfs_release_path(&path);
12570                         if (trans) {
12571                                 ret = btrfs_commit_transaction(trans,
12572                                                                info->tree_root);
12573                                 trans = NULL;
12574                                 if (ret < 0)
12575                                         goto out;
12576                         }
12577                         need_trans = 0;
12578                         if (no_more_keys)
12579                                 break;
12580                         goto again;
12581                 }
12582 
12583                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12584 
12585                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12586                         goto next;
12587                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12588                         goto next;
12589 
                      /* Check only when no transaction is open, otherwise fix */
12590                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12591                 if (ret < 0)
12592                         goto out;
12593                 if (ret) {
                              /*
                               * A bad item was found by a check-only pass in repair
                               * mode: redo the search from this key with a
                               * transaction open so the item can be rewritten.
                               */
12594                         if (!trans && repair) {
12595                                 need_trans = 1;
12596                                 key = found_key;
12597                                 btrfs_release_path(&path);
12598                                 goto again;
12599                         }
12600                         bad_roots++;
12601                 }
12602 next:
12603                 path.slots[0]++;
12604         }
12605         ret = 0;
12606 out:
12607         free_roots_info_cache();
12608         btrfs_release_path(&path);
              /* NOTE(review): the result of this commit is not checked */
12609         if (trans)
12610                 btrfs_commit_transaction(trans, info->tree_root);
12611         if (ret < 0)
12612                 return ret;
12613 
12614         return bad_roots;
12615 }
12616
12617 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12618 {
12619         struct btrfs_trans_handle *trans;
12620         struct btrfs_block_group_cache *bg_cache;
12621         u64 current = 0;
12622         int ret = 0;
12623
12624         /* Clear all free space cache inodes and its extent data */
12625         while (1) {
12626                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12627                 if (!bg_cache)
12628                         break;
12629                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12630                 if (ret < 0)
12631                         return ret;
12632                 current = bg_cache->key.objectid + bg_cache->key.offset;
12633         }
12634
12635         /* Don't forget to set cache_generation to -1 */
12636         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12637         if (IS_ERR(trans)) {
12638                 error("failed to update super block cache generation");
12639                 return PTR_ERR(trans);
12640         }
12641         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12642         btrfs_commit_transaction(trans, fs_info->tree_root);
12643
12644         return ret;
12645 }
12646
/*
 * Help text for "btrfs check", a NULL-terminated array of lines.  The first
 * entry is the synopsis and the second a one-line summary; the remaining
 * entries are the long description followed by one line (or more) per
 * option.  cmd_check() passes this array to usage() for -h and unknown
 * options.
 */
12647 const char * const cmd_check_usage[] = {
12648         "btrfs check [options] <device>",
12649         "Check structural integrity of a filesystem (unmounted).",
12650         "Check structural integrity of an unmounted filesystem. Verify internal",
12651         "trees' consistency and item connectivity. In the repair mode try to",
12652         "fix the problems found. ",
12653         "WARNING: the repair mode is considered dangerous",
12654         "",
12655         "-s|--super <superblock>     use this superblock copy",
12656         "-b|--backup                 use the first valid backup root copy",
12657         "--repair                    try to repair the filesystem",
12658         "--readonly                  run in read-only mode (default)",
12659         "--init-csum-tree            create a new CRC tree",
12660         "--init-extent-tree          create a new extent tree",
12661         "--mode <MODE>               allows choice of memory/IO trade-offs",
12662         "                            where MODE is one of:",
12663         "                            original - read inodes and extents to memory (requires",
12664         "                                       more memory, does less IO)",
12665         "                            lowmem   - try to use less memory but read blocks again",
12666         "                                       when needed",
12667         "--check-data-csum           verify checksums of data blocks",
12668         "-Q|--qgroup-report          print a report on qgroup consistency",
12669         "-E|--subvol-extents <subvolid>",
12670         "                            print subvolume extents and sharing state",
12671         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
12672         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
12673         "-p|--progress               indicate progress",
12674         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
12675         NULL
12676 };
12677
12678 int cmd_check(int argc, char **argv)
12679 {
12680         struct cache_tree root_cache;
12681         struct btrfs_root *root;
12682         struct btrfs_fs_info *info;
12683         u64 bytenr = 0;
12684         u64 subvolid = 0;
12685         u64 tree_root_bytenr = 0;
12686         u64 chunk_root_bytenr = 0;
12687         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12688         int ret;
12689         int err = 0;
12690         u64 num;
12691         int init_csum_tree = 0;
12692         int readonly = 0;
12693         int clear_space_cache = 0;
12694         int qgroup_report = 0;
12695         int qgroups_repaired = 0;
12696         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12697
12698         while(1) {
12699                 int c;
12700                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12701                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12702                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12703                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12704                 static const struct option long_options[] = {
12705                         { "super", required_argument, NULL, 's' },
12706                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12707                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12708                         { "init-csum-tree", no_argument, NULL,
12709                                 GETOPT_VAL_INIT_CSUM },
12710                         { "init-extent-tree", no_argument, NULL,
12711                                 GETOPT_VAL_INIT_EXTENT },
12712                         { "check-data-csum", no_argument, NULL,
12713                                 GETOPT_VAL_CHECK_CSUM },
12714                         { "backup", no_argument, NULL, 'b' },
12715                         { "subvol-extents", required_argument, NULL, 'E' },
12716                         { "qgroup-report", no_argument, NULL, 'Q' },
12717                         { "tree-root", required_argument, NULL, 'r' },
12718                         { "chunk-root", required_argument, NULL,
12719                                 GETOPT_VAL_CHUNK_TREE },
12720                         { "progress", no_argument, NULL, 'p' },
12721                         { "mode", required_argument, NULL,
12722                                 GETOPT_VAL_MODE },
12723                         { "clear-space-cache", required_argument, NULL,
12724                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12725                         { NULL, 0, NULL, 0}
12726                 };
12727
12728                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12729                 if (c < 0)
12730                         break;
12731                 switch(c) {
12732                         case 'a': /* ignored */ break;
12733                         case 'b':
12734                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12735                                 break;
12736                         case 's':
12737                                 num = arg_strtou64(optarg);
12738                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12739                                         error(
12740                                         "super mirror should be less than %d",
12741                                                 BTRFS_SUPER_MIRROR_MAX);
12742                                         exit(1);
12743                                 }
12744                                 bytenr = btrfs_sb_offset(((int)num));
12745                                 printf("using SB copy %llu, bytenr %llu\n", num,
12746                                        (unsigned long long)bytenr);
12747                                 break;
12748                         case 'Q':
12749                                 qgroup_report = 1;
12750                                 break;
12751                         case 'E':
12752                                 subvolid = arg_strtou64(optarg);
12753                                 break;
12754                         case 'r':
12755                                 tree_root_bytenr = arg_strtou64(optarg);
12756                                 break;
12757                         case GETOPT_VAL_CHUNK_TREE:
12758                                 chunk_root_bytenr = arg_strtou64(optarg);
12759                                 break;
12760                         case 'p':
12761                                 ctx.progress_enabled = true;
12762                                 break;
12763                         case '?':
12764                         case 'h':
12765                                 usage(cmd_check_usage);
12766                         case GETOPT_VAL_REPAIR:
12767                                 printf("enabling repair mode\n");
12768                                 repair = 1;
12769                                 ctree_flags |= OPEN_CTREE_WRITES;
12770                                 break;
12771                         case GETOPT_VAL_READONLY:
12772                                 readonly = 1;
12773                                 break;
12774                         case GETOPT_VAL_INIT_CSUM:
12775                                 printf("Creating a new CRC tree\n");
12776                                 init_csum_tree = 1;
12777                                 repair = 1;
12778                                 ctree_flags |= OPEN_CTREE_WRITES;
12779                                 break;
12780                         case GETOPT_VAL_INIT_EXTENT:
12781                                 init_extent_tree = 1;
12782                                 ctree_flags |= (OPEN_CTREE_WRITES |
12783                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12784                                 repair = 1;
12785                                 break;
12786                         case GETOPT_VAL_CHECK_CSUM:
12787                                 check_data_csum = 1;
12788                                 break;
12789                         case GETOPT_VAL_MODE:
12790                                 check_mode = parse_check_mode(optarg);
12791                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12792                                         error("unknown mode: %s", optarg);
12793                                         exit(1);
12794                                 }
12795                                 break;
12796                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12797                                 if (strcmp(optarg, "v1") == 0) {
12798                                         clear_space_cache = 1;
12799                                 } else if (strcmp(optarg, "v2") == 0) {
12800                                         clear_space_cache = 2;
12801                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12802                                 } else {
12803                                         error(
12804                 "invalid argument to --clear-space-cache, must be v1 or v2");
12805                                         exit(1);
12806                                 }
12807                                 ctree_flags |= OPEN_CTREE_WRITES;
12808                                 break;
12809                 }
12810         }
12811
12812         if (check_argc_exact(argc - optind, 1))
12813                 usage(cmd_check_usage);
12814
12815         if (ctx.progress_enabled) {
12816                 ctx.tp = TASK_NOTHING;
12817                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12818         }
12819
12820         /* This check is the only reason for --readonly to exist */
12821         if (readonly && repair) {
12822                 error("repair options are not compatible with --readonly");
12823                 exit(1);
12824         }
12825
12826         /*
12827          * Not supported yet
12828          */
12829         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12830                 error("low memory mode doesn't support repair yet");
12831                 exit(1);
12832         }
12833
12834         radix_tree_init();
12835         cache_tree_init(&root_cache);
12836
12837         if((ret = check_mounted(argv[optind])) < 0) {
12838                 error("could not check mount status: %s", strerror(-ret));
12839                 err |= !!ret;
12840                 goto err_out;
12841         } else if(ret) {
12842                 error("%s is currently mounted, aborting", argv[optind]);
12843                 ret = -EBUSY;
12844                 err |= !!ret;
12845                 goto err_out;
12846         }
12847
12848         /* only allow partial opening under repair mode */
12849         if (repair)
12850                 ctree_flags |= OPEN_CTREE_PARTIAL;
12851
12852         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12853                                   chunk_root_bytenr, ctree_flags);
12854         if (!info) {
12855                 error("cannot open file system");
12856                 ret = -EIO;
12857                 err |= !!ret;
12858                 goto err_out;
12859         }
12860
12861         global_info = info;
12862         root = info->fs_root;
12863         if (clear_space_cache == 1) {
12864                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12865                         error(
12866                 "free space cache v2 detected, use --clear-space-cache v2");
12867                         ret = 1;
12868                         goto close_out;
12869                 }
12870                 printf("Clearing free space cache\n");
12871                 ret = clear_free_space_cache(info);
12872                 if (ret) {
12873                         error("failed to clear free space cache");
12874                         ret = 1;
12875                 } else {
12876                         printf("Free space cache cleared\n");
12877                 }
12878                 goto close_out;
12879         } else if (clear_space_cache == 2) {
12880                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12881                         printf("no free space cache v2 to clear\n");
12882                         ret = 0;
12883                         goto close_out;
12884                 }
12885                 printf("Clear free space cache v2\n");
12886                 ret = btrfs_clear_free_space_tree(info);
12887                 if (ret) {
12888                         error("failed to clear free space cache v2: %d", ret);
12889                         ret = 1;
12890                 } else {
12891                         printf("free space cache v2 cleared\n");
12892                 }
12893                 goto close_out;
12894         }
12895
12896         /*
12897          * repair mode will force us to commit transaction which
12898          * will make us fail to load log tree when mounting.
12899          */
12900         if (repair && btrfs_super_log_root(info->super_copy)) {
12901                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12902                 if (!ret) {
12903                         ret = 1;
12904                         err |= !!ret;
12905                         goto close_out;
12906                 }
12907                 ret = zero_log_tree(root);
12908                 err |= !!ret;
12909                 if (ret) {
12910                         error("failed to zero log tree: %d", ret);
12911                         goto close_out;
12912                 }
12913         }
12914
12915         uuid_unparse(info->super_copy->fsid, uuidbuf);
12916         if (qgroup_report) {
12917                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12918                        uuidbuf);
12919                 ret = qgroup_verify_all(info);
12920                 err |= !!ret;
12921                 if (ret == 0)
12922                         report_qgroups(1);
12923                 goto close_out;
12924         }
12925         if (subvolid) {
12926                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12927                        subvolid, argv[optind], uuidbuf);
12928                 ret = print_extent_state(info, subvolid);
12929                 err |= !!ret;
12930                 goto close_out;
12931         }
12932         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12933
12934         if (!extent_buffer_uptodate(info->tree_root->node) ||
12935             !extent_buffer_uptodate(info->dev_root->node) ||
12936             !extent_buffer_uptodate(info->chunk_root->node)) {
12937                 error("critical roots corrupted, unable to check the filesystem");
12938                 err |= !!ret;
12939                 ret = -EIO;
12940                 goto close_out;
12941         }
12942
12943         if (init_extent_tree || init_csum_tree) {
12944                 struct btrfs_trans_handle *trans;
12945
12946                 trans = btrfs_start_transaction(info->extent_root, 0);
12947                 if (IS_ERR(trans)) {
12948                         error("error starting transaction");
12949                         ret = PTR_ERR(trans);
12950                         err |= !!ret;
12951                         goto close_out;
12952                 }
12953
12954                 if (init_extent_tree) {
12955                         printf("Creating a new extent tree\n");
12956                         ret = reinit_extent_tree(trans, info);
12957                         err |= !!ret;
12958                         if (ret)
12959                                 goto close_out;
12960                 }
12961
12962                 if (init_csum_tree) {
12963                         printf("Reinitialize checksum tree\n");
12964                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12965                         if (ret) {
12966                                 error("checksum tree initialization failed: %d",
12967                                                 ret);
12968                                 ret = -EIO;
12969                                 err |= !!ret;
12970                                 goto close_out;
12971                         }
12972
12973                         ret = fill_csum_tree(trans, info->csum_root,
12974                                              init_extent_tree);
12975                         err |= !!ret;
12976                         if (ret) {
12977                                 error("checksum tree refilling failed: %d", ret);
12978                                 return -EIO;
12979                         }
12980                 }
12981                 /*
12982                  * Ok now we commit and run the normal fsck, which will add
12983                  * extent entries for all of the items it finds.
12984                  */
12985                 ret = btrfs_commit_transaction(trans, info->extent_root);
12986                 err |= !!ret;
12987                 if (ret)
12988                         goto close_out;
12989         }
12990         if (!extent_buffer_uptodate(info->extent_root->node)) {
12991                 error("critical: extent_root, unable to check the filesystem");
12992                 ret = -EIO;
12993                 err |= !!ret;
12994                 goto close_out;
12995         }
12996         if (!extent_buffer_uptodate(info->csum_root->node)) {
12997                 error("critical: csum_root, unable to check the filesystem");
12998                 ret = -EIO;
12999                 err |= !!ret;
13000                 goto close_out;
13001         }
13002
13003         if (!ctx.progress_enabled)
13004                 fprintf(stderr, "checking extents\n");
13005         if (check_mode == CHECK_MODE_LOWMEM)
13006                 ret = check_chunks_and_extents_v2(root);
13007         else
13008                 ret = check_chunks_and_extents(root);
13009         err |= !!ret;
13010         if (ret)
13011                 error(
13012                 "errors found in extent allocation tree or chunk allocation");
13013
13014         ret = repair_root_items(info);
13015         err |= !!ret;
13016         if (ret < 0) {
13017                 error("failed to repair root items: %s", strerror(-ret));
13018                 goto close_out;
13019         }
13020         if (repair) {
13021                 fprintf(stderr, "Fixed %d roots.\n", ret);
13022                 ret = 0;
13023         } else if (ret > 0) {
13024                 fprintf(stderr,
13025                        "Found %d roots with an outdated root item.\n",
13026                        ret);
13027                 fprintf(stderr,
13028                         "Please run a filesystem check with the option --repair to fix them.\n");
13029                 ret = 1;
13030                 err |= !!ret;
13031                 goto close_out;
13032         }
13033
13034         if (!ctx.progress_enabled) {
13035                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13036                         fprintf(stderr, "checking free space tree\n");
13037                 else
13038                         fprintf(stderr, "checking free space cache\n");
13039         }
13040         ret = check_space_cache(root);
13041         err |= !!ret;
13042         if (ret) {
13043                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13044                         error("errors found in free space tree");
13045                 else
13046                         error("errors found in free space cache");
13047                 goto out;
13048         }
13049
13050         /*
13051          * We used to have to have these hole extents in between our real
13052          * extents so if we don't have this flag set we need to make sure there
13053          * are no gaps in the file extents for inodes, otherwise we can just
13054          * ignore it when this happens.
13055          */
13056         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13057         if (!ctx.progress_enabled)
13058                 fprintf(stderr, "checking fs roots\n");
13059         if (check_mode == CHECK_MODE_LOWMEM)
13060                 ret = check_fs_roots_v2(root->fs_info);
13061         else
13062                 ret = check_fs_roots(root, &root_cache);
13063         err |= !!ret;
13064         if (ret) {
13065                 error("errors found in fs roots");
13066                 goto out;
13067         }
13068
13069         fprintf(stderr, "checking csums\n");
13070         ret = check_csums(root);
13071         err |= !!ret;
13072         if (ret) {
13073                 error("errors found in csum tree");
13074                 goto out;
13075         }
13076
13077         fprintf(stderr, "checking root refs\n");
13078         /* For low memory mode, check_fs_roots_v2 handles root refs */
13079         if (check_mode != CHECK_MODE_LOWMEM) {
13080                 ret = check_root_refs(root, &root_cache);
13081                 err |= !!ret;
13082                 if (ret) {
13083                         error("errors found in root refs");
13084                         goto out;
13085                 }
13086         }
13087
13088         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13089                 struct extent_buffer *eb;
13090
13091                 eb = list_first_entry(&root->fs_info->recow_ebs,
13092                                       struct extent_buffer, recow);
13093                 list_del_init(&eb->recow);
13094                 ret = recow_extent_buffer(root, eb);
13095                 err |= !!ret;
13096                 if (ret) {
13097                         error("fails to fix transid errors");
13098                         break;
13099                 }
13100         }
13101
13102         while (!list_empty(&delete_items)) {
13103                 struct bad_item *bad;
13104
13105                 bad = list_first_entry(&delete_items, struct bad_item, list);
13106                 list_del_init(&bad->list);
13107                 if (repair) {
13108                         ret = delete_bad_item(root, bad);
13109                         err |= !!ret;
13110                 }
13111                 free(bad);
13112         }
13113
13114         if (info->quota_enabled) {
13115                 fprintf(stderr, "checking quota groups\n");
13116                 ret = qgroup_verify_all(info);
13117                 err |= !!ret;
13118                 if (ret) {
13119                         error("failed to check quota groups");
13120                         goto out;
13121                 }
13122                 report_qgroups(0);
13123                 ret = repair_qgroups(info, &qgroups_repaired);
13124                 err |= !!ret;
13125                 if (err) {
13126                         error("failed to repair quota groups");
13127                         goto out;
13128                 }
13129                 ret = 0;
13130         }
13131
13132         if (!list_empty(&root->fs_info->recow_ebs)) {
13133                 error("transid errors in file system");
13134                 ret = 1;
13135                 err |= !!ret;
13136         }
13137 out:
13138         if (found_old_backref) { /*
13139                  * there was a disk format change when mixed
13140                  * backref was in testing tree. The old format
13141                  * existed about one week.
13142                  */
13143                 printf("\n * Found old mixed backref format. "
13144                        "The old format is not supported! *"
13145                        "\n * Please mount the FS in readonly mode, "
13146                        "backup data and re-format the FS. *\n\n");
13147                 err |= 1;
13148         }
13149         printf("found %llu bytes used, ",
13150                (unsigned long long)bytes_used);
13151         if (err)
13152                 printf("error(s) found\n");
13153         else
13154                 printf("no error found\n");
13155         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13156         printf("total tree bytes: %llu\n",
13157                (unsigned long long)total_btree_bytes);
13158         printf("total fs tree bytes: %llu\n",
13159                (unsigned long long)total_fs_tree_bytes);
13160         printf("total extent tree bytes: %llu\n",
13161                (unsigned long long)total_extent_tree_bytes);
13162         printf("btree space waste bytes: %llu\n",
13163                (unsigned long long)btree_space_waste);
13164         printf("file data blocks allocated: %llu\n referenced %llu\n",
13165                 (unsigned long long)data_bytes_allocated,
13166                 (unsigned long long)data_bytes_referenced);
13167
13168         free_qgroup_counts();
13169         free_root_recs_tree(&root_cache);
13170 close_out:
13171         close_ctree(root);
13172 err_out:
13173         if (ctx.progress_enabled)
13174                 task_deinit(ctx.info);
13175
13176         return err;
13177 }