btrfs-progs: check: remove unused argument from calc_extent_flag
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check that the progress indicator reports.
 *
 * print_status_check() uses the value as an index into its table of phase
 * names, so the order of the first three entries must match that table.
 * TASK_NOTHING has no name and makes print_status_check() return early.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* presumably set when progress output was requested; verify at caller */
        int progress_enabled;
        /* phase currently reported by print_status_check() */
        enum task_position tp;

        /* handle passed to task_period_start()/task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state for the checker.  Everything here is private to this
 * translation unit; the users of most of these variables are outside the
 * part of the file documented here.
 */

/* Byte counters (presumably accumulated while walking the trees). */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* List heads, initially empty. */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Integer flags, all off by default. */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress reporting context, zero-initialized. */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation selected by the user.
 *
 * parse_check_mode() maps the strings "lowmem", "orig" and "original" to
 * these values and returns CHECK_MODE_UNKNOWN for anything else.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        /* alias: the original mode is the default */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header embedded as the 'node' member of struct data_backref and
 * struct tree_backref.  Linked through 'list'.
 */
struct extent_backref {
        struct list_head list;
        /* presumably distinguishes data_backref from tree_backref; verify */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference to a data extent.  'node' is the embedded common header;
 * to_data_backref() recovers this structure from a pointer to it.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.  Which one is meaningful is
         * presumably selected by node.full_backref -- TODO confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Error bits, one per kind of problem, meant to be OR-ed together.
 * Note that the numbering starts at bit 1; bit 0 is not defined here.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, with the undetermined members removed and
 * linked through a plain list_head.
 *
 * During the extent scan the entries are collected per root; during the
 * fs tree scan they are moved to the owning inode record, where they live
 * on inode_record::orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Back reference to a tree block.  'node' is the embedded common header;
 * to_tree_backref() recovers this structure from a pointer to it.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage (anonymous union) */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 is why that bit-field below is two bits wide.
 */
enum { FLAG_UNSET = 2 };

/*
 * Per-extent bookkeeping record.  to_extent_record() recovers the record
 * from its 'list' member.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * Snapshot of selected root item fields, kept on a list.
 * NOTE(review): the code that fills this in is outside this part of the
 * file; the field meanings below are taken from their names only.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/*
 * Error bits for name/root back references.  print_ref_error() prints a
 * short description for every bit that is set.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) // 0x100
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole in a file, covering the byte range [start, start + len).
 * Stored in an rb-tree (inode_record::holes) through 'node'.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode.
 *
 * Records are reference counted through 'refs': get_inode_rec() clones a
 * record before modification when refs > 1, and free_inode_rec() only
 * releases the memory when the count drops to zero.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* get_inode_rec() initializes this to (u64)-1 for new records */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* reference count, see above */
        u32 refs;
};
280
/*
 * Error bits stored in inode_record::errors.  print_inode_error() prints
 * the raw value in hex followed by a description of every bit set.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 0x100
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 0x400
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/* Per-root bookkeeping, stored in a cache tree through 'cache'. */
struct root_record {
        /* list of struct root_backref */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache tree entry that carries an opaque pointer.  get_inode_rec() uses
 * it to map an inode number (cache.start, with cache.size == 1) to its
 * struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Per-node state used while walking a tree (see walk_control::nodes).
 * NOTE(review): the users are outside this part of the file; the field
 * roles are inferred from their types and names.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/* Start and size of a block (presumably a byte offset and byte length). */
struct block_info {
        u64 start;
        u32 size;
};
342
/* State of a tree walk, with one shared_node slot per possible level. */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* presumably an index into nodes[]; verify against the users */
        int active_node;
        int root_level;
};
349
/* Key of a problematic item together with the id of its root, on a list. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/*
 * Candidate (bytenr, bytes) pair for an extent, kept on a list.
 * NOTE(review): 'count' and 'broken' look like tallies of how often the
 * pair was seen; confirm against the users elsewhere in the file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/* Information gathered about one root, stored through 'cache_extent'. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        /* presumably the generation of the root; verify */
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are used
 * internally for error classification only.
 *
 * Every error owns a distinct bit so that a combined value can be decoded
 * unambiguously.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH; it now uses the first free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Counterpart of print_status_check(): ends the status line with a
 * newline and flushes stdout.  @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        printf("\n");
        fflush(stdout);
        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of extent reference information for the tree blocks seen
 * most recently while walking a tree.  Each array is indexed by tree level.
 */
struct node_refs {
	/* Start bytenr of the block the cached values at this level describe */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* Reference count of that block as returned by the extent lookup */
	u64 refs[BTRFS_MAX_LEVEL];
	/* Non-zero if the current root has to check that block itself */
	int need_check[BTRFS_MAX_LEVEL];
};

/* Used by process_one_leaf_v2() before their definitions appear */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
			     struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the INODE_REF/INODE_EXTREF
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 found;
4703         unsigned int extent_type;
4704         unsigned int is_hole;
4705         int ret;
4706         int err = 0;
4707
4708         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4709
4710         extent_type = btrfs_file_extent_type(node, fi);
4711         /* Skip if file extent is inline */
4712         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4713                 struct btrfs_item *e = btrfs_item_nr(slot);
4714                 u32 item_inline_len;
4715
4716                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4717                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4718                 if (extent_num_bytes == 0 ||
4719                     extent_num_bytes != item_inline_len)
4720                         err |= FILE_EXTENT_ERROR;
4721                 *size += extent_num_bytes;
4722                 return err;
4723         }
4724
4725         /* Check extent type */
4726         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4727                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4728                 err |= FILE_EXTENT_ERROR;
4729                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4730                       root->objectid, fkey->objectid, fkey->offset);
4731                 return err;
4732         }
4733
4734         /* Check REG_EXTENT/PREALLOC_EXTENT */
4735         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4736         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4737         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4738         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4739
4740         /* Check EXTENT_DATA datasum */
4741         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4742         if (found > 0 && nodatasum) {
4743                 err |= ODD_CSUM_ITEM;
4744                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4745                       root->objectid, fkey->objectid, fkey->offset);
4746         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4747                    !is_hole &&
4748                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4749                 err |= CSUM_ITEM_MISSING;
4750                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4753                 err |= ODD_CSUM_ITEM;
4754                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         }
4757
4758         /* Check EXTENT_DATA hole */
4759         if (no_holes && is_hole) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         } else if (!no_holes && *end != fkey->offset) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         }
4768
4769         *end += extent_num_bytes;
4770         if (!is_hole)
4771                 *size += extent_num_bytes;
4772
4773         return err;
4774 }
4775
4776 /*
4777  * Check INODE_ITEM and related ITEMs (the same inode number)
4778  * 1. check link count
4779  * 2. check inode ref/extref
4780  * 3. check dir item/index
4781  *
4782  * @ext_ref:    the EXTENDED_IREF feature
4783  *
4784  * Return 0 if no error occurred.
4785  * Return >0 for error or hit the traversal is done(by error bitmap)
4786  */
4787 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4788                             unsigned int ext_ref)
4789 {
4790         struct extent_buffer *node;
4791         struct btrfs_inode_item *ii;
4792         struct btrfs_key key;
4793         u64 inode_id;
4794         u32 mode;
4795         u64 nlink;
4796         u64 nbytes;
4797         u64 isize;
4798         u64 size = 0;
4799         u64 refs = 0;
4800         u64 extent_end = 0;
4801         u64 extent_size = 0;
4802         unsigned int dir;
4803         unsigned int nodatasum;
4804         int slot;
4805         int ret;
4806         int err = 0;
4807
4808         node = path->nodes[0];
4809         slot = path->slots[0];
4810
4811         btrfs_item_key_to_cpu(node, &key, slot);
4812         inode_id = key.objectid;
4813
4814         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4815                 ret = btrfs_next_item(root, path);
4816                 if (ret > 0)
4817                         err |= LAST_ITEM;
4818                 return err;
4819         }
4820
4821         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4822         isize = btrfs_inode_size(node, ii);
4823         nbytes = btrfs_inode_nbytes(node, ii);
4824         mode = btrfs_inode_mode(node, ii);
4825         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4826         nlink = btrfs_inode_nlink(node, ii);
4827         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4828
4829         while (1) {
4830                 ret = btrfs_next_item(root, path);
4831                 if (ret < 0) {
4832                         /* out will fill 'err' rusing current statistics */
4833                         goto out;
4834                 } else if (ret > 0) {
4835                         err |= LAST_ITEM;
4836                         goto out;
4837                 }
4838
4839                 node = path->nodes[0];
4840                 slot = path->slots[0];
4841                 btrfs_item_key_to_cpu(node, &key, slot);
4842                 if (key.objectid != inode_id)
4843                         goto out;
4844
4845                 switch (key.type) {
4846                 case BTRFS_INODE_REF_KEY:
4847                         ret = check_inode_ref(root, &key, node, slot, &refs,
4848                                               mode);
4849                         err |= ret;
4850                         break;
4851                 case BTRFS_INODE_EXTREF_KEY:
4852                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4853                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4854                                         root->objectid, key.objectid,
4855                                         key.offset);
4856                         ret = check_inode_extref(root, &key, node, slot, &refs,
4857                                                  mode);
4858                         err |= ret;
4859                         break;
4860                 case BTRFS_DIR_ITEM_KEY:
4861                 case BTRFS_DIR_INDEX_KEY:
4862                         if (!dir) {
4863                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4864                                         root->objectid, inode_id,
4865                                         imode_to_type(mode), key.objectid,
4866                                         key.offset);
4867                         }
4868                         ret = check_dir_item(root, &key, node, slot, &size,
4869                                              ext_ref);
4870                         err |= ret;
4871                         break;
4872                 case BTRFS_EXTENT_DATA_KEY:
4873                         if (dir) {
4874                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4875                                         root->objectid, inode_id, key.objectid,
4876                                         key.offset);
4877                         }
4878                         ret = check_file_extent(root, &key, node, slot,
4879                                                 nodatasum, &extent_size,
4880                                                 &extent_end);
4881                         err |= ret;
4882                         break;
4883                 case BTRFS_XATTR_ITEM_KEY:
4884                         break;
4885                 default:
4886                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4887                               key.objectid, key.type, key.offset);
4888                 }
4889         }
4890
4891 out:
4892         /* verify INODE_ITEM nlink/isize/nbytes */
4893         if (dir) {
4894                 if (nlink != 1) {
4895                         err |= LINK_COUNT_ERROR;
4896                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4897                               root->objectid, inode_id, nlink);
4898                 }
4899
4900                 /*
4901                  * Just a warning, as dir inode nbytes is just an
4902                  * instructive value.
4903                  */
4904                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4905                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4906                                 root->objectid, inode_id, root->nodesize);
4907                 }
4908
4909                 if (isize != size) {
4910                         err |= ISIZE_ERROR;
4911                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4912                               root->objectid, inode_id, isize, size);
4913                 }
4914         } else {
4915                 if (nlink != refs) {
4916                         err |= LINK_COUNT_ERROR;
4917                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4918                               root->objectid, inode_id, nlink, refs);
4919                 } else if (!nlink) {
4920                         err |= ORPHAN_ITEM;
4921                 }
4922
4923                 if (!nbytes && !no_holes && extent_end < isize) {
4924                         err |= NBYTES_ERROR;
4925                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4926                               root->objectid, inode_id, isize);
4927                 }
4928
4929                 if (nbytes != extent_size) {
4930                         err |= NBYTES_ERROR;
4931                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4932                               root->objectid, inode_id, nbytes, extent_size);
4933                 }
4934         }
4935
4936         return err;
4937 }
4938
4939 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4940 {
4941         struct btrfs_path path;
4942         struct btrfs_key key;
4943         int err = 0;
4944         int ret;
4945
4946         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4947         key.type = BTRFS_INODE_ITEM_KEY;
4948         key.offset = 0;
4949
4950         /* For root being dropped, we don't need to check first inode */
4951         if (btrfs_root_refs(&root->root_item) == 0 &&
4952             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4953             key.objectid)
4954                 return 0;
4955
4956         btrfs_init_path(&path);
4957
4958         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4959         if (ret < 0)
4960                 goto out;
4961         if (ret > 0) {
4962                 ret = 0;
4963                 err |= INODE_ITEM_MISSING;
4964         }
4965
4966         err |= check_inode_item(root, &path, ext_ref);
4967         err &= ~LAST_ITEM;
4968         if (err && !ret)
4969                 ret = -EIO;
4970 out:
4971         btrfs_release_path(&path);
4972         return ret;
4973 }
4974
4975 /*
4976  * Iterate all item on the tree and call check_inode_item() to check.
4977  *
4978  * @root:       the root of the tree to be checked.
4979  * @ext_ref:    the EXTENDED_IREF feature
4980  *
4981  * Return 0 if no error found.
4982  * Return <0 for error.
4983  */
4984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4985 {
4986         struct btrfs_path path;
4987         struct node_refs nrefs;
4988         struct btrfs_root_item *root_item = &root->root_item;
4989         int ret, wret;
4990         int level;
4991
4992         /*
4993          * We need to manually check the first inode item(256)
4994          * As the following traversal function will only start from
4995          * the first inode item in the leaf, if inode item(256) is missing
4996          * we will just skip it forever.
4997          */
4998         ret = check_fs_first_inode(root, ext_ref);
4999         if (ret < 0)
5000                 return ret;
5001
5002         memset(&nrefs, 0, sizeof(nrefs));
5003         level = btrfs_header_level(root->node);
5004         btrfs_init_path(&path);
5005
5006         if (btrfs_root_refs(root_item) > 0 ||
5007             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5008                 path.nodes[level] = root->node;
5009                 path.slots[level] = 0;
5010                 extent_buffer_get(root->node);
5011         } else {
5012                 struct btrfs_key key;
5013
5014                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5015                 level = root_item->drop_level;
5016                 path.lowest_level = level;
5017                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5018                 if (ret < 0)
5019                         goto out;
5020                 ret = 0;
5021         }
5022
5023         while (1) {
5024                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5025                 if (wret < 0)
5026                         ret = wret;
5027                 if (wret != 0)
5028                         break;
5029
5030                 wret = walk_up_tree_v2(root, &path, &level);
5031                 if (wret < 0)
5032                         ret = wret;
5033                 if (wret != 0)
5034                         break;
5035         }
5036
5037 out:
5038         btrfs_release_path(&path);
5039         return ret;
5040 }
5041
5042 /*
5043  * Find the relative ref for root_ref and root_backref.
5044  *
5045  * @root:       the root of the root tree.
5046  * @ref_key:    the key of the root ref.
5047  *
5048  * Return 0 if no error occurred.
5049  */
5050 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5051                           struct extent_buffer *node, int slot)
5052 {
5053         struct btrfs_path path;
5054         struct btrfs_key key;
5055         struct btrfs_root_ref *ref;
5056         struct btrfs_root_ref *backref;
5057         char ref_name[BTRFS_NAME_LEN] = {0};
5058         char backref_name[BTRFS_NAME_LEN] = {0};
5059         u64 ref_dirid;
5060         u64 ref_seq;
5061         u32 ref_namelen;
5062         u64 backref_dirid;
5063         u64 backref_seq;
5064         u32 backref_namelen;
5065         u32 len;
5066         int ret;
5067         int err = 0;
5068
5069         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5070         ref_dirid = btrfs_root_ref_dirid(node, ref);
5071         ref_seq = btrfs_root_ref_sequence(node, ref);
5072         ref_namelen = btrfs_root_ref_name_len(node, ref);
5073
5074         if (ref_namelen <= BTRFS_NAME_LEN) {
5075                 len = ref_namelen;
5076         } else {
5077                 len = BTRFS_NAME_LEN;
5078                 warning("%s[%llu %llu] ref_name too long",
5079                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5080                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5081                         ref_key->offset);
5082         }
5083         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5084
5085         /* Find relative root_ref */
5086         key.objectid = ref_key->offset;
5087         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5088         key.offset = ref_key->objectid;
5089
5090         btrfs_init_path(&path);
5091         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5092         if (ret) {
5093                 err |= ROOT_REF_MISSING;
5094                 error("%s[%llu %llu] couldn't find relative ref",
5095                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5096                       "ROOT_REF" : "ROOT_BACKREF",
5097                       ref_key->objectid, ref_key->offset);
5098                 goto out;
5099         }
5100
5101         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5102                                  struct btrfs_root_ref);
5103         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5104         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5105         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5106
5107         if (backref_namelen <= BTRFS_NAME_LEN) {
5108                 len = backref_namelen;
5109         } else {
5110                 len = BTRFS_NAME_LEN;
5111                 warning("%s[%llu %llu] ref_name too long",
5112                         key.type == BTRFS_ROOT_REF_KEY ?
5113                         "ROOT_REF" : "ROOT_BACKREF",
5114                         key.objectid, key.offset);
5115         }
5116         read_extent_buffer(path.nodes[0], backref_name,
5117                            (unsigned long)(backref + 1), len);
5118
5119         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5120             ref_namelen != backref_namelen ||
5121             strncmp(ref_name, backref_name, len)) {
5122                 err |= ROOT_REF_MISMATCH;
5123                 error("%s[%llu %llu] mismatch relative ref",
5124                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5125                       "ROOT_REF" : "ROOT_BACKREF",
5126                       ref_key->objectid, ref_key->offset);
5127         }
5128 out:
5129         btrfs_release_path(&path);
5130         return err;
5131 }
5132
5133 /*
5134  * Check all fs/file tree in low_memory mode.
5135  *
5136  * 1. for fs tree root item, call check_fs_root_v2()
5137  * 2. for fs tree root ref/backref, call check_root_ref()
5138  *
5139  * Return 0 if no error occurred.
5140  */
5141 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5142 {
5143         struct btrfs_root *tree_root = fs_info->tree_root;
5144         struct btrfs_root *cur_root = NULL;
5145         struct btrfs_path path;
5146         struct btrfs_key key;
5147         struct extent_buffer *node;
5148         unsigned int ext_ref;
5149         int slot;
5150         int ret;
5151         int err = 0;
5152
5153         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5154
5155         btrfs_init_path(&path);
5156         key.objectid = BTRFS_FS_TREE_OBJECTID;
5157         key.offset = 0;
5158         key.type = BTRFS_ROOT_ITEM_KEY;
5159
5160         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5161         if (ret < 0) {
5162                 err = ret;
5163                 goto out;
5164         } else if (ret > 0) {
5165                 err = -ENOENT;
5166                 goto out;
5167         }
5168
5169         while (1) {
5170                 node = path.nodes[0];
5171                 slot = path.slots[0];
5172                 btrfs_item_key_to_cpu(node, &key, slot);
5173                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5174                         goto out;
5175                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5176                     fs_root_objectid(key.objectid)) {
5177                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5178                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5179                                                                        &key);
5180                         } else {
5181                                 key.offset = (u64)-1;
5182                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5183                         }
5184
5185                         if (IS_ERR(cur_root)) {
5186                                 error("Fail to read fs/subvol tree: %lld",
5187                                       key.objectid);
5188                                 err = -EIO;
5189                                 goto next;
5190                         }
5191
5192                         ret = check_fs_root_v2(cur_root, ext_ref);
5193                         err |= ret;
5194
5195                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5196                                 btrfs_free_fs_root(cur_root);
5197                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5198                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5199                         ret = check_root_ref(tree_root, &key, node, slot);
5200                         err |= ret;
5201                 }
5202 next:
5203                 ret = btrfs_next_item(tree_root, &path);
5204                 if (ret > 0)
5205                         goto out;
5206                 if (ret < 0) {
5207                         err = ret;
5208                         goto out;
5209                 }
5210         }
5211
5212 out:
5213         btrfs_release_path(&path);
5214         return err;
5215 }
5216
5217 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5218 {
5219         struct list_head *cur = rec->backrefs.next;
5220         struct extent_backref *back;
5221         struct tree_backref *tback;
5222         struct data_backref *dback;
5223         u64 found = 0;
5224         int err = 0;
5225
5226         while(cur != &rec->backrefs) {
5227                 back = to_extent_backref(cur);
5228                 cur = cur->next;
5229                 if (!back->found_extent_tree) {
5230                         err = 1;
5231                         if (!print_errs)
5232                                 goto out;
5233                         if (back->is_data) {
5234                                 dback = to_data_backref(back);
5235                                 fprintf(stderr, "Backref %llu %s %llu"
5236                                         " owner %llu offset %llu num_refs %lu"
5237                                         " not found in extent tree\n",
5238                                         (unsigned long long)rec->start,
5239                                         back->full_backref ?
5240                                         "parent" : "root",
5241                                         back->full_backref ?
5242                                         (unsigned long long)dback->parent:
5243                                         (unsigned long long)dback->root,
5244                                         (unsigned long long)dback->owner,
5245                                         (unsigned long long)dback->offset,
5246                                         (unsigned long)dback->num_refs);
5247                         } else {
5248                                 tback = to_tree_backref(back);
5249                                 fprintf(stderr, "Backref %llu parent %llu"
5250                                         " root %llu not found in extent tree\n",
5251                                         (unsigned long long)rec->start,
5252                                         (unsigned long long)tback->parent,
5253                                         (unsigned long long)tback->root);
5254                         }
5255                 }
5256                 if (!back->is_data && !back->found_ref) {
5257                         err = 1;
5258                         if (!print_errs)
5259                                 goto out;
5260                         tback = to_tree_backref(back);
5261                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5262                                 (unsigned long long)rec->start,
5263                                 back->full_backref ? "parent" : "root",
5264                                 back->full_backref ?
5265                                 (unsigned long long)tback->parent :
5266                                 (unsigned long long)tback->root, back);
5267                 }
5268                 if (back->is_data) {
5269                         dback = to_data_backref(back);
5270                         if (dback->found_ref != dback->num_refs) {
5271                                 err = 1;
5272                                 if (!print_errs)
5273                                         goto out;
5274                                 fprintf(stderr, "Incorrect local backref count"
5275                                         " on %llu %s %llu owner %llu"
5276                                         " offset %llu found %u wanted %u back %p\n",
5277                                         (unsigned long long)rec->start,
5278                                         back->full_backref ?
5279                                         "parent" : "root",
5280                                         back->full_backref ?
5281                                         (unsigned long long)dback->parent:
5282                                         (unsigned long long)dback->root,
5283                                         (unsigned long long)dback->owner,
5284                                         (unsigned long long)dback->offset,
5285                                         dback->found_ref, dback->num_refs, back);
5286                         }
5287                         if (dback->disk_bytenr != rec->start) {
5288                                 err = 1;
5289                                 if (!print_errs)
5290                                         goto out;
5291                                 fprintf(stderr, "Backref disk bytenr does not"
5292                                         " match extent record, bytenr=%llu, "
5293                                         "ref bytenr=%llu\n",
5294                                         (unsigned long long)rec->start,
5295                                         (unsigned long long)dback->disk_bytenr);
5296                         }
5297
5298                         if (dback->bytes != rec->nr) {
5299                                 err = 1;
5300                                 if (!print_errs)
5301                                         goto out;
5302                                 fprintf(stderr, "Backref bytes do not match "
5303                                         "extent backref, bytenr=%llu, ref "
5304                                         "bytes=%llu, backref bytes=%llu\n",
5305                                         (unsigned long long)rec->start,
5306                                         (unsigned long long)rec->nr,
5307                                         (unsigned long long)dback->bytes);
5308                         }
5309                 }
5310                 if (!back->is_data) {
5311                         found += 1;
5312                 } else {
5313                         dback = to_data_backref(back);
5314                         found += dback->found_ref;
5315                 }
5316         }
5317         if (found != rec->refs) {
5318                 err = 1;
5319                 if (!print_errs)
5320                         goto out;
5321                 fprintf(stderr, "Incorrect global backref count "
5322                         "on %llu found %llu wanted %llu\n",
5323                         (unsigned long long)rec->start,
5324                         (unsigned long long)found,
5325                         (unsigned long long)rec->refs);
5326         }
5327 out:
5328         return err;
5329 }
5330
5331 static int free_all_extent_backrefs(struct extent_record *rec)
5332 {
5333         struct extent_backref *back;
5334         struct list_head *cur;
5335         while (!list_empty(&rec->backrefs)) {
5336                 cur = rec->backrefs.next;
5337                 back = to_extent_backref(cur);
5338                 list_del(cur);
5339                 free(back);
5340         }
5341         return 0;
5342 }
5343
5344 static void free_extent_record_cache(struct cache_tree *extent_cache)
5345 {
5346         struct cache_extent *cache;
5347         struct extent_record *rec;
5348
5349         while (1) {
5350                 cache = first_cache_extent(extent_cache);
5351                 if (!cache)
5352                         break;
5353                 rec = container_of(cache, struct extent_record, cache);
5354                 remove_cache_extent(extent_cache, cache);
5355                 free_all_extent_backrefs(rec);
5356                 free(rec);
5357         }
5358 }
5359
5360 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5361                                  struct extent_record *rec)
5362 {
5363         if (rec->content_checked && rec->owner_ref_checked &&
5364             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5365             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5366             !rec->bad_full_backref && !rec->crossing_stripes &&
5367             !rec->wrong_chunk_type) {
5368                 remove_cache_extent(extent_cache, &rec->cache);
5369                 free_all_extent_backrefs(rec);
5370                 list_del_init(&rec->list);
5371                 free(rec);
5372         }
5373         return 0;
5374 }
5375
5376 static int check_owner_ref(struct btrfs_root *root,
5377                             struct extent_record *rec,
5378                             struct extent_buffer *buf)
5379 {
5380         struct extent_backref *node;
5381         struct tree_backref *back;
5382         struct btrfs_root *ref_root;
5383         struct btrfs_key key;
5384         struct btrfs_path path;
5385         struct extent_buffer *parent;
5386         int level;
5387         int found = 0;
5388         int ret;
5389
5390         list_for_each_entry(node, &rec->backrefs, list) {
5391                 if (node->is_data)
5392                         continue;
5393                 if (!node->found_ref)
5394                         continue;
5395                 if (node->full_backref)
5396                         continue;
5397                 back = to_tree_backref(node);
5398                 if (btrfs_header_owner(buf) == back->root)
5399                         return 0;
5400         }
5401         BUG_ON(rec->is_root);
5402
5403         /* try to find the block by search corresponding fs tree */
5404         key.objectid = btrfs_header_owner(buf);
5405         key.type = BTRFS_ROOT_ITEM_KEY;
5406         key.offset = (u64)-1;
5407
5408         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5409         if (IS_ERR(ref_root))
5410                 return 1;
5411
5412         level = btrfs_header_level(buf);
5413         if (level == 0)
5414                 btrfs_item_key_to_cpu(buf, &key, 0);
5415         else
5416                 btrfs_node_key_to_cpu(buf, &key, 0);
5417
5418         btrfs_init_path(&path);
5419         path.lowest_level = level + 1;
5420         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5421         if (ret < 0)
5422                 return 0;
5423
5424         parent = path.nodes[level + 1];
5425         if (parent && buf->start == btrfs_node_blockptr(parent,
5426                                                         path.slots[level + 1]))
5427                 found = 1;
5428
5429         btrfs_release_path(&path);
5430         return found ? 0 : 1;
5431 }
5432
5433 static int is_extent_tree_record(struct extent_record *rec)
5434 {
5435         struct list_head *cur = rec->backrefs.next;
5436         struct extent_backref *node;
5437         struct tree_backref *back;
5438         int is_extent = 0;
5439
5440         while(cur != &rec->backrefs) {
5441                 node = to_extent_backref(cur);
5442                 cur = cur->next;
5443                 if (node->is_data)
5444                         return 0;
5445                 back = to_tree_backref(node);
5446                 if (node->full_backref)
5447                         return 0;
5448                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5449                         is_extent = 1;
5450         }
5451         return is_extent;
5452 }
5453
5454
5455 static int record_bad_block_io(struct btrfs_fs_info *info,
5456                                struct cache_tree *extent_cache,
5457                                u64 start, u64 len)
5458 {
5459         struct extent_record *rec;
5460         struct cache_extent *cache;
5461         struct btrfs_key key;
5462
5463         cache = lookup_cache_extent(extent_cache, start, len);
5464         if (!cache)
5465                 return 0;
5466
5467         rec = container_of(cache, struct extent_record, cache);
5468         if (!is_extent_tree_record(rec))
5469                 return 0;
5470
5471         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5472         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5473 }
5474
5475 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5476                        struct extent_buffer *buf, int slot)
5477 {
5478         if (btrfs_header_level(buf)) {
5479                 struct btrfs_key_ptr ptr1, ptr2;
5480
5481                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5482                                    sizeof(struct btrfs_key_ptr));
5483                 read_extent_buffer(buf, &ptr2,
5484                                    btrfs_node_key_ptr_offset(slot + 1),
5485                                    sizeof(struct btrfs_key_ptr));
5486                 write_extent_buffer(buf, &ptr1,
5487                                     btrfs_node_key_ptr_offset(slot + 1),
5488                                     sizeof(struct btrfs_key_ptr));
5489                 write_extent_buffer(buf, &ptr2,
5490                                     btrfs_node_key_ptr_offset(slot),
5491                                     sizeof(struct btrfs_key_ptr));
5492                 if (slot == 0) {
5493                         struct btrfs_disk_key key;
5494                         btrfs_node_key(buf, &key, 0);
5495                         btrfs_fixup_low_keys(root, path, &key,
5496                                              btrfs_header_level(buf) + 1);
5497                 }
5498         } else {
5499                 struct btrfs_item *item1, *item2;
5500                 struct btrfs_key k1, k2;
5501                 char *item1_data, *item2_data;
5502                 u32 item1_offset, item2_offset, item1_size, item2_size;
5503
5504                 item1 = btrfs_item_nr(slot);
5505                 item2 = btrfs_item_nr(slot + 1);
5506                 btrfs_item_key_to_cpu(buf, &k1, slot);
5507                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5508                 item1_offset = btrfs_item_offset(buf, item1);
5509                 item2_offset = btrfs_item_offset(buf, item2);
5510                 item1_size = btrfs_item_size(buf, item1);
5511                 item2_size = btrfs_item_size(buf, item2);
5512
5513                 item1_data = malloc(item1_size);
5514                 if (!item1_data)
5515                         return -ENOMEM;
5516                 item2_data = malloc(item2_size);
5517                 if (!item2_data) {
5518                         free(item1_data);
5519                         return -ENOMEM;
5520                 }
5521
5522                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5523                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5524
5525                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5526                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5527                 free(item1_data);
5528                 free(item2_data);
5529
5530                 btrfs_set_item_offset(buf, item1, item2_offset);
5531                 btrfs_set_item_offset(buf, item2, item1_offset);
5532                 btrfs_set_item_size(buf, item1, item2_size);
5533                 btrfs_set_item_size(buf, item2, item1_size);
5534
5535                 path->slots[0] = slot;
5536                 btrfs_set_item_key_unsafe(root, path, &k2);
5537                 path->slots[0] = slot + 1;
5538                 btrfs_set_item_key_unsafe(root, path, &k1);
5539         }
5540         return 0;
5541 }
5542
5543 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5544 {
5545         struct extent_buffer *buf;
5546         struct btrfs_key k1, k2;
5547         int i;
5548         int level = path->lowest_level;
5549         int ret = -EIO;
5550
5551         buf = path->nodes[level];
5552         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5553                 if (level) {
5554                         btrfs_node_key_to_cpu(buf, &k1, i);
5555                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5556                 } else {
5557                         btrfs_item_key_to_cpu(buf, &k1, i);
5558                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5559                 }
5560                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5561                         continue;
5562                 ret = swap_values(root, path, buf, i);
5563                 if (ret)
5564                         break;
5565                 btrfs_mark_buffer_dirty(buf);
5566                 i = 0;
5567         }
5568         return ret;
5569 }
5570
5571 static int delete_bogus_item(struct btrfs_root *root,
5572                              struct btrfs_path *path,
5573                              struct extent_buffer *buf, int slot)
5574 {
5575         struct btrfs_key key;
5576         int nritems = btrfs_header_nritems(buf);
5577
5578         btrfs_item_key_to_cpu(buf, &key, slot);
5579
5580         /* These are all the keys we can deal with missing. */
5581         if (key.type != BTRFS_DIR_INDEX_KEY &&
5582             key.type != BTRFS_EXTENT_ITEM_KEY &&
5583             key.type != BTRFS_METADATA_ITEM_KEY &&
5584             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5585             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5586                 return -1;
5587
5588         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5589                (unsigned long long)key.objectid, key.type,
5590                (unsigned long long)key.offset, slot, buf->start);
5591         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5592                               btrfs_item_nr_offset(slot + 1),
5593                               sizeof(struct btrfs_item) *
5594                               (nritems - slot - 1));
5595         btrfs_set_header_nritems(buf, nritems - 1);
5596         if (slot == 0) {
5597                 struct btrfs_disk_key disk_key;
5598
5599                 btrfs_item_key(buf, &disk_key, 0);
5600                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5601         }
5602         btrfs_mark_buffer_dirty(buf);
5603         return 0;
5604 }
5605
5606 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5607 {
5608         struct extent_buffer *buf;
5609         int i;
5610         int ret = 0;
5611
5612         /* We should only get this for leaves */
5613         BUG_ON(path->lowest_level);
5614         buf = path->nodes[0];
5615 again:
5616         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5617                 unsigned int shift = 0, offset;
5618
5619                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5620                     BTRFS_LEAF_DATA_SIZE(root)) {
5621                         if (btrfs_item_end_nr(buf, i) >
5622                             BTRFS_LEAF_DATA_SIZE(root)) {
5623                                 ret = delete_bogus_item(root, path, buf, i);
5624                                 if (!ret)
5625                                         goto again;
5626                                 fprintf(stderr, "item is off the end of the "
5627                                         "leaf, can't fix\n");
5628                                 ret = -EIO;
5629                                 break;
5630                         }
5631                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5632                                 btrfs_item_end_nr(buf, i);
5633                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5634                            btrfs_item_offset_nr(buf, i - 1)) {
5635                         if (btrfs_item_end_nr(buf, i) >
5636                             btrfs_item_offset_nr(buf, i - 1)) {
5637                                 ret = delete_bogus_item(root, path, buf, i);
5638                                 if (!ret)
5639                                         goto again;
5640                                 fprintf(stderr, "items overlap, can't fix\n");
5641                                 ret = -EIO;
5642                                 break;
5643                         }
5644                         shift = btrfs_item_offset_nr(buf, i - 1) -
5645                                 btrfs_item_end_nr(buf, i);
5646                 }
5647                 if (!shift)
5648                         continue;
5649
5650                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5651                        i, shift, (unsigned long long)buf->start);
5652                 offset = btrfs_item_offset_nr(buf, i);
5653                 memmove_extent_buffer(buf,
5654                                       btrfs_leaf_data(buf) + offset + shift,
5655                                       btrfs_leaf_data(buf) + offset,
5656                                       btrfs_item_size_nr(buf, i));
5657                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5658                                       offset + shift);
5659                 btrfs_mark_buffer_dirty(buf);
5660         }
5661
5662         /*
5663          * We may have moved things, in which case we want to exit so we don't
5664          * write those changes out.  Once we have proper abort functionality in
5665          * progs this can be changed to something nicer.
5666          */
5667         BUG_ON(ret);
5668         return ret;
5669 }
5670
5671 /*
5672  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5673  * then just return -EIO.
5674  */
5675 static int try_to_fix_bad_block(struct btrfs_root *root,
5676                                 struct extent_buffer *buf,
5677                                 enum btrfs_tree_block_status status)
5678 {
5679         struct btrfs_trans_handle *trans;
5680         struct ulist *roots;
5681         struct ulist_node *node;
5682         struct btrfs_root *search_root;
5683         struct btrfs_path path;
5684         struct ulist_iterator iter;
5685         struct btrfs_key root_key, key;
5686         int ret;
5687
5688         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5689             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5690                 return -EIO;
5691
5692         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5693         if (ret)
5694                 return -EIO;
5695
5696         btrfs_init_path(&path);
5697         ULIST_ITER_INIT(&iter);
5698         while ((node = ulist_next(roots, &iter))) {
5699                 root_key.objectid = node->val;
5700                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5701                 root_key.offset = (u64)-1;
5702
5703                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5704                 if (IS_ERR(root)) {
5705                         ret = -EIO;
5706                         break;
5707                 }
5708
5709
5710                 trans = btrfs_start_transaction(search_root, 0);
5711                 if (IS_ERR(trans)) {
5712                         ret = PTR_ERR(trans);
5713                         break;
5714                 }
5715
5716                 path.lowest_level = btrfs_header_level(buf);
5717                 path.skip_check_block = 1;
5718                 if (path.lowest_level)
5719                         btrfs_node_key_to_cpu(buf, &key, 0);
5720                 else
5721                         btrfs_item_key_to_cpu(buf, &key, 0);
5722                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5723                 if (ret) {
5724                         ret = -EIO;
5725                         btrfs_commit_transaction(trans, search_root);
5726                         break;
5727                 }
5728                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5729                         ret = fix_key_order(search_root, &path);
5730                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5731                         ret = fix_item_offset(search_root, &path);
5732                 if (ret) {
5733                         btrfs_commit_transaction(trans, search_root);
5734                         break;
5735                 }
5736                 btrfs_release_path(&path);
5737                 btrfs_commit_transaction(trans, search_root);
5738         }
5739         ulist_free(roots);
5740         btrfs_release_path(&path);
5741         return ret;
5742 }
5743
5744 static int check_block(struct btrfs_root *root,
5745                        struct cache_tree *extent_cache,
5746                        struct extent_buffer *buf, u64 flags)
5747 {
5748         struct extent_record *rec;
5749         struct cache_extent *cache;
5750         struct btrfs_key key;
5751         enum btrfs_tree_block_status status;
5752         int ret = 0;
5753         int level;
5754
5755         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5756         if (!cache)
5757                 return 1;
5758         rec = container_of(cache, struct extent_record, cache);
5759         rec->generation = btrfs_header_generation(buf);
5760
5761         level = btrfs_header_level(buf);
5762         if (btrfs_header_nritems(buf) > 0) {
5763
5764                 if (level == 0)
5765                         btrfs_item_key_to_cpu(buf, &key, 0);
5766                 else
5767                         btrfs_node_key_to_cpu(buf, &key, 0);
5768
5769                 rec->info_objectid = key.objectid;
5770         }
5771         rec->info_level = level;
5772
5773         if (btrfs_is_leaf(buf))
5774                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5775         else
5776                 status = btrfs_check_node(root, &rec->parent_key, buf);
5777
5778         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5779                 if (repair)
5780                         status = try_to_fix_bad_block(root, buf, status);
5781                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5782                         ret = -EIO;
5783                         fprintf(stderr, "bad block %llu\n",
5784                                 (unsigned long long)buf->start);
5785                 } else {
5786                         /*
5787                          * Signal to callers we need to start the scan over
5788                          * again since we'll have cowed blocks.
5789                          */
5790                         ret = -EAGAIN;
5791                 }
5792         } else {
5793                 rec->content_checked = 1;
5794                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5795                         rec->owner_ref_checked = 1;
5796                 else {
5797                         ret = check_owner_ref(root, rec, buf);
5798                         if (!ret)
5799                                 rec->owner_ref_checked = 1;
5800                 }
5801         }
5802         if (!ret)
5803                 maybe_free_extent_rec(extent_cache, rec);
5804         return ret;
5805 }
5806
5807 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5808                                                 u64 parent, u64 root)
5809 {
5810         struct list_head *cur = rec->backrefs.next;
5811         struct extent_backref *node;
5812         struct tree_backref *back;
5813
5814         while(cur != &rec->backrefs) {
5815                 node = to_extent_backref(cur);
5816                 cur = cur->next;
5817                 if (node->is_data)
5818                         continue;
5819                 back = to_tree_backref(node);
5820                 if (parent > 0) {
5821                         if (!node->full_backref)
5822                                 continue;
5823                         if (parent == back->parent)
5824                                 return back;
5825                 } else {
5826                         if (node->full_backref)
5827                                 continue;
5828                         if (back->root == root)
5829                                 return back;
5830                 }
5831         }
5832         return NULL;
5833 }
5834
5835 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5836                                                 u64 parent, u64 root)
5837 {
5838         struct tree_backref *ref = malloc(sizeof(*ref));
5839
5840         if (!ref)
5841                 return NULL;
5842         memset(&ref->node, 0, sizeof(ref->node));
5843         if (parent > 0) {
5844                 ref->parent = parent;
5845                 ref->node.full_backref = 1;
5846         } else {
5847                 ref->root = root;
5848                 ref->node.full_backref = 0;
5849         }
5850         list_add_tail(&ref->node.list, &rec->backrefs);
5851
5852         return ref;
5853 }
5854
5855 static struct data_backref *find_data_backref(struct extent_record *rec,
5856                                                 u64 parent, u64 root,
5857                                                 u64 owner, u64 offset,
5858                                                 int found_ref,
5859                                                 u64 disk_bytenr, u64 bytes)
5860 {
5861         struct list_head *cur = rec->backrefs.next;
5862         struct extent_backref *node;
5863         struct data_backref *back;
5864
5865         while(cur != &rec->backrefs) {
5866                 node = to_extent_backref(cur);
5867                 cur = cur->next;
5868                 if (!node->is_data)
5869                         continue;
5870                 back = to_data_backref(node);
5871                 if (parent > 0) {
5872                         if (!node->full_backref)
5873                                 continue;
5874                         if (parent == back->parent)
5875                                 return back;
5876                 } else {
5877                         if (node->full_backref)
5878                                 continue;
5879                         if (back->root == root && back->owner == owner &&
5880                             back->offset == offset) {
5881                                 if (found_ref && node->found_ref &&
5882                                     (back->bytes != bytes ||
5883                                     back->disk_bytenr != disk_bytenr))
5884                                         continue;
5885                                 return back;
5886                         }
5887                 }
5888         }
5889         return NULL;
5890 }
5891
5892 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5893                                                 u64 parent, u64 root,
5894                                                 u64 owner, u64 offset,
5895                                                 u64 max_size)
5896 {
5897         struct data_backref *ref = malloc(sizeof(*ref));
5898
5899         if (!ref)
5900                 return NULL;
5901         memset(&ref->node, 0, sizeof(ref->node));
5902         ref->node.is_data = 1;
5903
5904         if (parent > 0) {
5905                 ref->parent = parent;
5906                 ref->owner = 0;
5907                 ref->offset = 0;
5908                 ref->node.full_backref = 1;
5909         } else {
5910                 ref->root = root;
5911                 ref->owner = owner;
5912                 ref->offset = offset;
5913                 ref->node.full_backref = 0;
5914         }
5915         ref->bytes = max_size;
5916         ref->found_ref = 0;
5917         ref->num_refs = 0;
5918         list_add_tail(&ref->node.list, &rec->backrefs);
5919         if (max_size > rec->max_size)
5920                 rec->max_size = max_size;
5921         return ref;
5922 }
5923
5924 /* Check if the type of extent matches with its chunk */
5925 static void check_extent_type(struct extent_record *rec)
5926 {
5927         struct btrfs_block_group_cache *bg_cache;
5928
5929         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5930         if (!bg_cache)
5931                 return;
5932
5933         /* data extent, check chunk directly*/
5934         if (!rec->metadata) {
5935                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5936                         rec->wrong_chunk_type = 1;
5937                 return;
5938         }
5939
5940         /* metadata extent, check the obvious case first */
5941         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5942                                  BTRFS_BLOCK_GROUP_METADATA))) {
5943                 rec->wrong_chunk_type = 1;
5944                 return;
5945         }
5946
5947         /*
5948          * Check SYSTEM extent, as it's also marked as metadata, we can only
5949          * make sure it's a SYSTEM extent by its backref
5950          */
5951         if (!list_empty(&rec->backrefs)) {
5952                 struct extent_backref *node;
5953                 struct tree_backref *tback;
5954                 u64 bg_type;
5955
5956                 node = to_extent_backref(rec->backrefs.next);
5957                 if (node->is_data) {
5958                         /* tree block shouldn't have data backref */
5959                         rec->wrong_chunk_type = 1;
5960                         return;
5961                 }
5962                 tback = container_of(node, struct tree_backref, node);
5963
5964                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5965                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5966                 else
5967                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5968                 if (!(bg_cache->flags & bg_type))
5969                         rec->wrong_chunk_type = 1;
5970         }
5971 }
5972
5973 /*
5974  * Allocate a new extent record, fill default values from @tmpl and insert int
5975  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5976  * the cache, otherwise it fails.
5977  */
5978 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5979                 struct extent_record *tmpl)
5980 {
5981         struct extent_record *rec;
5982         int ret = 0;
5983
5984         rec = malloc(sizeof(*rec));
5985         if (!rec)
5986                 return -ENOMEM;
5987         rec->start = tmpl->start;
5988         rec->max_size = tmpl->max_size;
5989         rec->nr = max(tmpl->nr, tmpl->max_size);
5990         rec->found_rec = tmpl->found_rec;
5991         rec->content_checked = tmpl->content_checked;
5992         rec->owner_ref_checked = tmpl->owner_ref_checked;
5993         rec->num_duplicates = 0;
5994         rec->metadata = tmpl->metadata;
5995         rec->flag_block_full_backref = FLAG_UNSET;
5996         rec->bad_full_backref = 0;
5997         rec->crossing_stripes = 0;
5998         rec->wrong_chunk_type = 0;
5999         rec->is_root = tmpl->is_root;
6000         rec->refs = tmpl->refs;
6001         rec->extent_item_refs = tmpl->extent_item_refs;
6002         rec->parent_generation = tmpl->parent_generation;
6003         INIT_LIST_HEAD(&rec->backrefs);
6004         INIT_LIST_HEAD(&rec->dups);
6005         INIT_LIST_HEAD(&rec->list);
6006         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6007         rec->cache.start = tmpl->start;
6008         rec->cache.size = tmpl->nr;
6009         ret = insert_cache_extent(extent_cache, &rec->cache);
6010         if (ret) {
6011                 free(rec);
6012                 return ret;
6013         }
6014         bytes_used += rec->nr;
6015
6016         if (tmpl->metadata)
6017                 rec->crossing_stripes = check_crossing_stripes(global_info,
6018                                 rec->start, global_info->tree_root->nodesize);
6019         check_extent_type(rec);
6020         return ret;
6021 }
6022
6023 /*
6024  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6025  * some are hints:
6026  * - refs              - if found, increase refs
6027  * - is_root           - if found, set
6028  * - content_checked   - if found, set
6029  * - owner_ref_checked - if found, set
6030  *
6031  * If not found, create a new one, initialize and insert.
6032  */
6033 static int add_extent_rec(struct cache_tree *extent_cache,
6034                 struct extent_record *tmpl)
6035 {
6036         struct extent_record *rec;
6037         struct cache_extent *cache;
6038         int ret = 0;
6039         int dup = 0;
6040
6041         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6042         if (cache) {
6043                 rec = container_of(cache, struct extent_record, cache);
6044                 if (tmpl->refs)
6045                         rec->refs++;
6046                 if (rec->nr == 1)
6047                         rec->nr = max(tmpl->nr, tmpl->max_size);
6048
6049                 /*
6050                  * We need to make sure to reset nr to whatever the extent
6051                  * record says was the real size, this way we can compare it to
6052                  * the backrefs.
6053                  */
6054                 if (tmpl->found_rec) {
6055                         if (tmpl->start != rec->start || rec->found_rec) {
6056                                 struct extent_record *tmp;
6057
6058                                 dup = 1;
6059                                 if (list_empty(&rec->list))
6060                                         list_add_tail(&rec->list,
6061                                                       &duplicate_extents);
6062
6063                                 /*
6064                                  * We have to do this song and dance in case we
6065                                  * find an extent record that falls inside of
6066                                  * our current extent record but does not have
6067                                  * the same objectid.
6068                                  */
6069                                 tmp = malloc(sizeof(*tmp));
6070                                 if (!tmp)
6071                                         return -ENOMEM;
6072                                 tmp->start = tmpl->start;
6073                                 tmp->max_size = tmpl->max_size;
6074                                 tmp->nr = tmpl->nr;
6075                                 tmp->found_rec = 1;
6076                                 tmp->metadata = tmpl->metadata;
6077                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6078                                 INIT_LIST_HEAD(&tmp->list);
6079                                 list_add_tail(&tmp->list, &rec->dups);
6080                                 rec->num_duplicates++;
6081                         } else {
6082                                 rec->nr = tmpl->nr;
6083                                 rec->found_rec = 1;
6084                         }
6085                 }
6086
6087                 if (tmpl->extent_item_refs && !dup) {
6088                         if (rec->extent_item_refs) {
6089                                 fprintf(stderr, "block %llu rec "
6090                                         "extent_item_refs %llu, passed %llu\n",
6091                                         (unsigned long long)tmpl->start,
6092                                         (unsigned long long)
6093                                                         rec->extent_item_refs,
6094                                         (unsigned long long)tmpl->extent_item_refs);
6095                         }
6096                         rec->extent_item_refs = tmpl->extent_item_refs;
6097                 }
6098                 if (tmpl->is_root)
6099                         rec->is_root = 1;
6100                 if (tmpl->content_checked)
6101                         rec->content_checked = 1;
6102                 if (tmpl->owner_ref_checked)
6103                         rec->owner_ref_checked = 1;
6104                 memcpy(&rec->parent_key, &tmpl->parent_key,
6105                                 sizeof(tmpl->parent_key));
6106                 if (tmpl->parent_generation)
6107                         rec->parent_generation = tmpl->parent_generation;
6108                 if (rec->max_size < tmpl->max_size)
6109                         rec->max_size = tmpl->max_size;
6110
6111                 /*
6112                  * A metadata extent can't cross stripe_len boundary, otherwise
6113                  * kernel scrub won't be able to handle it.
6114                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6115                  * it.
6116                  */
6117                 if (tmpl->metadata)
6118                         rec->crossing_stripes = check_crossing_stripes(
6119                                         global_info, rec->start,
6120                                         global_info->tree_root->nodesize);
6121                 check_extent_type(rec);
6122                 maybe_free_extent_rec(extent_cache, rec);
6123                 return ret;
6124         }
6125
6126         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6127
6128         return ret;
6129 }
6130
6131 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6132                             u64 parent, u64 root, int found_ref)
6133 {
6134         struct extent_record *rec;
6135         struct tree_backref *back;
6136         struct cache_extent *cache;
6137         int ret;
6138
6139         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6140         if (!cache) {
6141                 struct extent_record tmpl;
6142
6143                 memset(&tmpl, 0, sizeof(tmpl));
6144                 tmpl.start = bytenr;
6145                 tmpl.nr = 1;
6146                 tmpl.metadata = 1;
6147
6148                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6149                 if (ret)
6150                         return ret;
6151
6152                 /* really a bug in cache_extent implement now */
6153                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6154                 if (!cache)
6155                         return -ENOENT;
6156         }
6157
6158         rec = container_of(cache, struct extent_record, cache);
6159         if (rec->start != bytenr) {
6160                 /*
6161                  * Several cause, from unaligned bytenr to over lapping extents
6162                  */
6163                 return -EEXIST;
6164         }
6165
6166         back = find_tree_backref(rec, parent, root);
6167         if (!back) {
6168                 back = alloc_tree_backref(rec, parent, root);
6169                 if (!back)
6170                         return -ENOMEM;
6171         }
6172
6173         if (found_ref) {
6174                 if (back->node.found_ref) {
6175                         fprintf(stderr, "Extent back ref already exists "
6176                                 "for %llu parent %llu root %llu \n",
6177                                 (unsigned long long)bytenr,
6178                                 (unsigned long long)parent,
6179                                 (unsigned long long)root);
6180                 }
6181                 back->node.found_ref = 1;
6182         } else {
6183                 if (back->node.found_extent_tree) {
6184                         fprintf(stderr, "Extent back ref already exists "
6185                                 "for %llu parent %llu root %llu \n",
6186                                 (unsigned long long)bytenr,
6187                                 (unsigned long long)parent,
6188                                 (unsigned long long)root);
6189                 }
6190                 back->node.found_extent_tree = 1;
6191         }
6192         check_extent_type(rec);
6193         maybe_free_extent_rec(extent_cache, rec);
6194         return 0;
6195 }
6196
6197 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6198                             u64 parent, u64 root, u64 owner, u64 offset,
6199                             u32 num_refs, int found_ref, u64 max_size)
6200 {
6201         struct extent_record *rec;
6202         struct data_backref *back;
6203         struct cache_extent *cache;
6204         int ret;
6205
6206         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6207         if (!cache) {
6208                 struct extent_record tmpl;
6209
6210                 memset(&tmpl, 0, sizeof(tmpl));
6211                 tmpl.start = bytenr;
6212                 tmpl.nr = 1;
6213                 tmpl.max_size = max_size;
6214
6215                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6216                 if (ret)
6217                         return ret;
6218
6219                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6220                 if (!cache)
6221                         abort();
6222         }
6223
6224         rec = container_of(cache, struct extent_record, cache);
6225         if (rec->max_size < max_size)
6226                 rec->max_size = max_size;
6227
6228         /*
6229          * If found_ref is set then max_size is the real size and must match the
6230          * existing refs.  So if we have already found a ref then we need to
6231          * make sure that this ref matches the existing one, otherwise we need
6232          * to add a new backref so we can notice that the backrefs don't match
6233          * and we need to figure out who is telling the truth.  This is to
6234          * account for that awful fsync bug I introduced where we'd end up with
6235          * a btrfs_file_extent_item that would have its length include multiple
6236          * prealloc extents or point inside of a prealloc extent.
6237          */
6238         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6239                                  bytenr, max_size);
6240         if (!back) {
6241                 back = alloc_data_backref(rec, parent, root, owner, offset,
6242                                           max_size);
6243                 BUG_ON(!back);
6244         }
6245
6246         if (found_ref) {
6247                 BUG_ON(num_refs != 1);
6248                 if (back->node.found_ref)
6249                         BUG_ON(back->bytes != max_size);
6250                 back->node.found_ref = 1;
6251                 back->found_ref += 1;
6252                 back->bytes = max_size;
6253                 back->disk_bytenr = bytenr;
6254                 rec->refs += 1;
6255                 rec->content_checked = 1;
6256                 rec->owner_ref_checked = 1;
6257         } else {
6258                 if (back->node.found_extent_tree) {
6259                         fprintf(stderr, "Extent back ref already exists "
6260                                 "for %llu parent %llu root %llu "
6261                                 "owner %llu offset %llu num_refs %lu\n",
6262                                 (unsigned long long)bytenr,
6263                                 (unsigned long long)parent,
6264                                 (unsigned long long)root,
6265                                 (unsigned long long)owner,
6266                                 (unsigned long long)offset,
6267                                 (unsigned long)num_refs);
6268                 }
6269                 back->num_refs = num_refs;
6270                 back->node.found_extent_tree = 1;
6271         }
6272         maybe_free_extent_rec(extent_cache, rec);
6273         return 0;
6274 }
6275
6276 static int add_pending(struct cache_tree *pending,
6277                        struct cache_tree *seen, u64 bytenr, u32 size)
6278 {
6279         int ret;
6280         ret = add_cache_extent(seen, bytenr, size);
6281         if (ret)
6282                 return ret;
6283         add_cache_extent(pending, bytenr, size);
6284         return 0;
6285 }
6286
6287 static int pick_next_pending(struct cache_tree *pending,
6288                         struct cache_tree *reada,
6289                         struct cache_tree *nodes,
6290                         u64 last, struct block_info *bits, int bits_nr,
6291                         int *reada_bits)
6292 {
6293         unsigned long node_start = last;
6294         struct cache_extent *cache;
6295         int ret;
6296
6297         cache = search_cache_extent(reada, 0);
6298         if (cache) {
6299                 bits[0].start = cache->start;
6300                 bits[0].size = cache->size;
6301                 *reada_bits = 1;
6302                 return 1;
6303         }
6304         *reada_bits = 0;
6305         if (node_start > 32768)
6306                 node_start -= 32768;
6307
6308         cache = search_cache_extent(nodes, node_start);
6309         if (!cache)
6310                 cache = search_cache_extent(nodes, 0);
6311
6312         if (!cache) {
6313                  cache = search_cache_extent(pending, 0);
6314                  if (!cache)
6315                          return 0;
6316                  ret = 0;
6317                  do {
6318                          bits[ret].start = cache->start;
6319                          bits[ret].size = cache->size;
6320                          cache = next_cache_extent(cache);
6321                          ret++;
6322                  } while (cache && ret < bits_nr);
6323                  return ret;
6324         }
6325
6326         ret = 0;
6327         do {
6328                 bits[ret].start = cache->start;
6329                 bits[ret].size = cache->size;
6330                 cache = next_cache_extent(cache);
6331                 ret++;
6332         } while (cache && ret < bits_nr);
6333
6334         if (bits_nr - ret > 8) {
6335                 u64 lookup = bits[0].start + bits[0].size;
6336                 struct cache_extent *next;
6337                 next = search_cache_extent(pending, lookup);
6338                 while(next) {
6339                         if (next->start - lookup > 32768)
6340                                 break;
6341                         bits[ret].start = next->start;
6342                         bits[ret].size = next->size;
6343                         lookup = next->start + next->size;
6344                         ret++;
6345                         if (ret == bits_nr)
6346                                 break;
6347                         next = next_cache_extent(next);
6348                         if (!next)
6349                                 break;
6350                 }
6351         }
6352         return ret;
6353 }
6354
6355 static void free_chunk_record(struct cache_extent *cache)
6356 {
6357         struct chunk_record *rec;
6358
6359         rec = container_of(cache, struct chunk_record, cache);
6360         list_del_init(&rec->list);
6361         list_del_init(&rec->dextents);
6362         free(rec);
6363 }
6364
6365 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6366 {
6367         cache_tree_free_extents(chunk_cache, free_chunk_record);
6368 }
6369
6370 static void free_device_record(struct rb_node *node)
6371 {
6372         struct device_record *rec;
6373
6374         rec = container_of(node, struct device_record, node);
6375         free(rec);
6376 }
6377
6378 FREE_RB_BASED_TREE(device_cache, free_device_record);
6379
6380 int insert_block_group_record(struct block_group_tree *tree,
6381                               struct block_group_record *bg_rec)
6382 {
6383         int ret;
6384
6385         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6386         if (ret)
6387                 return ret;
6388
6389         list_add_tail(&bg_rec->list, &tree->block_groups);
6390         return 0;
6391 }
6392
6393 static void free_block_group_record(struct cache_extent *cache)
6394 {
6395         struct block_group_record *rec;
6396
6397         rec = container_of(cache, struct block_group_record, cache);
6398         list_del_init(&rec->list);
6399         free(rec);
6400 }
6401
6402 void free_block_group_tree(struct block_group_tree *tree)
6403 {
6404         cache_tree_free_extents(&tree->tree, free_block_group_record);
6405 }
6406
6407 int insert_device_extent_record(struct device_extent_tree *tree,
6408                                 struct device_extent_record *de_rec)
6409 {
6410         int ret;
6411
6412         /*
6413          * Device extent is a bit different from the other extents, because
6414          * the extents which belong to the different devices may have the
6415          * same start and size, so we need use the special extent cache
6416          * search/insert functions.
6417          */
6418         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6419         if (ret)
6420                 return ret;
6421
6422         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6423         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6424         return 0;
6425 }
6426
6427 static void free_device_extent_record(struct cache_extent *cache)
6428 {
6429         struct device_extent_record *rec;
6430
6431         rec = container_of(cache, struct device_extent_record, cache);
6432         if (!list_empty(&rec->chunk_list))
6433                 list_del_init(&rec->chunk_list);
6434         if (!list_empty(&rec->device_list))
6435                 list_del_init(&rec->device_list);
6436         free(rec);
6437 }
6438
6439 void free_device_extent_tree(struct device_extent_tree *tree)
6440 {
6441         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6442 }
6443
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the v0 ref count.
 * In both cases key.objectid is the extent start and key.offset the parent.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                                0, 0);
}
#endif
6464
6465 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6466                                             struct btrfs_key *key,
6467                                             int slot)
6468 {
6469         struct btrfs_chunk *ptr;
6470         struct chunk_record *rec;
6471         int num_stripes, i;
6472
6473         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6474         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6475
6476         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6477         if (!rec) {
6478                 fprintf(stderr, "memory allocation failed\n");
6479                 exit(-1);
6480         }
6481
6482         INIT_LIST_HEAD(&rec->list);
6483         INIT_LIST_HEAD(&rec->dextents);
6484         rec->bg_rec = NULL;
6485
6486         rec->cache.start = key->offset;
6487         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6488
6489         rec->generation = btrfs_header_generation(leaf);
6490
6491         rec->objectid = key->objectid;
6492         rec->type = key->type;
6493         rec->offset = key->offset;
6494
6495         rec->length = rec->cache.size;
6496         rec->owner = btrfs_chunk_owner(leaf, ptr);
6497         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6498         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6499         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6500         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6501         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6502         rec->num_stripes = num_stripes;
6503         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6504
6505         for (i = 0; i < rec->num_stripes; ++i) {
6506                 rec->stripes[i].devid =
6507                         btrfs_stripe_devid_nr(leaf, ptr, i);
6508                 rec->stripes[i].offset =
6509                         btrfs_stripe_offset_nr(leaf, ptr, i);
6510                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6511                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6512                                 BTRFS_UUID_SIZE);
6513         }
6514
6515         return rec;
6516 }
6517
6518 static int process_chunk_item(struct cache_tree *chunk_cache,
6519                               struct btrfs_key *key, struct extent_buffer *eb,
6520                               int slot)
6521 {
6522         struct chunk_record *rec;
6523         struct btrfs_chunk *chunk;
6524         int ret = 0;
6525
6526         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6527         /*
6528          * Do extra check for this chunk item,
6529          *
6530          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6531          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6532          * and owner<->key_type check.
6533          */
6534         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6535                                       key->offset);
6536         if (ret < 0) {
6537                 error("chunk(%llu, %llu) is not valid, ignore it",
6538                       key->offset, btrfs_chunk_length(eb, chunk));
6539                 return 0;
6540         }
6541         rec = btrfs_new_chunk_record(eb, key, slot);
6542         ret = insert_cache_extent(chunk_cache, &rec->cache);
6543         if (ret) {
6544                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6545                         rec->offset, rec->length);
6546                 free(rec);
6547         }
6548
6549         return ret;
6550 }
6551
6552 static int process_device_item(struct rb_root *dev_cache,
6553                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6554 {
6555         struct btrfs_dev_item *ptr;
6556         struct device_record *rec;
6557         int ret = 0;
6558
6559         ptr = btrfs_item_ptr(eb,
6560                 slot, struct btrfs_dev_item);
6561
6562         rec = malloc(sizeof(*rec));
6563         if (!rec) {
6564                 fprintf(stderr, "memory allocation failed\n");
6565                 return -ENOMEM;
6566         }
6567
6568         rec->devid = key->offset;
6569         rec->generation = btrfs_header_generation(eb);
6570
6571         rec->objectid = key->objectid;
6572         rec->type = key->type;
6573         rec->offset = key->offset;
6574
6575         rec->devid = btrfs_device_id(eb, ptr);
6576         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6577         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6578
6579         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6580         if (ret) {
6581                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6582                 free(rec);
6583         }
6584
6585         return ret;
6586 }
6587
6588 struct block_group_record *
6589 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6590                              int slot)
6591 {
6592         struct btrfs_block_group_item *ptr;
6593         struct block_group_record *rec;
6594
6595         rec = calloc(1, sizeof(*rec));
6596         if (!rec) {
6597                 fprintf(stderr, "memory allocation failed\n");
6598                 exit(-1);
6599         }
6600
6601         rec->cache.start = key->objectid;
6602         rec->cache.size = key->offset;
6603
6604         rec->generation = btrfs_header_generation(leaf);
6605
6606         rec->objectid = key->objectid;
6607         rec->type = key->type;
6608         rec->offset = key->offset;
6609
6610         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6611         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6612
6613         INIT_LIST_HEAD(&rec->list);
6614
6615         return rec;
6616 }
6617
6618 static int process_block_group_item(struct block_group_tree *block_group_cache,
6619                                     struct btrfs_key *key,
6620                                     struct extent_buffer *eb, int slot)
6621 {
6622         struct block_group_record *rec;
6623         int ret = 0;
6624
6625         rec = btrfs_new_block_group_record(eb, key, slot);
6626         ret = insert_block_group_record(block_group_cache, rec);
6627         if (ret) {
6628                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6629                         rec->objectid, rec->offset);
6630                 free(rec);
6631         }
6632
6633         return ret;
6634 }
6635
6636 struct device_extent_record *
6637 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6638                                struct btrfs_key *key, int slot)
6639 {
6640         struct device_extent_record *rec;
6641         struct btrfs_dev_extent *ptr;
6642
6643         rec = calloc(1, sizeof(*rec));
6644         if (!rec) {
6645                 fprintf(stderr, "memory allocation failed\n");
6646                 exit(-1);
6647         }
6648
6649         rec->cache.objectid = key->objectid;
6650         rec->cache.start = key->offset;
6651
6652         rec->generation = btrfs_header_generation(leaf);
6653
6654         rec->objectid = key->objectid;
6655         rec->type = key->type;
6656         rec->offset = key->offset;
6657
6658         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6659         rec->chunk_objecteid =
6660                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6661         rec->chunk_offset =
6662                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6663         rec->length = btrfs_dev_extent_length(leaf, ptr);
6664         rec->cache.size = rec->length;
6665
6666         INIT_LIST_HEAD(&rec->chunk_list);
6667         INIT_LIST_HEAD(&rec->device_list);
6668
6669         return rec;
6670 }
6671
6672 static int
6673 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6674                            struct btrfs_key *key, struct extent_buffer *eb,
6675                            int slot)
6676 {
6677         struct device_extent_record *rec;
6678         int ret;
6679
6680         rec = btrfs_new_device_extent_record(eb, key, slot);
6681         ret = insert_device_extent_record(dev_extent_cache, rec);
6682         if (ret) {
6683                 fprintf(stderr,
6684                         "Device extent[%llu, %llu, %llu] existed.\n",
6685                         rec->objectid, rec->offset, rec->length);
6686                 free(rec);
6687         }
6688
6689         return ret;
6690 }
6691
6692 static int process_extent_item(struct btrfs_root *root,
6693                                struct cache_tree *extent_cache,
6694                                struct extent_buffer *eb, int slot)
6695 {
6696         struct btrfs_extent_item *ei;
6697         struct btrfs_extent_inline_ref *iref;
6698         struct btrfs_extent_data_ref *dref;
6699         struct btrfs_shared_data_ref *sref;
6700         struct btrfs_key key;
6701         struct extent_record tmpl;
6702         unsigned long end;
6703         unsigned long ptr;
6704         int ret;
6705         int type;
6706         u32 item_size = btrfs_item_size_nr(eb, slot);
6707         u64 refs = 0;
6708         u64 offset;
6709         u64 num_bytes;
6710         int metadata = 0;
6711
6712         btrfs_item_key_to_cpu(eb, &key, slot);
6713
6714         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6715                 metadata = 1;
6716                 num_bytes = root->nodesize;
6717         } else {
6718                 num_bytes = key.offset;
6719         }
6720
6721         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6722                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6723                       key.objectid, root->sectorsize);
6724                 return -EIO;
6725         }
6726         if (item_size < sizeof(*ei)) {
6727 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6728                 struct btrfs_extent_item_v0 *ei0;
6729                 BUG_ON(item_size != sizeof(*ei0));
6730                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6731                 refs = btrfs_extent_refs_v0(eb, ei0);
6732 #else
6733                 BUG();
6734 #endif
6735                 memset(&tmpl, 0, sizeof(tmpl));
6736                 tmpl.start = key.objectid;
6737                 tmpl.nr = num_bytes;
6738                 tmpl.extent_item_refs = refs;
6739                 tmpl.metadata = metadata;
6740                 tmpl.found_rec = 1;
6741                 tmpl.max_size = num_bytes;
6742
6743                 return add_extent_rec(extent_cache, &tmpl);
6744         }
6745
6746         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6747         refs = btrfs_extent_refs(eb, ei);
6748         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6749                 metadata = 1;
6750         else
6751                 metadata = 0;
6752         if (metadata && num_bytes != root->nodesize) {
6753                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6754                       num_bytes, root->nodesize);
6755                 return -EIO;
6756         }
6757         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6758                 error("ignore invalid data extent, length %llu is not aligned to %u",
6759                       num_bytes, root->sectorsize);
6760                 return -EIO;
6761         }
6762
6763         memset(&tmpl, 0, sizeof(tmpl));
6764         tmpl.start = key.objectid;
6765         tmpl.nr = num_bytes;
6766         tmpl.extent_item_refs = refs;
6767         tmpl.metadata = metadata;
6768         tmpl.found_rec = 1;
6769         tmpl.max_size = num_bytes;
6770         add_extent_rec(extent_cache, &tmpl);
6771
6772         ptr = (unsigned long)(ei + 1);
6773         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6774             key.type == BTRFS_EXTENT_ITEM_KEY)
6775                 ptr += sizeof(struct btrfs_tree_block_info);
6776
6777         end = (unsigned long)ei + item_size;
6778         while (ptr < end) {
6779                 iref = (struct btrfs_extent_inline_ref *)ptr;
6780                 type = btrfs_extent_inline_ref_type(eb, iref);
6781                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6782                 switch (type) {
6783                 case BTRFS_TREE_BLOCK_REF_KEY:
6784                         ret = add_tree_backref(extent_cache, key.objectid,
6785                                         0, offset, 0);
6786                         if (ret < 0)
6787                                 error("add_tree_backref failed: %s",
6788                                       strerror(-ret));
6789                         break;
6790                 case BTRFS_SHARED_BLOCK_REF_KEY:
6791                         ret = add_tree_backref(extent_cache, key.objectid,
6792                                         offset, 0, 0);
6793                         if (ret < 0)
6794                                 error("add_tree_backref failed: %s",
6795                                       strerror(-ret));
6796                         break;
6797                 case BTRFS_EXTENT_DATA_REF_KEY:
6798                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6799                         add_data_backref(extent_cache, key.objectid, 0,
6800                                         btrfs_extent_data_ref_root(eb, dref),
6801                                         btrfs_extent_data_ref_objectid(eb,
6802                                                                        dref),
6803                                         btrfs_extent_data_ref_offset(eb, dref),
6804                                         btrfs_extent_data_ref_count(eb, dref),
6805                                         0, num_bytes);
6806                         break;
6807                 case BTRFS_SHARED_DATA_REF_KEY:
6808                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6809                         add_data_backref(extent_cache, key.objectid, offset,
6810                                         0, 0, 0,
6811                                         btrfs_shared_data_ref_count(eb, sref),
6812                                         0, num_bytes);
6813                         break;
6814                 default:
6815                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6816                                 key.objectid, key.type, num_bytes);
6817                         goto out;
6818                 }
6819                 ptr += btrfs_extent_inline_ref_size(type);
6820         }
6821         WARN_ON(ptr > end);
6822 out:
6823         return 0;
6824 }
6825
6826 static int check_cache_range(struct btrfs_root *root,
6827                              struct btrfs_block_group_cache *cache,
6828                              u64 offset, u64 bytes)
6829 {
6830         struct btrfs_free_space *entry;
6831         u64 *logical;
6832         u64 bytenr;
6833         int stripe_len;
6834         int i, nr, ret;
6835
6836         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6837                 bytenr = btrfs_sb_offset(i);
6838                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6839                                        cache->key.objectid, bytenr, 0,
6840                                        &logical, &nr, &stripe_len);
6841                 if (ret)
6842                         return ret;
6843
6844                 while (nr--) {
6845                         if (logical[nr] + stripe_len <= offset)
6846                                 continue;
6847                         if (offset + bytes <= logical[nr])
6848                                 continue;
6849                         if (logical[nr] == offset) {
6850                                 if (stripe_len >= bytes) {
6851                                         free(logical);
6852                                         return 0;
6853                                 }
6854                                 bytes -= stripe_len;
6855                                 offset += stripe_len;
6856                         } else if (logical[nr] < offset) {
6857                                 if (logical[nr] + stripe_len >=
6858                                     offset + bytes) {
6859                                         free(logical);
6860                                         return 0;
6861                                 }
6862                                 bytes = (offset + bytes) -
6863                                         (logical[nr] + stripe_len);
6864                                 offset = logical[nr] + stripe_len;
6865                         } else {
6866                                 /*
6867                                  * Could be tricky, the super may land in the
6868                                  * middle of the area we're checking.  First
6869                                  * check the easiest case, it's at the end.
6870                                  */
6871                                 if (logical[nr] + stripe_len >=
6872                                     bytes + offset) {
6873                                         bytes = logical[nr] - offset;
6874                                         continue;
6875                                 }
6876
6877                                 /* Check the left side */
6878                                 ret = check_cache_range(root, cache,
6879                                                         offset,
6880                                                         logical[nr] - offset);
6881                                 if (ret) {
6882                                         free(logical);
6883                                         return ret;
6884                                 }
6885
6886                                 /* Now we continue with the right side */
6887                                 bytes = (offset + bytes) -
6888                                         (logical[nr] + stripe_len);
6889                                 offset = logical[nr] + stripe_len;
6890                         }
6891                 }
6892
6893                 free(logical);
6894         }
6895
6896         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6897         if (!entry) {
6898                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6899                         offset, offset+bytes);
6900                 return -EINVAL;
6901         }
6902
6903         if (entry->offset != offset) {
6904                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6905                         entry->offset);
6906                 return -EINVAL;
6907         }
6908
6909         if (entry->bytes != bytes) {
6910                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6911                         bytes, entry->bytes, offset);
6912                 return -EINVAL;
6913         }
6914
6915         unlink_free_space(cache->free_space_ctl, entry);
6916         free(entry);
6917         return 0;
6918 }
6919
6920 static int verify_space_cache(struct btrfs_root *root,
6921                               struct btrfs_block_group_cache *cache)
6922 {
6923         struct btrfs_path path;
6924         struct extent_buffer *leaf;
6925         struct btrfs_key key;
6926         u64 last;
6927         int ret = 0;
6928
6929         root = root->fs_info->extent_root;
6930
6931         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6932
6933         btrfs_init_path(&path);
6934         key.objectid = last;
6935         key.offset = 0;
6936         key.type = BTRFS_EXTENT_ITEM_KEY;
6937         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6938         if (ret < 0)
6939                 goto out;
6940         ret = 0;
6941         while (1) {
6942                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6943                         ret = btrfs_next_leaf(root, &path);
6944                         if (ret < 0)
6945                                 goto out;
6946                         if (ret > 0) {
6947                                 ret = 0;
6948                                 break;
6949                         }
6950                 }
6951                 leaf = path.nodes[0];
6952                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6953                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6954                         break;
6955                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6956                     key.type != BTRFS_METADATA_ITEM_KEY) {
6957                         path.slots[0]++;
6958                         continue;
6959                 }
6960
6961                 if (last == key.objectid) {
6962                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6963                                 last = key.objectid + key.offset;
6964                         else
6965                                 last = key.objectid + root->nodesize;
6966                         path.slots[0]++;
6967                         continue;
6968                 }
6969
6970                 ret = check_cache_range(root, cache, last,
6971                                         key.objectid - last);
6972                 if (ret)
6973                         break;
6974                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6975                         last = key.objectid + key.offset;
6976                 else
6977                         last = key.objectid + root->nodesize;
6978                 path.slots[0]++;
6979         }
6980
6981         if (last < cache->key.objectid + cache->key.offset)
6982                 ret = check_cache_range(root, cache, last,
6983                                         cache->key.objectid +
6984                                         cache->key.offset - last);
6985
6986 out:
6987         btrfs_release_path(&path);
6988
6989         if (!ret &&
6990             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6991                 fprintf(stderr, "There are still entries left in the space "
6992                         "cache\n");
6993                 ret = -EINVAL;
6994         }
6995
6996         return ret;
6997 }
6998
6999 static int check_space_cache(struct btrfs_root *root)
7000 {
7001         struct btrfs_block_group_cache *cache;
7002         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7003         int ret;
7004         int error = 0;
7005
7006         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7007             btrfs_super_generation(root->fs_info->super_copy) !=
7008             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7009                 printf("cache and super generation don't match, space cache "
7010                        "will be invalidated\n");
7011                 return 0;
7012         }
7013
7014         if (ctx.progress_enabled) {
7015                 ctx.tp = TASK_FREE_SPACE;
7016                 task_start(ctx.info);
7017         }
7018
7019         while (1) {
7020                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7021                 if (!cache)
7022                         break;
7023
7024                 start = cache->key.objectid + cache->key.offset;
7025                 if (!cache->free_space_ctl) {
7026                         if (btrfs_init_free_space_ctl(cache,
7027                                                       root->sectorsize)) {
7028                                 ret = -ENOMEM;
7029                                 break;
7030                         }
7031                 } else {
7032                         btrfs_remove_free_space_cache(cache);
7033                 }
7034
7035                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7036                         ret = exclude_super_stripes(root, cache);
7037                         if (ret) {
7038                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7039                                         strerror(-ret));
7040                                 error++;
7041                                 continue;
7042                         }
7043                         ret = load_free_space_tree(root->fs_info, cache);
7044                         free_excluded_extents(root, cache);
7045                         if (ret < 0) {
7046                                 fprintf(stderr, "could not load free space tree: %s\n",
7047                                         strerror(-ret));
7048                                 error++;
7049                                 continue;
7050                         }
7051                         error += ret;
7052                 } else {
7053                         ret = load_free_space_cache(root->fs_info, cache);
7054                         if (!ret)
7055                                 continue;
7056                 }
7057
7058                 ret = verify_space_cache(root, cache);
7059                 if (ret) {
7060                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7061                                 cache->key.objectid);
7062                         error++;
7063                 }
7064         }
7065
7066         task_stop(ctx.info);
7067
7068         return error ? -EINVAL : 0;
7069 }
7070
7071 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7072                         u64 num_bytes, unsigned long leaf_offset,
7073                         struct extent_buffer *eb) {
7074
7075         u64 offset = 0;
7076         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7077         char *data;
7078         unsigned long csum_offset;
7079         u32 csum;
7080         u32 csum_expected;
7081         u64 read_len;
7082         u64 data_checked = 0;
7083         u64 tmp;
7084         int ret = 0;
7085         int mirror;
7086         int num_copies;
7087
7088         if (num_bytes % root->sectorsize)
7089                 return -EINVAL;
7090
7091         data = malloc(num_bytes);
7092         if (!data)
7093                 return -ENOMEM;
7094
7095         while (offset < num_bytes) {
7096                 mirror = 0;
7097 again:
7098                 read_len = num_bytes - offset;
7099                 /* read as much space once a time */
7100                 ret = read_extent_data(root, data + offset,
7101                                 bytenr + offset, &read_len, mirror);
7102                 if (ret)
7103                         goto out;
7104                 data_checked = 0;
7105                 /* verify every 4k data's checksum */
7106                 while (data_checked < read_len) {
7107                         csum = ~(u32)0;
7108                         tmp = offset + data_checked;
7109
7110                         csum = btrfs_csum_data((char *)data + tmp,
7111                                                csum, root->sectorsize);
7112                         btrfs_csum_final(csum, (u8 *)&csum);
7113
7114                         csum_offset = leaf_offset +
7115                                  tmp / root->sectorsize * csum_size;
7116                         read_extent_buffer(eb, (char *)&csum_expected,
7117                                            csum_offset, csum_size);
7118                         /* try another mirror */
7119                         if (csum != csum_expected) {
7120                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7121                                                 mirror, bytenr + tmp,
7122                                                 csum, csum_expected);
7123                                 num_copies = btrfs_num_copies(
7124                                                 &root->fs_info->mapping_tree,
7125                                                 bytenr, num_bytes);
7126                                 if (mirror < num_copies - 1) {
7127                                         mirror += 1;
7128                                         goto again;
7129                                 }
7130                         }
7131                         data_checked += root->sectorsize;
7132                 }
7133                 offset += read_len;
7134         }
7135 out:
7136         free(data);
7137         return ret;
7138 }
7139
7140 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7141                                u64 num_bytes)
7142 {
7143         struct btrfs_path path;
7144         struct extent_buffer *leaf;
7145         struct btrfs_key key;
7146         int ret;
7147
7148         btrfs_init_path(&path);
7149         key.objectid = bytenr;
7150         key.type = BTRFS_EXTENT_ITEM_KEY;
7151         key.offset = (u64)-1;
7152
7153 again:
7154         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7155                                 0, 0);
7156         if (ret < 0) {
7157                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7158                 btrfs_release_path(&path);
7159                 return ret;
7160         } else if (ret) {
7161                 if (path.slots[0] > 0) {
7162                         path.slots[0]--;
7163                 } else {
7164                         ret = btrfs_prev_leaf(root, &path);
7165                         if (ret < 0) {
7166                                 goto out;
7167                         } else if (ret > 0) {
7168                                 ret = 0;
7169                                 goto out;
7170                         }
7171                 }
7172         }
7173
7174         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7175
7176         /*
7177          * Block group items come before extent items if they have the same
7178          * bytenr, so walk back one more just in case.  Dear future traveller,
7179          * first congrats on mastering time travel.  Now if it's not too much
7180          * trouble could you go back to 2006 and tell Chris to make the
7181          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7182          * EXTENT_ITEM_KEY please?
7183          */
7184         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7185                 if (path.slots[0] > 0) {
7186                         path.slots[0]--;
7187                 } else {
7188                         ret = btrfs_prev_leaf(root, &path);
7189                         if (ret < 0) {
7190                                 goto out;
7191                         } else if (ret > 0) {
7192                                 ret = 0;
7193                                 goto out;
7194                         }
7195                 }
7196                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7197         }
7198
7199         while (num_bytes) {
7200                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7201                         ret = btrfs_next_leaf(root, &path);
7202                         if (ret < 0) {
7203                                 fprintf(stderr, "Error going to next leaf "
7204                                         "%d\n", ret);
7205                                 btrfs_release_path(&path);
7206                                 return ret;
7207                         } else if (ret) {
7208                                 break;
7209                         }
7210                 }
7211                 leaf = path.nodes[0];
7212                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7213                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7214                         path.slots[0]++;
7215                         continue;
7216                 }
7217                 if (key.objectid + key.offset < bytenr) {
7218                         path.slots[0]++;
7219                         continue;
7220                 }
7221                 if (key.objectid > bytenr + num_bytes)
7222                         break;
7223
7224                 if (key.objectid == bytenr) {
7225                         if (key.offset >= num_bytes) {
7226                                 num_bytes = 0;
7227                                 break;
7228                         }
7229                         num_bytes -= key.offset;
7230                         bytenr += key.offset;
7231                 } else if (key.objectid < bytenr) {
7232                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7233                                 num_bytes = 0;
7234                                 break;
7235                         }
7236                         num_bytes = (bytenr + num_bytes) -
7237                                 (key.objectid + key.offset);
7238                         bytenr = key.objectid + key.offset;
7239                 } else {
7240                         if (key.objectid + key.offset < bytenr + num_bytes) {
7241                                 u64 new_start = key.objectid + key.offset;
7242                                 u64 new_bytes = bytenr + num_bytes - new_start;
7243
7244                                 /*
7245                                  * Weird case, the extent is in the middle of
7246                                  * our range, we'll have to search one side
7247                                  * and then the other.  Not sure if this happens
7248                                  * in real life, but no harm in coding it up
7249                                  * anyway just in case.
7250                                  */
7251                                 btrfs_release_path(&path);
7252                                 ret = check_extent_exists(root, new_start,
7253                                                           new_bytes);
7254                                 if (ret) {
7255                                         fprintf(stderr, "Right section didn't "
7256                                                 "have a record\n");
7257                                         break;
7258                                 }
7259                                 num_bytes = key.objectid - bytenr;
7260                                 goto again;
7261                         }
7262                         num_bytes = key.objectid - bytenr;
7263                 }
7264                 path.slots[0]++;
7265         }
7266         ret = 0;
7267
7268 out:
7269         if (num_bytes && !ret) {
7270                 fprintf(stderr, "There are no extents for csum range "
7271                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7272                 ret = 1;
7273         }
7274
7275         btrfs_release_path(&path);
7276         return ret;
7277 }
7278
7279 static int check_csums(struct btrfs_root *root)
7280 {
7281         struct btrfs_path path;
7282         struct extent_buffer *leaf;
7283         struct btrfs_key key;
7284         u64 offset = 0, num_bytes = 0;
7285         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7286         int errors = 0;
7287         int ret;
7288         u64 data_len;
7289         unsigned long leaf_offset;
7290
7291         root = root->fs_info->csum_root;
7292         if (!extent_buffer_uptodate(root->node)) {
7293                 fprintf(stderr, "No valid csum tree found\n");
7294                 return -ENOENT;
7295         }
7296
7297         btrfs_init_path(&path);
7298         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7299         key.type = BTRFS_EXTENT_CSUM_KEY;
7300         key.offset = 0;
7301         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7302         if (ret < 0) {
7303                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7304                 btrfs_release_path(&path);
7305                 return ret;
7306         }
7307
7308         if (ret > 0 && path.slots[0])
7309                 path.slots[0]--;
7310         ret = 0;
7311
7312         while (1) {
7313                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7314                         ret = btrfs_next_leaf(root, &path);
7315                         if (ret < 0) {
7316                                 fprintf(stderr, "Error going to next leaf "
7317                                         "%d\n", ret);
7318                                 break;
7319                         }
7320                         if (ret)
7321                                 break;
7322                 }
7323                 leaf = path.nodes[0];
7324
7325                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7326                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7327                         path.slots[0]++;
7328                         continue;
7329                 }
7330
7331                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7332                               csum_size) * root->sectorsize;
7333                 if (!check_data_csum)
7334                         goto skip_csum_check;
7335                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7336                 ret = check_extent_csums(root, key.offset, data_len,
7337                                          leaf_offset, leaf);
7338                 if (ret)
7339                         break;
7340 skip_csum_check:
7341                 if (!num_bytes) {
7342                         offset = key.offset;
7343                 } else if (key.offset != offset + num_bytes) {
7344                         ret = check_extent_exists(root, offset, num_bytes);
7345                         if (ret) {
7346                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7347                                         "there is no extent record\n",
7348                                         offset, offset+num_bytes);
7349                                 errors++;
7350                         }
7351                         offset = key.offset;
7352                         num_bytes = 0;
7353                 }
7354                 num_bytes += data_len;
7355                 path.slots[0]++;
7356         }
7357
7358         btrfs_release_path(&path);
7359         return errors;
7360 }
7361
7362 static int is_dropped_key(struct btrfs_key *key,
7363                           struct btrfs_key *drop_key) {
7364         if (key->objectid < drop_key->objectid)
7365                 return 1;
7366         else if (key->objectid == drop_key->objectid) {
7367                 if (key->type < drop_key->type)
7368                         return 1;
7369                 else if (key->type == drop_key->type) {
7370                         if (key->offset < drop_key->offset)
7371                                 return 1;
7372                 }
7373         }
7374         return 0;
7375 }
7376
7377 /*
7378  * Here are the rules for FULL_BACKREF.
7379  *
7380  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7381  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7382  *      FULL_BACKREF set.
7383  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7384  *    if it happened after the relocation occurred since we'll have dropped the
7385  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7386  *    have no real way to know for sure.
7387  *
7388  * We process the blocks one root at a time, and we start from the lowest root
7389  * objectid and go to the highest.  So we can just lookup the owner backref for
7390  * the record and if we don't find it then we know it doesn't exist and we have
7391  * a FULL BACKREF.
7392  *
7393  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7394  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7395  * be set or not and then we can check later once we've gathered all the refs.
7396  */
7397 static int calc_extent_flag(struct cache_tree *extent_cache,
7398                            struct extent_buffer *buf,
7399                            struct root_item_record *ri,
7400                            u64 *flags)
7401 {
7402         struct extent_record *rec;
7403         struct cache_extent *cache;
7404         struct tree_backref *tback;
7405         u64 owner = 0;
7406
7407         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7408         /* we have added this extent before */
7409         if (!cache)
7410                 return -ENOENT;
7411
7412         rec = container_of(cache, struct extent_record, cache);
7413
7414         /*
7415          * Except file/reloc tree, we can not have
7416          * FULL BACKREF MODE
7417          */
7418         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7419                 goto normal;
7420         /*
7421          * root node
7422          */
7423         if (buf->start == ri->bytenr)
7424                 goto normal;
7425
7426         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7427                 goto full_backref;
7428
7429         owner = btrfs_header_owner(buf);
7430         if (owner == ri->objectid)
7431                 goto normal;
7432
7433         tback = find_tree_backref(rec, 0, owner);
7434         if (!tback)
7435                 goto full_backref;
7436 normal:
7437         *flags = 0;
7438         if (rec->flag_block_full_backref != FLAG_UNSET &&
7439             rec->flag_block_full_backref != 0)
7440                 rec->bad_full_backref = 1;
7441         return 0;
7442 full_backref:
7443         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7444         if (rec->flag_block_full_backref != FLAG_UNSET &&
7445             rec->flag_block_full_backref != 1)
7446                 rec->bad_full_backref = 1;
7447         return 0;
7448 }
7449
7450 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7451 {
7452         fprintf(stderr, "Invalid key type(");
7453         print_key_type(stderr, 0, key_type);
7454         fprintf(stderr, ") found in root(");
7455         print_objectid(stderr, rootid, 0);
7456         fprintf(stderr, ")\n");
7457 }
7458
7459 /*
7460  * Check if the key is valid with its extent buffer.
7461  *
7462  * This is a early check in case invalid key exists in a extent buffer
7463  * This is not comprehensive yet, but should prevent wrong key/item passed
7464  * further
7465  */
7466 static int check_type_with_root(u64 rootid, u8 key_type)
7467 {
7468         switch (key_type) {
7469         /* Only valid in chunk tree */
7470         case BTRFS_DEV_ITEM_KEY:
7471         case BTRFS_CHUNK_ITEM_KEY:
7472                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7473                         goto err;
7474                 break;
7475         /* valid in csum and log tree */
7476         case BTRFS_CSUM_TREE_OBJECTID:
7477                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7478                       is_fstree(rootid)))
7479                         goto err;
7480                 break;
7481         case BTRFS_EXTENT_ITEM_KEY:
7482         case BTRFS_METADATA_ITEM_KEY:
7483         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7484                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7485                         goto err;
7486                 break;
7487         case BTRFS_ROOT_ITEM_KEY:
7488                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7489                         goto err;
7490                 break;
7491         case BTRFS_DEV_EXTENT_KEY:
7492                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7493                         goto err;
7494                 break;
7495         }
7496         return 0;
7497 err:
7498         report_mismatch_key_root(key_type, rootid);
7499         return -EINVAL;
7500 }
7501
7502 static int run_next_block(struct btrfs_root *root,
7503                           struct block_info *bits,
7504                           int bits_nr,
7505                           u64 *last,
7506                           struct cache_tree *pending,
7507                           struct cache_tree *seen,
7508                           struct cache_tree *reada,
7509                           struct cache_tree *nodes,
7510                           struct cache_tree *extent_cache,
7511                           struct cache_tree *chunk_cache,
7512                           struct rb_root *dev_cache,
7513                           struct block_group_tree *block_group_cache,
7514                           struct device_extent_tree *dev_extent_cache,
7515                           struct root_item_record *ri)
7516 {
7517         struct extent_buffer *buf;
7518         struct extent_record *rec = NULL;
7519         u64 bytenr;
7520         u32 size;
7521         u64 parent;
7522         u64 owner;
7523         u64 flags;
7524         u64 ptr;
7525         u64 gen = 0;
7526         int ret = 0;
7527         int i;
7528         int nritems;
7529         struct btrfs_key key;
7530         struct cache_extent *cache;
7531         int reada_bits;
7532
7533         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7534                                     bits_nr, &reada_bits);
7535         if (nritems == 0)
7536                 return 1;
7537
7538         if (!reada_bits) {
7539                 for(i = 0; i < nritems; i++) {
7540                         ret = add_cache_extent(reada, bits[i].start,
7541                                                bits[i].size);
7542                         if (ret == -EEXIST)
7543                                 continue;
7544
7545                         /* fixme, get the parent transid */
7546                         readahead_tree_block(root, bits[i].start,
7547                                              bits[i].size, 0);
7548                 }
7549         }
7550         *last = bits[0].start;
7551         bytenr = bits[0].start;
7552         size = bits[0].size;
7553
7554         cache = lookup_cache_extent(pending, bytenr, size);
7555         if (cache) {
7556                 remove_cache_extent(pending, cache);
7557                 free(cache);
7558         }
7559         cache = lookup_cache_extent(reada, bytenr, size);
7560         if (cache) {
7561                 remove_cache_extent(reada, cache);
7562                 free(cache);
7563         }
7564         cache = lookup_cache_extent(nodes, bytenr, size);
7565         if (cache) {
7566                 remove_cache_extent(nodes, cache);
7567                 free(cache);
7568         }
7569         cache = lookup_cache_extent(extent_cache, bytenr, size);
7570         if (cache) {
7571                 rec = container_of(cache, struct extent_record, cache);
7572                 gen = rec->parent_generation;
7573         }
7574
7575         /* fixme, get the real parent transid */
7576         buf = read_tree_block(root, bytenr, size, gen);
7577         if (!extent_buffer_uptodate(buf)) {
7578                 record_bad_block_io(root->fs_info,
7579                                     extent_cache, bytenr, size);
7580                 goto out;
7581         }
7582
7583         nritems = btrfs_header_nritems(buf);
7584
7585         flags = 0;
7586         if (!init_extent_tree) {
7587                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7588                                        btrfs_header_level(buf), 1, NULL,
7589                                        &flags);
7590                 if (ret < 0) {
7591                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7592                         if (ret < 0) {
7593                                 fprintf(stderr, "Couldn't calc extent flags\n");
7594                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7595                         }
7596                 }
7597         } else {
7598                 flags = 0;
7599                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7600                 if (ret < 0) {
7601                         fprintf(stderr, "Couldn't calc extent flags\n");
7602                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7603                 }
7604         }
7605
7606         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7607                 if (ri != NULL &&
7608                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7609                     ri->objectid == btrfs_header_owner(buf)) {
7610                         /*
7611                          * Ok we got to this block from it's original owner and
7612                          * we have FULL_BACKREF set.  Relocation can leave
7613                          * converted blocks over so this is altogether possible,
7614                          * however it's not possible if the generation > the
7615                          * last snapshot, so check for this case.
7616                          */
7617                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7618                             btrfs_header_generation(buf) > ri->last_snapshot) {
7619                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7620                                 rec->bad_full_backref = 1;
7621                         }
7622                 }
7623         } else {
7624                 if (ri != NULL &&
7625                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7626                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7627                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7628                         rec->bad_full_backref = 1;
7629                 }
7630         }
7631
7632         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7633                 rec->flag_block_full_backref = 1;
7634                 parent = bytenr;
7635                 owner = 0;
7636         } else {
7637                 rec->flag_block_full_backref = 0;
7638                 parent = 0;
7639                 owner = btrfs_header_owner(buf);
7640         }
7641
7642         ret = check_block(root, extent_cache, buf, flags);
7643         if (ret)
7644                 goto out;
7645
7646         if (btrfs_is_leaf(buf)) {
7647                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7648                 for (i = 0; i < nritems; i++) {
7649                         struct btrfs_file_extent_item *fi;
7650                         btrfs_item_key_to_cpu(buf, &key, i);
7651                         /*
7652                          * Check key type against the leaf owner.
7653                          * Could filter quite a lot of early error if
7654                          * owner is correct
7655                          */
7656                         if (check_type_with_root(btrfs_header_owner(buf),
7657                                                  key.type)) {
7658                                 fprintf(stderr, "ignoring invalid key\n");
7659                                 continue;
7660                         }
7661                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7662                                 process_extent_item(root, extent_cache, buf,
7663                                                     i);
7664                                 continue;
7665                         }
7666                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7667                                 process_extent_item(root, extent_cache, buf,
7668                                                     i);
7669                                 continue;
7670                         }
7671                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7672                                 total_csum_bytes +=
7673                                         btrfs_item_size_nr(buf, i);
7674                                 continue;
7675                         }
7676                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7677                                 process_chunk_item(chunk_cache, &key, buf, i);
7678                                 continue;
7679                         }
7680                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7681                                 process_device_item(dev_cache, &key, buf, i);
7682                                 continue;
7683                         }
7684                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7685                                 process_block_group_item(block_group_cache,
7686                                         &key, buf, i);
7687                                 continue;
7688                         }
7689                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7690                                 process_device_extent_item(dev_extent_cache,
7691                                         &key, buf, i);
7692                                 continue;
7693
7694                         }
7695                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7696 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7697                                 process_extent_ref_v0(extent_cache, buf, i);
7698 #else
7699                                 BUG();
7700 #endif
7701                                 continue;
7702                         }
7703
7704                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7705                                 ret = add_tree_backref(extent_cache,
7706                                                 key.objectid, 0, key.offset, 0);
7707                                 if (ret < 0)
7708                                         error("add_tree_backref failed: %s",
7709                                               strerror(-ret));
7710                                 continue;
7711                         }
7712                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7713                                 ret = add_tree_backref(extent_cache,
7714                                                 key.objectid, key.offset, 0, 0);
7715                                 if (ret < 0)
7716                                         error("add_tree_backref failed: %s",
7717                                               strerror(-ret));
7718                                 continue;
7719                         }
7720                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7721                                 struct btrfs_extent_data_ref *ref;
7722                                 ref = btrfs_item_ptr(buf, i,
7723                                                 struct btrfs_extent_data_ref);
7724                                 add_data_backref(extent_cache,
7725                                         key.objectid, 0,
7726                                         btrfs_extent_data_ref_root(buf, ref),
7727                                         btrfs_extent_data_ref_objectid(buf,
7728                                                                        ref),
7729                                         btrfs_extent_data_ref_offset(buf, ref),
7730                                         btrfs_extent_data_ref_count(buf, ref),
7731                                         0, root->sectorsize);
7732                                 continue;
7733                         }
7734                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7735                                 struct btrfs_shared_data_ref *ref;
7736                                 ref = btrfs_item_ptr(buf, i,
7737                                                 struct btrfs_shared_data_ref);
7738                                 add_data_backref(extent_cache,
7739                                         key.objectid, key.offset, 0, 0, 0,
7740                                         btrfs_shared_data_ref_count(buf, ref),
7741                                         0, root->sectorsize);
7742                                 continue;
7743                         }
7744                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7745                                 struct bad_item *bad;
7746
7747                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7748                                         continue;
7749                                 if (!owner)
7750                                         continue;
7751                                 bad = malloc(sizeof(struct bad_item));
7752                                 if (!bad)
7753                                         continue;
7754                                 INIT_LIST_HEAD(&bad->list);
7755                                 memcpy(&bad->key, &key,
7756                                        sizeof(struct btrfs_key));
7757                                 bad->root_id = owner;
7758                                 list_add_tail(&bad->list, &delete_items);
7759                                 continue;
7760                         }
7761                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7762                                 continue;
7763                         fi = btrfs_item_ptr(buf, i,
7764                                             struct btrfs_file_extent_item);
7765                         if (btrfs_file_extent_type(buf, fi) ==
7766                             BTRFS_FILE_EXTENT_INLINE)
7767                                 continue;
7768                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7769                                 continue;
7770
7771                         data_bytes_allocated +=
7772                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7773                         if (data_bytes_allocated < root->sectorsize) {
7774                                 abort();
7775                         }
7776                         data_bytes_referenced +=
7777                                 btrfs_file_extent_num_bytes(buf, fi);
7778                         add_data_backref(extent_cache,
7779                                 btrfs_file_extent_disk_bytenr(buf, fi),
7780                                 parent, owner, key.objectid, key.offset -
7781                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7782                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7783                 }
7784         } else {
7785                 int level;
7786                 struct btrfs_key first_key;
7787
7788                 first_key.objectid = 0;
7789
7790                 if (nritems > 0)
7791                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7792                 level = btrfs_header_level(buf);
7793                 for (i = 0; i < nritems; i++) {
7794                         struct extent_record tmpl;
7795
7796                         ptr = btrfs_node_blockptr(buf, i);
7797                         size = root->nodesize;
7798                         btrfs_node_key_to_cpu(buf, &key, i);
7799                         if (ri != NULL) {
7800                                 if ((level == ri->drop_level)
7801                                     && is_dropped_key(&key, &ri->drop_key)) {
7802                                         continue;
7803                                 }
7804                         }
7805
7806                         memset(&tmpl, 0, sizeof(tmpl));
7807                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7808                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7809                         tmpl.start = ptr;
7810                         tmpl.nr = size;
7811                         tmpl.refs = 1;
7812                         tmpl.metadata = 1;
7813                         tmpl.max_size = size;
7814                         ret = add_extent_rec(extent_cache, &tmpl);
7815                         if (ret < 0)
7816                                 goto out;
7817
7818                         ret = add_tree_backref(extent_cache, ptr, parent,
7819                                         owner, 1);
7820                         if (ret < 0) {
7821                                 error("add_tree_backref failed: %s",
7822                                       strerror(-ret));
7823                                 continue;
7824                         }
7825
7826                         if (level > 1) {
7827                                 add_pending(nodes, seen, ptr, size);
7828                         } else {
7829                                 add_pending(pending, seen, ptr, size);
7830                         }
7831                 }
7832                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7833                                       nritems) * sizeof(struct btrfs_key_ptr);
7834         }
7835         total_btree_bytes += buf->len;
7836         if (fs_root_objectid(btrfs_header_owner(buf)))
7837                 total_fs_tree_bytes += buf->len;
7838         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7839                 total_extent_tree_bytes += buf->len;
7840         if (!found_old_backref &&
7841             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7842             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7843             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7844                 found_old_backref = 1;
7845 out:
7846         free_extent_buffer(buf);
7847         return ret;
7848 }
7849
7850 static int add_root_to_pending(struct extent_buffer *buf,
7851                                struct cache_tree *extent_cache,
7852                                struct cache_tree *pending,
7853                                struct cache_tree *seen,
7854                                struct cache_tree *nodes,
7855                                u64 objectid)
7856 {
7857         struct extent_record tmpl;
7858         int ret;
7859
7860         if (btrfs_header_level(buf) > 0)
7861                 add_pending(nodes, seen, buf->start, buf->len);
7862         else
7863                 add_pending(pending, seen, buf->start, buf->len);
7864
7865         memset(&tmpl, 0, sizeof(tmpl));
7866         tmpl.start = buf->start;
7867         tmpl.nr = buf->len;
7868         tmpl.is_root = 1;
7869         tmpl.refs = 1;
7870         tmpl.metadata = 1;
7871         tmpl.max_size = buf->len;
7872         add_extent_rec(extent_cache, &tmpl);
7873
7874         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7875             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7876                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7877                                 0, 1);
7878         else
7879                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7880                                 1);
7881         return ret;
7882 }
7883
7884 /* as we fix the tree, we might be deleting blocks that
7885  * we're tracking for repair.  This hook makes sure we
7886  * remove any backrefs for blocks as we are fixing them.
7887  */
7888 static int free_extent_hook(struct btrfs_trans_handle *trans,
7889                             struct btrfs_root *root,
7890                             u64 bytenr, u64 num_bytes, u64 parent,
7891                             u64 root_objectid, u64 owner, u64 offset,
7892                             int refs_to_drop)
7893 {
7894         struct extent_record *rec;
7895         struct cache_extent *cache;
7896         int is_data;
7897         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7898
7899         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7900         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7901         if (!cache)
7902                 return 0;
7903
7904         rec = container_of(cache, struct extent_record, cache);
7905         if (is_data) {
7906                 struct data_backref *back;
7907                 back = find_data_backref(rec, parent, root_objectid, owner,
7908                                          offset, 1, bytenr, num_bytes);
7909                 if (!back)
7910                         goto out;
7911                 if (back->node.found_ref) {
7912                         back->found_ref -= refs_to_drop;
7913                         if (rec->refs)
7914                                 rec->refs -= refs_to_drop;
7915                 }
7916                 if (back->node.found_extent_tree) {
7917                         back->num_refs -= refs_to_drop;
7918                         if (rec->extent_item_refs)
7919                                 rec->extent_item_refs -= refs_to_drop;
7920                 }
7921                 if (back->found_ref == 0)
7922                         back->node.found_ref = 0;
7923                 if (back->num_refs == 0)
7924                         back->node.found_extent_tree = 0;
7925
7926                 if (!back->node.found_extent_tree && back->node.found_ref) {
7927                         list_del(&back->node.list);
7928                         free(back);
7929                 }
7930         } else {
7931                 struct tree_backref *back;
7932                 back = find_tree_backref(rec, parent, root_objectid);
7933                 if (!back)
7934                         goto out;
7935                 if (back->node.found_ref) {
7936                         if (rec->refs)
7937                                 rec->refs--;
7938                         back->node.found_ref = 0;
7939                 }
7940                 if (back->node.found_extent_tree) {
7941                         if (rec->extent_item_refs)
7942                                 rec->extent_item_refs--;
7943                         back->node.found_extent_tree = 0;
7944                 }
7945                 if (!back->node.found_extent_tree && back->node.found_ref) {
7946                         list_del(&back->node.list);
7947                         free(back);
7948                 }
7949         }
7950         maybe_free_extent_rec(extent_cache, rec);
7951 out:
7952         return 0;
7953 }
7954
7955 static int delete_extent_records(struct btrfs_trans_handle *trans,
7956                                  struct btrfs_root *root,
7957                                  struct btrfs_path *path,
7958                                  u64 bytenr)
7959 {
7960         struct btrfs_key key;
7961         struct btrfs_key found_key;
7962         struct extent_buffer *leaf;
7963         int ret;
7964         int slot;
7965
7966
7967         key.objectid = bytenr;
7968         key.type = (u8)-1;
7969         key.offset = (u64)-1;
7970
7971         while(1) {
7972                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7973                                         &key, path, 0, 1);
7974                 if (ret < 0)
7975                         break;
7976
7977                 if (ret > 0) {
7978                         ret = 0;
7979                         if (path->slots[0] == 0)
7980                                 break;
7981                         path->slots[0]--;
7982                 }
7983                 ret = 0;
7984
7985                 leaf = path->nodes[0];
7986                 slot = path->slots[0];
7987
7988                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7989                 if (found_key.objectid != bytenr)
7990                         break;
7991
7992                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7993                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7994                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7995                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7996                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7997                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7998                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7999                         btrfs_release_path(path);
8000                         if (found_key.type == 0) {
8001                                 if (found_key.offset == 0)
8002                                         break;
8003                                 key.offset = found_key.offset - 1;
8004                                 key.type = found_key.type;
8005                         }
8006                         key.type = found_key.type - 1;
8007                         key.offset = (u64)-1;
8008                         continue;
8009                 }
8010
8011                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8012                         found_key.objectid, found_key.type, found_key.offset);
8013
8014                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8015                 if (ret)
8016                         break;
8017                 btrfs_release_path(path);
8018
8019                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8020                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8021                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8022                                 found_key.offset : root->nodesize;
8023
8024                         ret = btrfs_update_block_group(trans, root, bytenr,
8025                                                        bytes, 0, 0);
8026                         if (ret)
8027                                 break;
8028                 }
8029         }
8030
8031         btrfs_release_path(path);
8032         return ret;
8033 }
8034
8035 /*
8036  * for a single backref, this will allocate a new extent
8037  * and add the backref to it.
8038  */
8039 static int record_extent(struct btrfs_trans_handle *trans,
8040                          struct btrfs_fs_info *info,
8041                          struct btrfs_path *path,
8042                          struct extent_record *rec,
8043                          struct extent_backref *back,
8044                          int allocated, u64 flags)
8045 {
8046         int ret = 0;
8047         struct btrfs_root *extent_root = info->extent_root;
8048         struct extent_buffer *leaf;
8049         struct btrfs_key ins_key;
8050         struct btrfs_extent_item *ei;
8051         struct data_backref *dback;
8052         struct btrfs_tree_block_info *bi;
8053
8054         if (!back->is_data)
8055                 rec->max_size = max_t(u64, rec->max_size,
8056                                     info->extent_root->nodesize);
8057
8058         if (!allocated) {
8059                 u32 item_size = sizeof(*ei);
8060
8061                 if (!back->is_data)
8062                         item_size += sizeof(*bi);
8063
8064                 ins_key.objectid = rec->start;
8065                 ins_key.offset = rec->max_size;
8066                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8067
8068                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8069                                         &ins_key, item_size);
8070                 if (ret)
8071                         goto fail;
8072
8073                 leaf = path->nodes[0];
8074                 ei = btrfs_item_ptr(leaf, path->slots[0],
8075                                     struct btrfs_extent_item);
8076
8077                 btrfs_set_extent_refs(leaf, ei, 0);
8078                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8079
8080                 if (back->is_data) {
8081                         btrfs_set_extent_flags(leaf, ei,
8082                                                BTRFS_EXTENT_FLAG_DATA);
8083                 } else {
8084                         struct btrfs_disk_key copy_key;;
8085
8086                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8087                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8088                                              sizeof(*bi));
8089
8090                         btrfs_set_disk_key_objectid(&copy_key,
8091                                                     rec->info_objectid);
8092                         btrfs_set_disk_key_type(&copy_key, 0);
8093                         btrfs_set_disk_key_offset(&copy_key, 0);
8094
8095                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8096                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8097
8098                         btrfs_set_extent_flags(leaf, ei,
8099                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8100                 }
8101
8102                 btrfs_mark_buffer_dirty(leaf);
8103                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8104                                                rec->max_size, 1, 0);
8105                 if (ret)
8106                         goto fail;
8107                 btrfs_release_path(path);
8108         }
8109
8110         if (back->is_data) {
8111                 u64 parent;
8112                 int i;
8113
8114                 dback = to_data_backref(back);
8115                 if (back->full_backref)
8116                         parent = dback->parent;
8117                 else
8118                         parent = 0;
8119
8120                 for (i = 0; i < dback->found_ref; i++) {
8121                         /* if parent != 0, we're doing a full backref
8122                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8123                          * just makes the backref allocator create a data
8124                          * backref
8125                          */
8126                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8127                                                    rec->start, rec->max_size,
8128                                                    parent,
8129                                                    dback->root,
8130                                                    parent ?
8131                                                    BTRFS_FIRST_FREE_OBJECTID :
8132                                                    dback->owner,
8133                                                    dback->offset);
8134                         if (ret)
8135                                 break;
8136                 }
8137                 fprintf(stderr, "adding new data backref"
8138                                 " on %llu %s %llu owner %llu"
8139                                 " offset %llu found %d\n",
8140                                 (unsigned long long)rec->start,
8141                                 back->full_backref ?
8142                                 "parent" : "root",
8143                                 back->full_backref ?
8144                                 (unsigned long long)parent :
8145                                 (unsigned long long)dback->root,
8146                                 (unsigned long long)dback->owner,
8147                                 (unsigned long long)dback->offset,
8148                                 dback->found_ref);
8149         } else {
8150                 u64 parent;
8151                 struct tree_backref *tback;
8152
8153                 tback = to_tree_backref(back);
8154                 if (back->full_backref)
8155                         parent = tback->parent;
8156                 else
8157                         parent = 0;
8158
8159                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8160                                            rec->start, rec->max_size,
8161                                            parent, tback->root, 0, 0);
8162                 fprintf(stderr, "adding new tree backref on "
8163                         "start %llu len %llu parent %llu root %llu\n",
8164                         rec->start, rec->max_size, parent, tback->root);
8165         }
8166 fail:
8167         btrfs_release_path(path);
8168         return ret;
8169 }
8170
8171 static struct extent_entry *find_entry(struct list_head *entries,
8172                                        u64 bytenr, u64 bytes)
8173 {
8174         struct extent_entry *entry = NULL;
8175
8176         list_for_each_entry(entry, entries, list) {
8177                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8178                         return entry;
8179         }
8180
8181         return NULL;
8182 }
8183
8184 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8185 {
8186         struct extent_entry *entry, *best = NULL, *prev = NULL;
8187
8188         list_for_each_entry(entry, entries, list) {
8189                 /*
8190                  * If there are as many broken entries as entries then we know
8191                  * not to trust this particular entry.
8192                  */
8193                 if (entry->broken == entry->count)
8194                         continue;
8195
8196                 /*
8197                  * Special case, when there are only two entries and 'best' is
8198                  * the first one
8199                  */
8200                 if (!prev) {
8201                         best = entry;
8202                         prev = entry;
8203                         continue;
8204                 }
8205
8206                 /*
8207                  * If our current entry == best then we can't be sure our best
8208                  * is really the best, so we need to keep searching.
8209                  */
8210                 if (best && best->count == entry->count) {
8211                         prev = entry;
8212                         best = NULL;
8213                         continue;
8214                 }
8215
8216                 /* Prev == entry, not good enough, have to keep searching */
8217                 if (!prev->broken && prev->count == entry->count)
8218                         continue;
8219
8220                 if (!best)
8221                         best = (prev->count > entry->count) ? prev : entry;
8222                 else if (best->count < entry->count)
8223                         best = entry;
8224                 prev = entry;
8225         }
8226
8227         return best;
8228 }
8229
8230 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8231                       struct data_backref *dback, struct extent_entry *entry)
8232 {
8233         struct btrfs_trans_handle *trans;
8234         struct btrfs_root *root;
8235         struct btrfs_file_extent_item *fi;
8236         struct extent_buffer *leaf;
8237         struct btrfs_key key;
8238         u64 bytenr, bytes;
8239         int ret, err;
8240
8241         key.objectid = dback->root;
8242         key.type = BTRFS_ROOT_ITEM_KEY;
8243         key.offset = (u64)-1;
8244         root = btrfs_read_fs_root(info, &key);
8245         if (IS_ERR(root)) {
8246                 fprintf(stderr, "Couldn't find root for our ref\n");
8247                 return -EINVAL;
8248         }
8249
8250         /*
8251          * The backref points to the original offset of the extent if it was
8252          * split, so we need to search down to the offset we have and then walk
8253          * forward until we find the backref we're looking for.
8254          */
8255         key.objectid = dback->owner;
8256         key.type = BTRFS_EXTENT_DATA_KEY;
8257         key.offset = dback->offset;
8258         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8259         if (ret < 0) {
8260                 fprintf(stderr, "Error looking up ref %d\n", ret);
8261                 return ret;
8262         }
8263
8264         while (1) {
8265                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8266                         ret = btrfs_next_leaf(root, path);
8267                         if (ret) {
8268                                 fprintf(stderr, "Couldn't find our ref, next\n");
8269                                 return -EINVAL;
8270                         }
8271                 }
8272                 leaf = path->nodes[0];
8273                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8274                 if (key.objectid != dback->owner ||
8275                     key.type != BTRFS_EXTENT_DATA_KEY) {
8276                         fprintf(stderr, "Couldn't find our ref, search\n");
8277                         return -EINVAL;
8278                 }
8279                 fi = btrfs_item_ptr(leaf, path->slots[0],
8280                                     struct btrfs_file_extent_item);
8281                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8282                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8283
8284                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8285                         break;
8286                 path->slots[0]++;
8287         }
8288
8289         btrfs_release_path(path);
8290
8291         trans = btrfs_start_transaction(root, 1);
8292         if (IS_ERR(trans))
8293                 return PTR_ERR(trans);
8294
8295         /*
8296          * Ok we have the key of the file extent we want to fix, now we can cow
8297          * down to the thing and fix it.
8298          */
8299         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8300         if (ret < 0) {
8301                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8302                         key.objectid, key.type, key.offset, ret);
8303                 goto out;
8304         }
8305         if (ret > 0) {
8306                 fprintf(stderr, "Well that's odd, we just found this key "
8307                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8308                         key.offset);
8309                 ret = -EINVAL;
8310                 goto out;
8311         }
8312         leaf = path->nodes[0];
8313         fi = btrfs_item_ptr(leaf, path->slots[0],
8314                             struct btrfs_file_extent_item);
8315
8316         if (btrfs_file_extent_compression(leaf, fi) &&
8317             dback->disk_bytenr != entry->bytenr) {
8318                 fprintf(stderr, "Ref doesn't match the record start and is "
8319                         "compressed, please take a btrfs-image of this file "
8320                         "system and send it to a btrfs developer so they can "
8321                         "complete this functionality for bytenr %Lu\n",
8322                         dback->disk_bytenr);
8323                 ret = -EINVAL;
8324                 goto out;
8325         }
8326
8327         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8328                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8329         } else if (dback->disk_bytenr > entry->bytenr) {
8330                 u64 off_diff, offset;
8331
8332                 off_diff = dback->disk_bytenr - entry->bytenr;
8333                 offset = btrfs_file_extent_offset(leaf, fi);
8334                 if (dback->disk_bytenr + offset +
8335                     btrfs_file_extent_num_bytes(leaf, fi) >
8336                     entry->bytenr + entry->bytes) {
8337                         fprintf(stderr, "Ref is past the entry end, please "
8338                                 "take a btrfs-image of this file system and "
8339                                 "send it to a btrfs developer, ref %Lu\n",
8340                                 dback->disk_bytenr);
8341                         ret = -EINVAL;
8342                         goto out;
8343                 }
8344                 offset += off_diff;
8345                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8346                 btrfs_set_file_extent_offset(leaf, fi, offset);
8347         } else if (dback->disk_bytenr < entry->bytenr) {
8348                 u64 offset;
8349
8350                 offset = btrfs_file_extent_offset(leaf, fi);
8351                 if (dback->disk_bytenr + offset < entry->bytenr) {
8352                         fprintf(stderr, "Ref is before the entry start, please"
8353                                 " take a btrfs-image of this file system and "
8354                                 "send it to a btrfs developer, ref %Lu\n",
8355                                 dback->disk_bytenr);
8356                         ret = -EINVAL;
8357                         goto out;
8358                 }
8359
8360                 offset += dback->disk_bytenr;
8361                 offset -= entry->bytenr;
8362                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8363                 btrfs_set_file_extent_offset(leaf, fi, offset);
8364         }
8365
8366         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8367
8368         /*
8369          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8370          * only do this if we aren't using compression, otherwise it's a
8371          * trickier case.
8372          */
8373         if (!btrfs_file_extent_compression(leaf, fi))
8374                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8375         else
8376                 printf("ram bytes may be wrong?\n");
8377         btrfs_mark_buffer_dirty(leaf);
8378 out:
8379         err = btrfs_commit_transaction(trans, root);
8380         btrfs_release_path(path);
8381         return ret ? ret : err;
8382 }
8383
8384 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8385                            struct extent_record *rec)
8386 {
8387         struct extent_backref *back;
8388         struct data_backref *dback;
8389         struct extent_entry *entry, *best = NULL;
8390         LIST_HEAD(entries);
8391         int nr_entries = 0;
8392         int broken_entries = 0;
8393         int ret = 0;
8394         short mismatch = 0;
8395
8396         /*
8397          * Metadata is easy and the backrefs should always agree on bytenr and
8398          * size, if not we've got bigger issues.
8399          */
8400         if (rec->metadata)
8401                 return 0;
8402
8403         list_for_each_entry(back, &rec->backrefs, list) {
8404                 if (back->full_backref || !back->is_data)
8405                         continue;
8406
8407                 dback = to_data_backref(back);
8408
8409                 /*
8410                  * We only pay attention to backrefs that we found a real
8411                  * backref for.
8412                  */
8413                 if (dback->found_ref == 0)
8414                         continue;
8415
8416                 /*
8417                  * For now we only catch when the bytes don't match, not the
8418                  * bytenr.  We can easily do this at the same time, but I want
8419                  * to have a fs image to test on before we just add repair
8420                  * functionality willy-nilly so we know we won't screw up the
8421                  * repair.
8422                  */
8423
8424                 entry = find_entry(&entries, dback->disk_bytenr,
8425                                    dback->bytes);
8426                 if (!entry) {
8427                         entry = malloc(sizeof(struct extent_entry));
8428                         if (!entry) {
8429                                 ret = -ENOMEM;
8430                                 goto out;
8431                         }
8432                         memset(entry, 0, sizeof(*entry));
8433                         entry->bytenr = dback->disk_bytenr;
8434                         entry->bytes = dback->bytes;
8435                         list_add_tail(&entry->list, &entries);
8436                         nr_entries++;
8437                 }
8438
8439                 /*
8440                  * If we only have on entry we may think the entries agree when
8441                  * in reality they don't so we have to do some extra checking.
8442                  */
8443                 if (dback->disk_bytenr != rec->start ||
8444                     dback->bytes != rec->nr || back->broken)
8445                         mismatch = 1;
8446
8447                 if (back->broken) {
8448                         entry->broken++;
8449                         broken_entries++;
8450                 }
8451
8452                 entry->count++;
8453         }
8454
8455         /* Yay all the backrefs agree, carry on good sir */
8456         if (nr_entries <= 1 && !mismatch)
8457                 goto out;
8458
8459         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8460                 "%Lu\n", rec->start);
8461
8462         /*
8463          * First we want to see if the backrefs can agree amongst themselves who
8464          * is right, so figure out which one of the entries has the highest
8465          * count.
8466          */
8467         best = find_most_right_entry(&entries);
8468
8469         /*
8470          * Ok so we may have an even split between what the backrefs think, so
8471          * this is where we use the extent ref to see what it thinks.
8472          */
8473         if (!best) {
8474                 entry = find_entry(&entries, rec->start, rec->nr);
8475                 if (!entry && (!broken_entries || !rec->found_rec)) {
8476                         fprintf(stderr, "Backrefs don't agree with each other "
8477                                 "and extent record doesn't agree with anybody,"
8478                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8479                                 rec->start, rec->nr);
8480                         ret = -EINVAL;
8481                         goto out;
8482                 } else if (!entry) {
8483                         /*
8484                          * Ok our backrefs were broken, we'll assume this is the
8485                          * correct value and add an entry for this range.
8486                          */
8487                         entry = malloc(sizeof(struct extent_entry));
8488                         if (!entry) {
8489                                 ret = -ENOMEM;
8490                                 goto out;
8491                         }
8492                         memset(entry, 0, sizeof(*entry));
8493                         entry->bytenr = rec->start;
8494                         entry->bytes = rec->nr;
8495                         list_add_tail(&entry->list, &entries);
8496                         nr_entries++;
8497                 }
8498                 entry->count++;
8499                 best = find_most_right_entry(&entries);
8500                 if (!best) {
8501                         fprintf(stderr, "Backrefs and extent record evenly "
8502                                 "split on who is right, this is going to "
8503                                 "require user input to fix bytenr %Lu bytes "
8504                                 "%Lu\n", rec->start, rec->nr);
8505                         ret = -EINVAL;
8506                         goto out;
8507                 }
8508         }
8509
8510         /*
8511          * I don't think this can happen currently as we'll abort() if we catch
8512          * this case higher up, but in case somebody removes that we still can't
8513          * deal with it properly here yet, so just bail out of that's the case.
8514          */
8515         if (best->bytenr != rec->start) {
8516                 fprintf(stderr, "Extent start and backref starts don't match, "
8517                         "please use btrfs-image on this file system and send "
8518                         "it to a btrfs developer so they can make fsck fix "
8519                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8520                         rec->start, rec->nr);
8521                 ret = -EINVAL;
8522                 goto out;
8523         }
8524
8525         /*
8526          * Ok great we all agreed on an extent record, let's go find the real
8527          * references and fix up the ones that don't match.
8528          */
8529         list_for_each_entry(back, &rec->backrefs, list) {
8530                 if (back->full_backref || !back->is_data)
8531                         continue;
8532
8533                 dback = to_data_backref(back);
8534
8535                 /*
8536                  * Still ignoring backrefs that don't have a real ref attached
8537                  * to them.
8538                  */
8539                 if (dback->found_ref == 0)
8540                         continue;
8541
8542                 if (dback->bytes == best->bytes &&
8543                     dback->disk_bytenr == best->bytenr)
8544                         continue;
8545
8546                 ret = repair_ref(info, path, dback, best);
8547                 if (ret)
8548                         goto out;
8549         }
8550
8551         /*
8552          * Ok we messed with the actual refs, which means we need to drop our
8553          * entire cache and go back and rescan.  I know this is a huge pain and
8554          * adds a lot of extra work, but it's the only way to be safe.  Once all
8555          * the backrefs agree we may not need to do anything to the extent
8556          * record itself.
8557          */
8558         ret = -EAGAIN;
8559 out:
8560         while (!list_empty(&entries)) {
8561                 entry = list_entry(entries.next, struct extent_entry, list);
8562                 list_del_init(&entry->list);
8563                 free(entry);
8564         }
8565         return ret;
8566 }
8567
8568 static int process_duplicates(struct btrfs_root *root,
8569                               struct cache_tree *extent_cache,
8570                               struct extent_record *rec)
8571 {
8572         struct extent_record *good, *tmp;
8573         struct cache_extent *cache;
8574         int ret;
8575
8576         /*
8577          * If we found a extent record for this extent then return, or if we
8578          * have more than one duplicate we are likely going to need to delete
8579          * something.
8580          */
8581         if (rec->found_rec || rec->num_duplicates > 1)
8582                 return 0;
8583
8584         /* Shouldn't happen but just in case */
8585         BUG_ON(!rec->num_duplicates);
8586
8587         /*
8588          * So this happens if we end up with a backref that doesn't match the
8589          * actual extent entry.  So either the backref is bad or the extent
8590          * entry is bad.  Either way we want to have the extent_record actually
8591          * reflect what we found in the extent_tree, so we need to take the
8592          * duplicate out and use that as the extent_record since the only way we
8593          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8594          */
8595         remove_cache_extent(extent_cache, &rec->cache);
8596
8597         good = to_extent_record(rec->dups.next);
8598         list_del_init(&good->list);
8599         INIT_LIST_HEAD(&good->backrefs);
8600         INIT_LIST_HEAD(&good->dups);
8601         good->cache.start = good->start;
8602         good->cache.size = good->nr;
8603         good->content_checked = 0;
8604         good->owner_ref_checked = 0;
8605         good->num_duplicates = 0;
8606         good->refs = rec->refs;
8607         list_splice_init(&rec->backrefs, &good->backrefs);
8608         while (1) {
8609                 cache = lookup_cache_extent(extent_cache, good->start,
8610                                             good->nr);
8611                 if (!cache)
8612                         break;
8613                 tmp = container_of(cache, struct extent_record, cache);
8614
8615                 /*
8616                  * If we find another overlapping extent and it's found_rec is
8617                  * set then it's a duplicate and we need to try and delete
8618                  * something.
8619                  */
8620                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8621                         if (list_empty(&good->list))
8622                                 list_add_tail(&good->list,
8623                                               &duplicate_extents);
8624                         good->num_duplicates += tmp->num_duplicates + 1;
8625                         list_splice_init(&tmp->dups, &good->dups);
8626                         list_del_init(&tmp->list);
8627                         list_add_tail(&tmp->list, &good->dups);
8628                         remove_cache_extent(extent_cache, &tmp->cache);
8629                         continue;
8630                 }
8631
8632                 /*
8633                  * Ok we have another non extent item backed extent rec, so lets
8634                  * just add it to this extent and carry on like we did above.
8635                  */
8636                 good->refs += tmp->refs;
8637                 list_splice_init(&tmp->backrefs, &good->backrefs);
8638                 remove_cache_extent(extent_cache, &tmp->cache);
8639                 free(tmp);
8640         }
8641         ret = insert_cache_extent(extent_cache, &good->cache);
8642         BUG_ON(ret);
8643         free(rec);
8644         return good->num_duplicates ? 0 : 1;
8645 }
8646
8647 static int delete_duplicate_records(struct btrfs_root *root,
8648                                     struct extent_record *rec)
8649 {
8650         struct btrfs_trans_handle *trans;
8651         LIST_HEAD(delete_list);
8652         struct btrfs_path path;
8653         struct extent_record *tmp, *good, *n;
8654         int nr_del = 0;
8655         int ret = 0, err;
8656         struct btrfs_key key;
8657
8658         btrfs_init_path(&path);
8659
8660         good = rec;
8661         /* Find the record that covers all of the duplicates. */
8662         list_for_each_entry(tmp, &rec->dups, list) {
8663                 if (good->start < tmp->start)
8664                         continue;
8665                 if (good->nr > tmp->nr)
8666                         continue;
8667
8668                 if (tmp->start + tmp->nr < good->start + good->nr) {
8669                         fprintf(stderr, "Ok we have overlapping extents that "
8670                                 "aren't completely covered by each other, this "
8671                                 "is going to require more careful thought.  "
8672                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8673                                 tmp->start, tmp->nr, good->start, good->nr);
8674                         abort();
8675                 }
8676                 good = tmp;
8677         }
8678
8679         if (good != rec)
8680                 list_add_tail(&rec->list, &delete_list);
8681
8682         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8683                 if (tmp == good)
8684                         continue;
8685                 list_move_tail(&tmp->list, &delete_list);
8686         }
8687
8688         root = root->fs_info->extent_root;
8689         trans = btrfs_start_transaction(root, 1);
8690         if (IS_ERR(trans)) {
8691                 ret = PTR_ERR(trans);
8692                 goto out;
8693         }
8694
8695         list_for_each_entry(tmp, &delete_list, list) {
8696                 if (tmp->found_rec == 0)
8697                         continue;
8698                 key.objectid = tmp->start;
8699                 key.type = BTRFS_EXTENT_ITEM_KEY;
8700                 key.offset = tmp->nr;
8701
8702                 /* Shouldn't happen but just in case */
8703                 if (tmp->metadata) {
8704                         fprintf(stderr, "Well this shouldn't happen, extent "
8705                                 "record overlaps but is metadata? "
8706                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8707                         abort();
8708                 }
8709
8710                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8711                 if (ret) {
8712                         if (ret > 0)
8713                                 ret = -EINVAL;
8714                         break;
8715                 }
8716                 ret = btrfs_del_item(trans, root, &path);
8717                 if (ret)
8718                         break;
8719                 btrfs_release_path(&path);
8720                 nr_del++;
8721         }
8722         err = btrfs_commit_transaction(trans, root);
8723         if (err && !ret)
8724                 ret = err;
8725 out:
8726         while (!list_empty(&delete_list)) {
8727                 tmp = to_extent_record(delete_list.next);
8728                 list_del_init(&tmp->list);
8729                 if (tmp == rec)
8730                         continue;
8731                 free(tmp);
8732         }
8733
8734         while (!list_empty(&rec->dups)) {
8735                 tmp = to_extent_record(rec->dups.next);
8736                 list_del_init(&tmp->list);
8737                 free(tmp);
8738         }
8739
8740         btrfs_release_path(&path);
8741
8742         if (!ret && !nr_del)
8743                 rec->num_duplicates = 0;
8744
8745         return ret ? ret : nr_del;
8746 }
8747
8748 static int find_possible_backrefs(struct btrfs_fs_info *info,
8749                                   struct btrfs_path *path,
8750                                   struct cache_tree *extent_cache,
8751                                   struct extent_record *rec)
8752 {
8753         struct btrfs_root *root;
8754         struct extent_backref *back;
8755         struct data_backref *dback;
8756         struct cache_extent *cache;
8757         struct btrfs_file_extent_item *fi;
8758         struct btrfs_key key;
8759         u64 bytenr, bytes;
8760         int ret;
8761
8762         list_for_each_entry(back, &rec->backrefs, list) {
8763                 /* Don't care about full backrefs (poor unloved backrefs) */
8764                 if (back->full_backref || !back->is_data)
8765                         continue;
8766
8767                 dback = to_data_backref(back);
8768
8769                 /* We found this one, we don't need to do a lookup */
8770                 if (dback->found_ref)
8771                         continue;
8772
8773                 key.objectid = dback->root;
8774                 key.type = BTRFS_ROOT_ITEM_KEY;
8775                 key.offset = (u64)-1;
8776
8777                 root = btrfs_read_fs_root(info, &key);
8778
8779                 /* No root, definitely a bad ref, skip */
8780                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8781                         continue;
8782                 /* Other err, exit */
8783                 if (IS_ERR(root))
8784                         return PTR_ERR(root);
8785
8786                 key.objectid = dback->owner;
8787                 key.type = BTRFS_EXTENT_DATA_KEY;
8788                 key.offset = dback->offset;
8789                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8790                 if (ret) {
8791                         btrfs_release_path(path);
8792                         if (ret < 0)
8793                                 return ret;
8794                         /* Didn't find it, we can carry on */
8795                         ret = 0;
8796                         continue;
8797                 }
8798
8799                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8800                                     struct btrfs_file_extent_item);
8801                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8802                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8803                 btrfs_release_path(path);
8804                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8805                 if (cache) {
8806                         struct extent_record *tmp;
8807                         tmp = container_of(cache, struct extent_record, cache);
8808
8809                         /*
8810                          * If we found an extent record for the bytenr for this
8811                          * particular backref then we can't add it to our
8812                          * current extent record.  We only want to add backrefs
8813                          * that don't have a corresponding extent item in the
8814                          * extent tree since they likely belong to this record
8815                          * and we need to fix it if it doesn't match bytenrs.
8816                          */
8817                         if  (tmp->found_rec)
8818                                 continue;
8819                 }
8820
8821                 dback->found_ref += 1;
8822                 dback->disk_bytenr = bytenr;
8823                 dback->bytes = bytes;
8824
8825                 /*
8826                  * Set this so the verify backref code knows not to trust the
8827                  * values in this backref.
8828                  */
8829                 back->broken = 1;
8830         }
8831
8832         return 0;
8833 }
8834
8835 /*
8836  * Record orphan data ref into corresponding root.
8837  *
8838  * Return 0 if the extent item contains data ref and recorded.
8839  * Return 1 if the extent item contains no useful data ref
8840  *   On that case, it may contains only shared_dataref or metadata backref
8841  *   or the file extent exists(this should be handled by the extent bytenr
8842  *   recovery routine)
8843  * Return <0 if something goes wrong.
8844  */
8845 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8846                                       struct extent_record *rec)
8847 {
8848         struct btrfs_key key;
8849         struct btrfs_root *dest_root;
8850         struct extent_backref *back;
8851         struct data_backref *dback;
8852         struct orphan_data_extent *orphan;
8853         struct btrfs_path path;
8854         int recorded_data_ref = 0;
8855         int ret = 0;
8856
8857         if (rec->metadata)
8858                 return 1;
8859         btrfs_init_path(&path);
8860         list_for_each_entry(back, &rec->backrefs, list) {
8861                 if (back->full_backref || !back->is_data ||
8862                     !back->found_extent_tree)
8863                         continue;
8864                 dback = to_data_backref(back);
8865                 if (dback->found_ref)
8866                         continue;
8867                 key.objectid = dback->root;
8868                 key.type = BTRFS_ROOT_ITEM_KEY;
8869                 key.offset = (u64)-1;
8870
8871                 dest_root = btrfs_read_fs_root(fs_info, &key);
8872
8873                 /* For non-exist root we just skip it */
8874                 if (IS_ERR(dest_root) || !dest_root)
8875                         continue;
8876
8877                 key.objectid = dback->owner;
8878                 key.type = BTRFS_EXTENT_DATA_KEY;
8879                 key.offset = dback->offset;
8880
8881                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8882                 btrfs_release_path(&path);
8883                 /*
8884                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8885                  * we need to record it for inode/file extent rebuild.
8886                  * For ret > 0, we record it only for file extent rebuild.
8887                  * For ret == 0, the file extent exists but only bytenr
8888                  * mismatch, let the original bytenr fix routine to handle,
8889                  * don't record it.
8890                  */
8891                 if (ret == 0)
8892                         continue;
8893                 ret = 0;
8894                 orphan = malloc(sizeof(*orphan));
8895                 if (!orphan) {
8896                         ret = -ENOMEM;
8897                         goto out;
8898                 }
8899                 INIT_LIST_HEAD(&orphan->list);
8900                 orphan->root = dback->root;
8901                 orphan->objectid = dback->owner;
8902                 orphan->offset = dback->offset;
8903                 orphan->disk_bytenr = rec->cache.start;
8904                 orphan->disk_len = rec->cache.size;
8905                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8906                 recorded_data_ref = 1;
8907         }
8908 out:
8909         btrfs_release_path(&path);
8910         if (!ret)
8911                 return !recorded_data_ref;
8912         else
8913                 return ret;
8914 }
8915
8916 /*
8917  * when an incorrect extent item is found, this will delete
8918  * all of the existing entries for it and recreate them
8919  * based on what the tree scan found.
8920  */
8921 static int fixup_extent_refs(struct btrfs_fs_info *info,
8922                              struct cache_tree *extent_cache,
8923                              struct extent_record *rec)
8924 {
8925         struct btrfs_trans_handle *trans = NULL;
8926         int ret;
8927         struct btrfs_path path;
8928         struct list_head *cur = rec->backrefs.next;
8929         struct cache_extent *cache;
8930         struct extent_backref *back;
8931         int allocated = 0;
8932         u64 flags = 0;
8933
8934         if (rec->flag_block_full_backref)
8935                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8936
8937         btrfs_init_path(&path);
8938         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8939                 /*
8940                  * Sometimes the backrefs themselves are so broken they don't
8941                  * get attached to any meaningful rec, so first go back and
8942                  * check any of our backrefs that we couldn't find and throw
8943                  * them into the list if we find the backref so that
8944                  * verify_backrefs can figure out what to do.
8945                  */
8946                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8947                 if (ret < 0)
8948                         goto out;
8949         }
8950
8951         /* step one, make sure all of the backrefs agree */
8952         ret = verify_backrefs(info, &path, rec);
8953         if (ret < 0)
8954                 goto out;
8955
8956         trans = btrfs_start_transaction(info->extent_root, 1);
8957         if (IS_ERR(trans)) {
8958                 ret = PTR_ERR(trans);
8959                 goto out;
8960         }
8961
8962         /* step two, delete all the existing records */
8963         ret = delete_extent_records(trans, info->extent_root, &path,
8964                                     rec->start);
8965
8966         if (ret < 0)
8967                 goto out;
8968
8969         /* was this block corrupt?  If so, don't add references to it */
8970         cache = lookup_cache_extent(info->corrupt_blocks,
8971                                     rec->start, rec->max_size);
8972         if (cache) {
8973                 ret = 0;
8974                 goto out;
8975         }
8976
8977         /* step three, recreate all the refs we did find */
8978         while(cur != &rec->backrefs) {
8979                 back = to_extent_backref(cur);
8980                 cur = cur->next;
8981
8982                 /*
8983                  * if we didn't find any references, don't create a
8984                  * new extent record
8985                  */
8986                 if (!back->found_ref)
8987                         continue;
8988
8989                 rec->bad_full_backref = 0;
8990                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8991                 allocated = 1;
8992
8993                 if (ret)
8994                         goto out;
8995         }
8996 out:
8997         if (trans) {
8998                 int err = btrfs_commit_transaction(trans, info->extent_root);
8999                 if (!ret)
9000                         ret = err;
9001         }
9002
9003         if (!ret)
9004                 fprintf(stderr, "Repaired extent references for %llu\n",
9005                                 (unsigned long long)rec->start);
9006
9007         btrfs_release_path(&path);
9008         return ret;
9009 }
9010
9011 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9012                               struct extent_record *rec)
9013 {
9014         struct btrfs_trans_handle *trans;
9015         struct btrfs_root *root = fs_info->extent_root;
9016         struct btrfs_path path;
9017         struct btrfs_extent_item *ei;
9018         struct btrfs_key key;
9019         u64 flags;
9020         int ret = 0;
9021
9022         key.objectid = rec->start;
9023         if (rec->metadata) {
9024                 key.type = BTRFS_METADATA_ITEM_KEY;
9025                 key.offset = rec->info_level;
9026         } else {
9027                 key.type = BTRFS_EXTENT_ITEM_KEY;
9028                 key.offset = rec->max_size;
9029         }
9030
9031         trans = btrfs_start_transaction(root, 0);
9032         if (IS_ERR(trans))
9033                 return PTR_ERR(trans);
9034
9035         btrfs_init_path(&path);
9036         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9037         if (ret < 0) {
9038                 btrfs_release_path(&path);
9039                 btrfs_commit_transaction(trans, root);
9040                 return ret;
9041         } else if (ret) {
9042                 fprintf(stderr, "Didn't find extent for %llu\n",
9043                         (unsigned long long)rec->start);
9044                 btrfs_release_path(&path);
9045                 btrfs_commit_transaction(trans, root);
9046                 return -ENOENT;
9047         }
9048
9049         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9050                             struct btrfs_extent_item);
9051         flags = btrfs_extent_flags(path.nodes[0], ei);
9052         if (rec->flag_block_full_backref) {
9053                 fprintf(stderr, "setting full backref on %llu\n",
9054                         (unsigned long long)key.objectid);
9055                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9056         } else {
9057                 fprintf(stderr, "clearing full backref on %llu\n",
9058                         (unsigned long long)key.objectid);
9059                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9060         }
9061         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9062         btrfs_mark_buffer_dirty(path.nodes[0]);
9063         btrfs_release_path(&path);
9064         ret = btrfs_commit_transaction(trans, root);
9065         if (!ret)
9066                 fprintf(stderr, "Repaired extent flags for %llu\n",
9067                                 (unsigned long long)rec->start);
9068
9069         return ret;
9070 }
9071
9072 /* right now we only prune from the extent allocation tree */
9073 static int prune_one_block(struct btrfs_trans_handle *trans,
9074                            struct btrfs_fs_info *info,
9075                            struct btrfs_corrupt_block *corrupt)
9076 {
9077         int ret;
9078         struct btrfs_path path;
9079         struct extent_buffer *eb;
9080         u64 found;
9081         int slot;
9082         int nritems;
9083         int level = corrupt->level + 1;
9084
9085         btrfs_init_path(&path);
9086 again:
9087         /* we want to stop at the parent to our busted block */
9088         path.lowest_level = level;
9089
9090         ret = btrfs_search_slot(trans, info->extent_root,
9091                                 &corrupt->key, &path, -1, 1);
9092
9093         if (ret < 0)
9094                 goto out;
9095
9096         eb = path.nodes[level];
9097         if (!eb) {
9098                 ret = -ENOENT;
9099                 goto out;
9100         }
9101
9102         /*
9103          * hopefully the search gave us the block we want to prune,
9104          * lets try that first
9105          */
9106         slot = path.slots[level];
9107         found =  btrfs_node_blockptr(eb, slot);
9108         if (found == corrupt->cache.start)
9109                 goto del_ptr;
9110
9111         nritems = btrfs_header_nritems(eb);
9112
9113         /* the search failed, lets scan this node and hope we find it */
9114         for (slot = 0; slot < nritems; slot++) {
9115                 found =  btrfs_node_blockptr(eb, slot);
9116                 if (found == corrupt->cache.start)
9117                         goto del_ptr;
9118         }
9119         /*
9120          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9121          * to this block
9122          */
9123         if (eb == info->extent_root->node) {
9124                 ret = -ENOENT;
9125                 goto out;
9126         } else {
9127                 level++;
9128                 btrfs_release_path(&path);
9129                 goto again;
9130         }
9131
9132 del_ptr:
9133         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9134         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9135
9136 out:
9137         btrfs_release_path(&path);
9138         return ret;
9139 }
9140
9141 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9142 {
9143         struct btrfs_trans_handle *trans = NULL;
9144         struct cache_extent *cache;
9145         struct btrfs_corrupt_block *corrupt;
9146
9147         while (1) {
9148                 cache = search_cache_extent(info->corrupt_blocks, 0);
9149                 if (!cache)
9150                         break;
9151                 if (!trans) {
9152                         trans = btrfs_start_transaction(info->extent_root, 1);
9153                         if (IS_ERR(trans))
9154                                 return PTR_ERR(trans);
9155                 }
9156                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9157                 prune_one_block(trans, info, corrupt);
9158                 remove_cache_extent(info->corrupt_blocks, cache);
9159         }
9160         if (trans)
9161                 return btrfs_commit_transaction(trans, info->extent_root);
9162         return 0;
9163 }
9164
9165 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9166 {
9167         struct btrfs_block_group_cache *cache;
9168         u64 start, end;
9169         int ret;
9170
9171         while (1) {
9172                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9173                                             &start, &end, EXTENT_DIRTY);
9174                 if (ret)
9175                         break;
9176                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9177         }
9178
9179         start = 0;
9180         while (1) {
9181                 cache = btrfs_lookup_first_block_group(fs_info, start);
9182                 if (!cache)
9183                         break;
9184                 if (cache->cached)
9185                         cache->cached = 0;
9186                 start = cache->key.objectid + cache->key.offset;
9187         }
9188 }
9189
9190 static int check_extent_refs(struct btrfs_root *root,
9191                              struct cache_tree *extent_cache)
9192 {
9193         struct extent_record *rec;
9194         struct cache_extent *cache;
9195         int ret = 0;
9196         int had_dups = 0;
9197
9198         if (repair) {
9199                 /*
9200                  * if we're doing a repair, we have to make sure
9201                  * we don't allocate from the problem extents.
9202                  * In the worst case, this will be all the
9203                  * extents in the FS
9204                  */
9205                 cache = search_cache_extent(extent_cache, 0);
9206                 while(cache) {
9207                         rec = container_of(cache, struct extent_record, cache);
9208                         set_extent_dirty(root->fs_info->excluded_extents,
9209                                          rec->start,
9210                                          rec->start + rec->max_size - 1);
9211                         cache = next_cache_extent(cache);
9212                 }
9213
9214                 /* pin down all the corrupted blocks too */
9215                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9216                 while(cache) {
9217                         set_extent_dirty(root->fs_info->excluded_extents,
9218                                          cache->start,
9219                                          cache->start + cache->size - 1);
9220                         cache = next_cache_extent(cache);
9221                 }
9222                 prune_corrupt_blocks(root->fs_info);
9223                 reset_cached_block_groups(root->fs_info);
9224         }
9225
9226         reset_cached_block_groups(root->fs_info);
9227
9228         /*
9229          * We need to delete any duplicate entries we find first otherwise we
9230          * could mess up the extent tree when we have backrefs that actually
9231          * belong to a different extent item and not the weird duplicate one.
9232          */
9233         while (repair && !list_empty(&duplicate_extents)) {
9234                 rec = to_extent_record(duplicate_extents.next);
9235                 list_del_init(&rec->list);
9236
9237                 /* Sometimes we can find a backref before we find an actual
9238                  * extent, so we need to process it a little bit to see if there
9239                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9240                  * if this is a backref screwup.  If we need to delete stuff
9241                  * process_duplicates() will return 0, otherwise it will return
9242                  * 1 and we
9243                  */
9244                 if (process_duplicates(root, extent_cache, rec))
9245                         continue;
9246                 ret = delete_duplicate_records(root, rec);
9247                 if (ret < 0)
9248                         return ret;
9249                 /*
9250                  * delete_duplicate_records will return the number of entries
9251                  * deleted, so if it's greater than 0 then we know we actually
9252                  * did something and we need to remove.
9253                  */
9254                 if (ret)
9255                         had_dups = 1;
9256         }
9257
9258         if (had_dups)
9259                 return -EAGAIN;
9260
9261         while(1) {
9262                 int cur_err = 0;
9263                 int fix = 0;
9264
9265                 cache = search_cache_extent(extent_cache, 0);
9266                 if (!cache)
9267                         break;
9268                 rec = container_of(cache, struct extent_record, cache);
9269                 if (rec->num_duplicates) {
9270                         fprintf(stderr, "extent item %llu has multiple extent "
9271                                 "items\n", (unsigned long long)rec->start);
9272                         cur_err = 1;
9273                 }
9274
9275                 if (rec->refs != rec->extent_item_refs) {
9276                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9277                                 (unsigned long long)rec->start,
9278                                 (unsigned long long)rec->nr);
9279                         fprintf(stderr, "extent item %llu, found %llu\n",
9280                                 (unsigned long long)rec->extent_item_refs,
9281                                 (unsigned long long)rec->refs);
9282                         ret = record_orphan_data_extents(root->fs_info, rec);
9283                         if (ret < 0)
9284                                 goto repair_abort;
9285                         fix = ret;
9286                         cur_err = 1;
9287                 }
9288                 if (all_backpointers_checked(rec, 1)) {
9289                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9290                                 (unsigned long long)rec->start,
9291                                 (unsigned long long)rec->nr);
9292                         fix = 1;
9293                         cur_err = 1;
9294                 }
9295                 if (!rec->owner_ref_checked) {
9296                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9297                                 (unsigned long long)rec->start,
9298                                 (unsigned long long)rec->nr);
9299                         fix = 1;
9300                         cur_err = 1;
9301                 }
9302
9303                 if (repair && fix) {
9304                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9305                         if (ret)
9306                                 goto repair_abort;
9307                 }
9308
9309
9310                 if (rec->bad_full_backref) {
9311                         fprintf(stderr, "bad full backref, on [%llu]\n",
9312                                 (unsigned long long)rec->start);
9313                         if (repair) {
9314                                 ret = fixup_extent_flags(root->fs_info, rec);
9315                                 if (ret)
9316                                         goto repair_abort;
9317                                 fix = 1;
9318                         }
9319                         cur_err = 1;
9320                 }
9321                 /*
9322                  * Although it's not a extent ref's problem, we reuse this
9323                  * routine for error reporting.
9324                  * No repair function yet.
9325                  */
9326                 if (rec->crossing_stripes) {
9327                         fprintf(stderr,
9328                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9329                                 rec->start, rec->start + rec->max_size);
9330                         cur_err = 1;
9331                 }
9332
9333                 if (rec->wrong_chunk_type) {
9334                         fprintf(stderr,
9335                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9336                                 rec->start, rec->start + rec->max_size);
9337                         cur_err = 1;
9338                 }
9339
9340                 remove_cache_extent(extent_cache, cache);
9341                 free_all_extent_backrefs(rec);
9342                 if (!init_extent_tree && repair && (!cur_err || fix))
9343                         clear_extent_dirty(root->fs_info->excluded_extents,
9344                                            rec->start,
9345                                            rec->start + rec->max_size - 1);
9346                 free(rec);
9347         }
9348 repair_abort:
9349         if (repair) {
9350                 if (ret && ret != -EAGAIN) {
9351                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9352                         exit(1);
9353                 } else if (!ret) {
9354                         struct btrfs_trans_handle *trans;
9355
9356                         root = root->fs_info->extent_root;
9357                         trans = btrfs_start_transaction(root, 1);
9358                         if (IS_ERR(trans)) {
9359                                 ret = PTR_ERR(trans);
9360                                 goto repair_abort;
9361                         }
9362
9363                         btrfs_fix_block_accounting(trans, root);
9364                         ret = btrfs_commit_transaction(trans, root);
9365                         if (ret)
9366                                 goto repair_abort;
9367                 }
9368                 return ret;
9369         }
9370         return 0;
9371 }
9372
9373 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9374 {
9375         u64 stripe_size;
9376
9377         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9378                 stripe_size = length;
9379                 stripe_size /= num_stripes;
9380         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9381                 stripe_size = length * 2;
9382                 stripe_size /= num_stripes;
9383         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9384                 stripe_size = length;
9385                 stripe_size /= (num_stripes - 1);
9386         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9387                 stripe_size = length;
9388                 stripe_size /= (num_stripes - 2);
9389         } else {
9390                 stripe_size = length;
9391         }
9392         return stripe_size;
9393 }
9394
9395 /*
9396  * Check the chunk with its block group/dev list ref:
9397  * Return 0 if all refs seems valid.
9398  * Return 1 if part of refs seems valid, need later check for rebuild ref
9399  * like missing block group and needs to search extent tree to rebuild them.
9400  * Return -1 if essential refs are missing and unable to rebuild.
9401  */
9402 static int check_chunk_refs(struct chunk_record *chunk_rec,
9403                             struct block_group_tree *block_group_cache,
9404                             struct device_extent_tree *dev_extent_cache,
9405                             int silent)
9406 {
9407         struct cache_extent *block_group_item;
9408         struct block_group_record *block_group_rec;
9409         struct cache_extent *dev_extent_item;
9410         struct device_extent_record *dev_extent_rec;
9411         u64 devid;
9412         u64 offset;
9413         u64 length;
9414         int metadump_v2 = 0;
9415         int i;
9416         int ret = 0;
9417
9418         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9419                                                chunk_rec->offset,
9420                                                chunk_rec->length);
9421         if (block_group_item) {
9422                 block_group_rec = container_of(block_group_item,
9423                                                struct block_group_record,
9424                                                cache);
9425                 if (chunk_rec->length != block_group_rec->offset ||
9426                     chunk_rec->offset != block_group_rec->objectid ||
9427                     (!metadump_v2 &&
9428                      chunk_rec->type_flags != block_group_rec->flags)) {
9429                         if (!silent)
9430                                 fprintf(stderr,
9431                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9432                                         chunk_rec->objectid,
9433                                         chunk_rec->type,
9434                                         chunk_rec->offset,
9435                                         chunk_rec->length,
9436                                         chunk_rec->offset,
9437                                         chunk_rec->type_flags,
9438                                         block_group_rec->objectid,
9439                                         block_group_rec->type,
9440                                         block_group_rec->offset,
9441                                         block_group_rec->offset,
9442                                         block_group_rec->objectid,
9443                                         block_group_rec->flags);
9444                         ret = -1;
9445                 } else {
9446                         list_del_init(&block_group_rec->list);
9447                         chunk_rec->bg_rec = block_group_rec;
9448                 }
9449         } else {
9450                 if (!silent)
9451                         fprintf(stderr,
9452                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9453                                 chunk_rec->objectid,
9454                                 chunk_rec->type,
9455                                 chunk_rec->offset,
9456                                 chunk_rec->length,
9457                                 chunk_rec->offset,
9458                                 chunk_rec->type_flags);
9459                 ret = 1;
9460         }
9461
9462         if (metadump_v2)
9463                 return ret;
9464
9465         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9466                                     chunk_rec->num_stripes);
9467         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9468                 devid = chunk_rec->stripes[i].devid;
9469                 offset = chunk_rec->stripes[i].offset;
9470                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9471                                                        devid, offset, length);
9472                 if (dev_extent_item) {
9473                         dev_extent_rec = container_of(dev_extent_item,
9474                                                 struct device_extent_record,
9475                                                 cache);
9476                         if (dev_extent_rec->objectid != devid ||
9477                             dev_extent_rec->offset != offset ||
9478                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9479                             dev_extent_rec->length != length) {
9480                                 if (!silent)
9481                                         fprintf(stderr,
9482                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9483                                                 chunk_rec->objectid,
9484                                                 chunk_rec->type,
9485                                                 chunk_rec->offset,
9486                                                 chunk_rec->stripes[i].devid,
9487                                                 chunk_rec->stripes[i].offset,
9488                                                 dev_extent_rec->objectid,
9489                                                 dev_extent_rec->offset,
9490                                                 dev_extent_rec->length);
9491                                 ret = -1;
9492                         } else {
9493                                 list_move(&dev_extent_rec->chunk_list,
9494                                           &chunk_rec->dextents);
9495                         }
9496                 } else {
9497                         if (!silent)
9498                                 fprintf(stderr,
9499                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9500                                         chunk_rec->objectid,
9501                                         chunk_rec->type,
9502                                         chunk_rec->offset,
9503                                         chunk_rec->stripes[i].devid,
9504                                         chunk_rec->stripes[i].offset);
9505                         ret = -1;
9506                 }
9507         }
9508         return ret;
9509 }
9510
9511 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9512 int check_chunks(struct cache_tree *chunk_cache,
9513                  struct block_group_tree *block_group_cache,
9514                  struct device_extent_tree *dev_extent_cache,
9515                  struct list_head *good, struct list_head *bad,
9516                  struct list_head *rebuild, int silent)
9517 {
9518         struct cache_extent *chunk_item;
9519         struct chunk_record *chunk_rec;
9520         struct block_group_record *bg_rec;
9521         struct device_extent_record *dext_rec;
9522         int err;
9523         int ret = 0;
9524
9525         chunk_item = first_cache_extent(chunk_cache);
9526         while (chunk_item) {
9527                 chunk_rec = container_of(chunk_item, struct chunk_record,
9528                                          cache);
9529                 err = check_chunk_refs(chunk_rec, block_group_cache,
9530                                        dev_extent_cache, silent);
9531                 if (err < 0)
9532                         ret = err;
9533                 if (err == 0 && good)
9534                         list_add_tail(&chunk_rec->list, good);
9535                 if (err > 0 && rebuild)
9536                         list_add_tail(&chunk_rec->list, rebuild);
9537                 if (err < 0 && bad)
9538                         list_add_tail(&chunk_rec->list, bad);
9539                 chunk_item = next_cache_extent(chunk_item);
9540         }
9541
9542         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9543                 if (!silent)
9544                         fprintf(stderr,
9545                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9546                                 bg_rec->objectid,
9547                                 bg_rec->offset,
9548                                 bg_rec->flags);
9549                 if (!ret)
9550                         ret = 1;
9551         }
9552
9553         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9554                             chunk_list) {
9555                 if (!silent)
9556                         fprintf(stderr,
9557                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9558                                 dext_rec->objectid,
9559                                 dext_rec->offset,
9560                                 dext_rec->length);
9561                 if (!ret)
9562                         ret = 1;
9563         }
9564         return ret;
9565 }
9566
9567
9568 static int check_device_used(struct device_record *dev_rec,
9569                              struct device_extent_tree *dext_cache)
9570 {
9571         struct cache_extent *cache;
9572         struct device_extent_record *dev_extent_rec;
9573         u64 total_byte = 0;
9574
9575         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9576         while (cache) {
9577                 dev_extent_rec = container_of(cache,
9578                                               struct device_extent_record,
9579                                               cache);
9580                 if (dev_extent_rec->objectid != dev_rec->devid)
9581                         break;
9582
9583                 list_del_init(&dev_extent_rec->device_list);
9584                 total_byte += dev_extent_rec->length;
9585                 cache = next_cache_extent(cache);
9586         }
9587
9588         if (total_byte != dev_rec->byte_used) {
9589                 fprintf(stderr,
9590                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9591                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9592                         dev_rec->type, dev_rec->offset);
9593                 return -1;
9594         } else {
9595                 return 0;
9596         }
9597 }
9598
9599 /* check btrfs_dev_item -> btrfs_dev_extent */
9600 static int check_devices(struct rb_root *dev_cache,
9601                          struct device_extent_tree *dev_extent_cache)
9602 {
9603         struct rb_node *dev_node;
9604         struct device_record *dev_rec;
9605         struct device_extent_record *dext_rec;
9606         int err;
9607         int ret = 0;
9608
9609         dev_node = rb_first(dev_cache);
9610         while (dev_node) {
9611                 dev_rec = container_of(dev_node, struct device_record, node);
9612                 err = check_device_used(dev_rec, dev_extent_cache);
9613                 if (err)
9614                         ret = err;
9615
9616                 dev_node = rb_next(dev_node);
9617         }
9618         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9619                             device_list) {
9620                 fprintf(stderr,
9621                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9622                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9623                 if (!ret)
9624                         ret = 1;
9625         }
9626         return ret;
9627 }
9628
9629 static int add_root_item_to_list(struct list_head *head,
9630                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9631                                   u8 level, u8 drop_level,
9632                                   int level_size, struct btrfs_key *drop_key)
9633 {
9634
9635         struct root_item_record *ri_rec;
9636         ri_rec = malloc(sizeof(*ri_rec));
9637         if (!ri_rec)
9638                 return -ENOMEM;
9639         ri_rec->bytenr = bytenr;
9640         ri_rec->objectid = objectid;
9641         ri_rec->level = level;
9642         ri_rec->level_size = level_size;
9643         ri_rec->drop_level = drop_level;
9644         ri_rec->last_snapshot = last_snapshot;
9645         if (drop_key)
9646                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9647         list_add_tail(&ri_rec->list, head);
9648
9649         return 0;
9650 }
9651
9652 static void free_root_item_list(struct list_head *list)
9653 {
9654         struct root_item_record *ri_rec;
9655
9656         while (!list_empty(list)) {
9657                 ri_rec = list_first_entry(list, struct root_item_record,
9658                                           list);
9659                 list_del_init(&ri_rec->list);
9660                 free(ri_rec);
9661         }
9662 }
9663
9664 static int deal_root_from_list(struct list_head *list,
9665                                struct btrfs_root *root,
9666                                struct block_info *bits,
9667                                int bits_nr,
9668                                struct cache_tree *pending,
9669                                struct cache_tree *seen,
9670                                struct cache_tree *reada,
9671                                struct cache_tree *nodes,
9672                                struct cache_tree *extent_cache,
9673                                struct cache_tree *chunk_cache,
9674                                struct rb_root *dev_cache,
9675                                struct block_group_tree *block_group_cache,
9676                                struct device_extent_tree *dev_extent_cache)
9677 {
9678         int ret = 0;
9679         u64 last;
9680
9681         while (!list_empty(list)) {
9682                 struct root_item_record *rec;
9683                 struct extent_buffer *buf;
9684                 rec = list_entry(list->next,
9685                                  struct root_item_record, list);
9686                 last = 0;
9687                 buf = read_tree_block(root->fs_info->tree_root,
9688                                       rec->bytenr, rec->level_size, 0);
9689                 if (!extent_buffer_uptodate(buf)) {
9690                         free_extent_buffer(buf);
9691                         ret = -EIO;
9692                         break;
9693                 }
9694                 ret = add_root_to_pending(buf, extent_cache, pending,
9695                                     seen, nodes, rec->objectid);
9696                 if (ret < 0)
9697                         break;
9698                 /*
9699                  * To rebuild extent tree, we need deal with snapshot
9700                  * one by one, otherwise we deal with node firstly which
9701                  * can maximize readahead.
9702                  */
9703                 while (1) {
9704                         ret = run_next_block(root, bits, bits_nr, &last,
9705                                              pending, seen, reada, nodes,
9706                                              extent_cache, chunk_cache,
9707                                              dev_cache, block_group_cache,
9708                                              dev_extent_cache, rec);
9709                         if (ret != 0)
9710                                 break;
9711                 }
9712                 free_extent_buffer(buf);
9713                 list_del(&rec->list);
9714                 free(rec);
9715                 if (ret < 0)
9716                         break;
9717         }
9718         while (ret >= 0) {
9719                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9720                                      reada, nodes, extent_cache, chunk_cache,
9721                                      dev_cache, block_group_cache,
9722                                      dev_extent_cache, NULL);
9723                 if (ret != 0) {
9724                         if (ret > 0)
9725                                 ret = 0;
9726                         break;
9727                 }
9728         }
9729         return ret;
9730 }
9731
9732 static int check_chunks_and_extents(struct btrfs_root *root)
9733 {
9734         struct rb_root dev_cache;
9735         struct cache_tree chunk_cache;
9736         struct block_group_tree block_group_cache;
9737         struct device_extent_tree dev_extent_cache;
9738         struct cache_tree extent_cache;
9739         struct cache_tree seen;
9740         struct cache_tree pending;
9741         struct cache_tree reada;
9742         struct cache_tree nodes;
9743         struct extent_io_tree excluded_extents;
9744         struct cache_tree corrupt_blocks;
9745         struct btrfs_path path;
9746         struct btrfs_key key;
9747         struct btrfs_key found_key;
9748         int ret, err = 0;
9749         struct block_info *bits;
9750         int bits_nr;
9751         struct extent_buffer *leaf;
9752         int slot;
9753         struct btrfs_root_item ri;
9754         struct list_head dropping_trees;
9755         struct list_head normal_trees;
9756         struct btrfs_root *root1;
9757         u64 objectid;
9758         u32 level_size;
9759         u8 level;
9760
9761         dev_cache = RB_ROOT;
9762         cache_tree_init(&chunk_cache);
9763         block_group_tree_init(&block_group_cache);
9764         device_extent_tree_init(&dev_extent_cache);
9765
9766         cache_tree_init(&extent_cache);
9767         cache_tree_init(&seen);
9768         cache_tree_init(&pending);
9769         cache_tree_init(&nodes);
9770         cache_tree_init(&reada);
9771         cache_tree_init(&corrupt_blocks);
9772         extent_io_tree_init(&excluded_extents);
9773         INIT_LIST_HEAD(&dropping_trees);
9774         INIT_LIST_HEAD(&normal_trees);
9775
9776         if (repair) {
9777                 root->fs_info->excluded_extents = &excluded_extents;
9778                 root->fs_info->fsck_extent_cache = &extent_cache;
9779                 root->fs_info->free_extent_hook = free_extent_hook;
9780                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9781         }
9782
9783         bits_nr = 1024;
9784         bits = malloc(bits_nr * sizeof(struct block_info));
9785         if (!bits) {
9786                 perror("malloc");
9787                 exit(1);
9788         }
9789
9790         if (ctx.progress_enabled) {
9791                 ctx.tp = TASK_EXTENTS;
9792                 task_start(ctx.info);
9793         }
9794
9795 again:
9796         root1 = root->fs_info->tree_root;
9797         level = btrfs_header_level(root1->node);
9798         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9799                                     root1->node->start, 0, level, 0,
9800                                     root1->nodesize, NULL);
9801         if (ret < 0)
9802                 goto out;
9803         root1 = root->fs_info->chunk_root;
9804         level = btrfs_header_level(root1->node);
9805         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9806                                     root1->node->start, 0, level, 0,
9807                                     root1->nodesize, NULL);
9808         if (ret < 0)
9809                 goto out;
9810         btrfs_init_path(&path);
9811         key.offset = 0;
9812         key.objectid = 0;
9813         key.type = BTRFS_ROOT_ITEM_KEY;
9814         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9815                                         &key, &path, 0, 0);
9816         if (ret < 0)
9817                 goto out;
9818         while(1) {
9819                 leaf = path.nodes[0];
9820                 slot = path.slots[0];
9821                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9822                         ret = btrfs_next_leaf(root, &path);
9823                         if (ret != 0)
9824                                 break;
9825                         leaf = path.nodes[0];
9826                         slot = path.slots[0];
9827                 }
9828                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9829                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9830                         unsigned long offset;
9831                         u64 last_snapshot;
9832
9833                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9834                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9835                         last_snapshot = btrfs_root_last_snapshot(&ri);
9836                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9837                                 level = btrfs_root_level(&ri);
9838                                 level_size = root->nodesize;
9839                                 ret = add_root_item_to_list(&normal_trees,
9840                                                 found_key.objectid,
9841                                                 btrfs_root_bytenr(&ri),
9842                                                 last_snapshot, level,
9843                                                 0, level_size, NULL);
9844                                 if (ret < 0)
9845                                         goto out;
9846                         } else {
9847                                 level = btrfs_root_level(&ri);
9848                                 level_size = root->nodesize;
9849                                 objectid = found_key.objectid;
9850                                 btrfs_disk_key_to_cpu(&found_key,
9851                                                       &ri.drop_progress);
9852                                 ret = add_root_item_to_list(&dropping_trees,
9853                                                 objectid,
9854                                                 btrfs_root_bytenr(&ri),
9855                                                 last_snapshot, level,
9856                                                 ri.drop_level,
9857                                                 level_size, &found_key);
9858                                 if (ret < 0)
9859                                         goto out;
9860                         }
9861                 }
9862                 path.slots[0]++;
9863         }
9864         btrfs_release_path(&path);
9865
9866         /*
9867          * check_block can return -EAGAIN if it fixes something, please keep
9868          * this in mind when dealing with return values from these functions, if
9869          * we get -EAGAIN we want to fall through and restart the loop.
9870          */
9871         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9872                                   &seen, &reada, &nodes, &extent_cache,
9873                                   &chunk_cache, &dev_cache, &block_group_cache,
9874                                   &dev_extent_cache);
9875         if (ret < 0) {
9876                 if (ret == -EAGAIN)
9877                         goto loop;
9878                 goto out;
9879         }
9880         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9881                                   &pending, &seen, &reada, &nodes,
9882                                   &extent_cache, &chunk_cache, &dev_cache,
9883                                   &block_group_cache, &dev_extent_cache);
9884         if (ret < 0) {
9885                 if (ret == -EAGAIN)
9886                         goto loop;
9887                 goto out;
9888         }
9889
9890         ret = check_chunks(&chunk_cache, &block_group_cache,
9891                            &dev_extent_cache, NULL, NULL, NULL, 0);
9892         if (ret) {
9893                 if (ret == -EAGAIN)
9894                         goto loop;
9895                 err = ret;
9896         }
9897
9898         ret = check_extent_refs(root, &extent_cache);
9899         if (ret < 0) {
9900                 if (ret == -EAGAIN)
9901                         goto loop;
9902                 goto out;
9903         }
9904
9905         ret = check_devices(&dev_cache, &dev_extent_cache);
9906         if (ret && err)
9907                 ret = err;
9908
9909 out:
9910         task_stop(ctx.info);
9911         if (repair) {
9912                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9913                 extent_io_tree_cleanup(&excluded_extents);
9914                 root->fs_info->fsck_extent_cache = NULL;
9915                 root->fs_info->free_extent_hook = NULL;
9916                 root->fs_info->corrupt_blocks = NULL;
9917                 root->fs_info->excluded_extents = NULL;
9918         }
9919         free(bits);
9920         free_chunk_cache_tree(&chunk_cache);
9921         free_device_cache_tree(&dev_cache);
9922         free_block_group_tree(&block_group_cache);
9923         free_device_extent_tree(&dev_extent_cache);
9924         free_extent_cache_tree(&seen);
9925         free_extent_cache_tree(&pending);
9926         free_extent_cache_tree(&reada);
9927         free_extent_cache_tree(&nodes);
9928         return ret;
9929 loop:
9930         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9931         free_extent_cache_tree(&seen);
9932         free_extent_cache_tree(&pending);
9933         free_extent_cache_tree(&reada);
9934         free_extent_cache_tree(&nodes);
9935         free_chunk_cache_tree(&chunk_cache);
9936         free_block_group_tree(&block_group_cache);
9937         free_device_cache_tree(&dev_cache);
9938         free_device_extent_tree(&dev_extent_cache);
9939         free_extent_record_cache(&extent_cache);
9940         free_root_item_list(&normal_trees);
9941         free_root_item_list(&dropping_trees);
9942         extent_io_tree_cleanup(&excluded_extents);
9943         goto again;
9944 }
9945
9946 /*
9947  * Check backrefs of a tree block given by @bytenr or @eb.
9948  *
9949  * @root:       the root containing the @bytenr or @eb
9950  * @eb:         tree block extent buffer, can be NULL
9951  * @bytenr:     bytenr of the tree block to search
9952  * @level:      tree level of the tree block
9953  * @owner:      owner of the tree block
9954  *
9955  * Return >0 for any error found and output error message
9956  * Return 0 for no error found
9957  */
9958 static int check_tree_block_ref(struct btrfs_root *root,
9959                                 struct extent_buffer *eb, u64 bytenr,
9960                                 int level, u64 owner)
9961 {
9962         struct btrfs_key key;
9963         struct btrfs_root *extent_root = root->fs_info->extent_root;
9964         struct btrfs_path path;
9965         struct btrfs_extent_item *ei;
9966         struct btrfs_extent_inline_ref *iref;
9967         struct extent_buffer *leaf;
9968         unsigned long end;
9969         unsigned long ptr;
9970         int slot;
9971         int skinny_level;
9972         int type;
9973         u32 nodesize = root->nodesize;
9974         u32 item_size;
9975         u64 offset;
9976         int tree_reloc_root = 0;
9977         int found_ref = 0;
9978         int err = 0;
9979         int ret;
9980
9981         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9982             btrfs_header_bytenr(root->node) == bytenr)
9983                 tree_reloc_root = 1;
9984
9985         btrfs_init_path(&path);
9986         key.objectid = bytenr;
9987         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9988                 key.type = BTRFS_METADATA_ITEM_KEY;
9989         else
9990                 key.type = BTRFS_EXTENT_ITEM_KEY;
9991         key.offset = (u64)-1;
9992
9993         /* Search for the backref in extent tree */
9994         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9995         if (ret < 0) {
9996                 err |= BACKREF_MISSING;
9997                 goto out;
9998         }
9999         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10000         if (ret) {
10001                 err |= BACKREF_MISSING;
10002                 goto out;
10003         }
10004
10005         leaf = path.nodes[0];
10006         slot = path.slots[0];
10007         btrfs_item_key_to_cpu(leaf, &key, slot);
10008
10009         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10010
10011         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10012                 skinny_level = (int)key.offset;
10013                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10014         } else {
10015                 struct btrfs_tree_block_info *info;
10016
10017                 info = (struct btrfs_tree_block_info *)(ei + 1);
10018                 skinny_level = btrfs_tree_block_level(leaf, info);
10019                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10020         }
10021
10022         if (eb) {
10023                 u64 header_gen;
10024                 u64 extent_gen;
10025
10026                 if (!(btrfs_extent_flags(leaf, ei) &
10027                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10028                         error(
10029                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10030                                 key.objectid, nodesize,
10031                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10032                         err = BACKREF_MISMATCH;
10033                 }
10034                 header_gen = btrfs_header_generation(eb);
10035                 extent_gen = btrfs_extent_generation(leaf, ei);
10036                 if (header_gen != extent_gen) {
10037                         error(
10038         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10039                                 key.objectid, nodesize, header_gen,
10040                                 extent_gen);
10041                         err = BACKREF_MISMATCH;
10042                 }
10043                 if (level != skinny_level) {
10044                         error(
10045                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10046                                 key.objectid, nodesize, level, skinny_level);
10047                         err = BACKREF_MISMATCH;
10048                 }
10049                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10050                         error(
10051                         "extent[%llu %u] is referred by other roots than %llu",
10052                                 key.objectid, nodesize, root->objectid);
10053                         err = BACKREF_MISMATCH;
10054                 }
10055         }
10056
10057         /*
10058          * Iterate the extent/metadata item to find the exact backref
10059          */
10060         item_size = btrfs_item_size_nr(leaf, slot);
10061         ptr = (unsigned long)iref;
10062         end = (unsigned long)ei + item_size;
10063         while (ptr < end) {
10064                 iref = (struct btrfs_extent_inline_ref *)ptr;
10065                 type = btrfs_extent_inline_ref_type(leaf, iref);
10066                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10067
10068                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10069                         (offset == root->objectid || offset == owner)) {
10070                         found_ref = 1;
10071                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10072                         /*
10073                          * Backref of tree reloc root points to itself, no need
10074                          * to check backref any more.
10075                          */
10076                         if (tree_reloc_root)
10077                                 found_ref = 1;
10078                         else
10079                         /* Check if the backref points to valid referencer */
10080                                 found_ref = !check_tree_block_ref(root, NULL,
10081                                                 offset, level + 1, owner);
10082                 }
10083
10084                 if (found_ref)
10085                         break;
10086                 ptr += btrfs_extent_inline_ref_size(type);
10087         }
10088
10089         /*
10090          * Inlined extent item doesn't have what we need, check
10091          * TREE_BLOCK_REF_KEY
10092          */
10093         if (!found_ref) {
10094                 btrfs_release_path(&path);
10095                 key.objectid = bytenr;
10096                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10097                 key.offset = root->objectid;
10098
10099                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10100                 if (!ret)
10101                         found_ref = 1;
10102         }
10103         if (!found_ref)
10104                 err |= BACKREF_MISSING;
10105 out:
10106         btrfs_release_path(&path);
10107         if (eb && (err & BACKREF_MISSING))
10108                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10109                         bytenr, nodesize, owner, level);
10110         return err;
10111 }
10112
10113 /*
10114  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10115  *
10116  * Return >0 any error found and output error message
10117  * Return 0 for no error found
10118  */
10119 static int check_extent_data_item(struct btrfs_root *root,
10120                                   struct extent_buffer *eb, int slot)
10121 {
10122         struct btrfs_file_extent_item *fi;
10123         struct btrfs_path path;
10124         struct btrfs_root *extent_root = root->fs_info->extent_root;
10125         struct btrfs_key fi_key;
10126         struct btrfs_key dbref_key;
10127         struct extent_buffer *leaf;
10128         struct btrfs_extent_item *ei;
10129         struct btrfs_extent_inline_ref *iref;
10130         struct btrfs_extent_data_ref *dref;
10131         u64 owner;
10132         u64 disk_bytenr;
10133         u64 disk_num_bytes;
10134         u64 extent_num_bytes;
10135         u64 extent_flags;
10136         u32 item_size;
10137         unsigned long end;
10138         unsigned long ptr;
10139         int type;
10140         u64 ref_root;
10141         int found_dbackref = 0;
10142         int err = 0;
10143         int ret;
10144
10145         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10146         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10147
10148         /* Nothing to check for hole and inline data extents */
10149         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10150             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10151                 return 0;
10152
10153         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10154         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10155         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10156
10157         /* Check unaligned disk_num_bytes and num_bytes */
10158         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10159                 error(
10160 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10161                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10162                         root->sectorsize);
10163                 err |= BYTES_UNALIGNED;
10164         } else {
10165                 data_bytes_allocated += disk_num_bytes;
10166         }
10167         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10168                 error(
10169 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10170                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10171                         root->sectorsize);
10172                 err |= BYTES_UNALIGNED;
10173         } else {
10174                 data_bytes_referenced += extent_num_bytes;
10175         }
10176         owner = btrfs_header_owner(eb);
10177
10178         /* Check the extent item of the file extent in extent tree */
10179         btrfs_init_path(&path);
10180         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10181         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10182         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10183
10184         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10185         if (ret) {
10186                 err |= BACKREF_MISSING;
10187                 goto error;
10188         }
10189
10190         leaf = path.nodes[0];
10191         slot = path.slots[0];
10192         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10193
10194         extent_flags = btrfs_extent_flags(leaf, ei);
10195
10196         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10197                 error(
10198                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10199                     disk_bytenr, disk_num_bytes,
10200                     BTRFS_EXTENT_FLAG_DATA);
10201                 err |= BACKREF_MISMATCH;
10202         }
10203
10204         /* Check data backref inside that extent item */
10205         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10206         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10207         ptr = (unsigned long)iref;
10208         end = (unsigned long)ei + item_size;
10209         while (ptr < end) {
10210                 iref = (struct btrfs_extent_inline_ref *)ptr;
10211                 type = btrfs_extent_inline_ref_type(leaf, iref);
10212                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10213
10214                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10215                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10216                         if (ref_root == owner || ref_root == root->objectid)
10217                                 found_dbackref = 1;
10218                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10219                         found_dbackref = !check_tree_block_ref(root, NULL,
10220                                 btrfs_extent_inline_ref_offset(leaf, iref),
10221                                 0, owner);
10222                 }
10223
10224                 if (found_dbackref)
10225                         break;
10226                 ptr += btrfs_extent_inline_ref_size(type);
10227         }
10228
10229         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10230         if (!found_dbackref) {
10231                 btrfs_release_path(&path);
10232
10233                 btrfs_init_path(&path);
10234                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10235                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10236                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10237                                 fi_key.objectid, fi_key.offset);
10238
10239                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10240                                         &dbref_key, &path, 0, 0);
10241                 if (!ret)
10242                         found_dbackref = 1;
10243         }
10244
10245         if (!found_dbackref)
10246                 err |= BACKREF_MISSING;
10247 error:
10248         btrfs_release_path(&path);
10249         if (err & BACKREF_MISSING) {
10250                 error("data extent[%llu %llu] backref lost",
10251                       disk_bytenr, disk_num_bytes);
10252         }
10253         return err;
10254 }
10255
10256 /*
10257  * Get real tree block level for the case like shared block
10258  * Return >= 0 as tree level
10259  * Return <0 for error
10260  */
10261 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10262 {
10263         struct extent_buffer *eb;
10264         struct btrfs_path path;
10265         struct btrfs_key key;
10266         struct btrfs_extent_item *ei;
10267         u64 flags;
10268         u64 transid;
10269         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10270         u8 backref_level;
10271         u8 header_level;
10272         int ret;
10273
10274         /* Search extent tree for extent generation and level */
10275         key.objectid = bytenr;
10276         key.type = BTRFS_METADATA_ITEM_KEY;
10277         key.offset = (u64)-1;
10278
10279         btrfs_init_path(&path);
10280         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10281         if (ret < 0)
10282                 goto release_out;
10283         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10284         if (ret < 0)
10285                 goto release_out;
10286         if (ret > 0) {
10287                 ret = -ENOENT;
10288                 goto release_out;
10289         }
10290
10291         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10292         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10293                             struct btrfs_extent_item);
10294         flags = btrfs_extent_flags(path.nodes[0], ei);
10295         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10296                 ret = -ENOENT;
10297                 goto release_out;
10298         }
10299
10300         /* Get transid for later read_tree_block() check */
10301         transid = btrfs_extent_generation(path.nodes[0], ei);
10302
10303         /* Get backref level as one source */
10304         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10305                 backref_level = key.offset;
10306         } else {
10307                 struct btrfs_tree_block_info *info;
10308
10309                 info = (struct btrfs_tree_block_info *)(ei + 1);
10310                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10311         }
10312         btrfs_release_path(&path);
10313
10314         /* Get level from tree block as an alternative source */
10315         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10316         if (!extent_buffer_uptodate(eb)) {
10317                 free_extent_buffer(eb);
10318                 return -EIO;
10319         }
10320         header_level = btrfs_header_level(eb);
10321         free_extent_buffer(eb);
10322
10323         if (header_level != backref_level)
10324                 return -EIO;
10325         return header_level;
10326
10327 release_out:
10328         btrfs_release_path(&path);
10329         return ret;
10330 }
10331
10332 /*
10333  * Check if a tree block backref is valid (points to a valid tree block)
10334  * if level == -1, level will be resolved
10335  * Return >0 for any error found and print error message
10336  */
10337 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10338                                     u64 bytenr, int level)
10339 {
10340         struct btrfs_root *root;
10341         struct btrfs_key key;
10342         struct btrfs_path path;
10343         struct extent_buffer *eb;
10344         struct extent_buffer *node;
10345         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10346         int err = 0;
10347         int ret;
10348
10349         /* Query level for level == -1 special case */
10350         if (level == -1)
10351                 level = query_tree_block_level(fs_info, bytenr);
10352         if (level < 0) {
10353                 err |= REFERENCER_MISSING;
10354                 goto out;
10355         }
10356
10357         key.objectid = root_id;
10358         key.type = BTRFS_ROOT_ITEM_KEY;
10359         key.offset = (u64)-1;
10360
10361         root = btrfs_read_fs_root(fs_info, &key);
10362         if (IS_ERR(root)) {
10363                 err |= REFERENCER_MISSING;
10364                 goto out;
10365         }
10366
10367         /* Read out the tree block to get item/node key */
10368         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10369         if (!extent_buffer_uptodate(eb)) {
10370                 err |= REFERENCER_MISSING;
10371                 free_extent_buffer(eb);
10372                 goto out;
10373         }
10374
10375         /* Empty tree, no need to check key */
10376         if (!btrfs_header_nritems(eb) && !level) {
10377                 free_extent_buffer(eb);
10378                 goto out;
10379         }
10380
10381         if (level)
10382                 btrfs_node_key_to_cpu(eb, &key, 0);
10383         else
10384                 btrfs_item_key_to_cpu(eb, &key, 0);
10385
10386         free_extent_buffer(eb);
10387
10388         btrfs_init_path(&path);
10389         path.lowest_level = level;
10390         /* Search with the first key, to ensure we can reach it */
10391         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10392         if (ret < 0) {
10393                 err |= REFERENCER_MISSING;
10394                 goto release_out;
10395         }
10396
10397         node = path.nodes[level];
10398         if (btrfs_header_bytenr(node) != bytenr) {
10399                 error(
10400         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10401                         bytenr, nodesize, bytenr,
10402                         btrfs_header_bytenr(node));
10403                 err |= REFERENCER_MISMATCH;
10404         }
10405         if (btrfs_header_level(node) != level) {
10406                 error(
10407         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10408                         bytenr, nodesize, level,
10409                         btrfs_header_level(node));
10410                 err |= REFERENCER_MISMATCH;
10411         }
10412
10413 release_out:
10414         btrfs_release_path(&path);
10415 out:
10416         if (err & REFERENCER_MISSING) {
10417                 if (level < 0)
10418                         error("extent [%llu %d] lost referencer (owner: %llu)",
10419                                 bytenr, nodesize, root_id);
10420                 else
10421                         error(
10422                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10423                                 bytenr, nodesize, root_id, level);
10424         }
10425
10426         return err;
10427 }
10428
10429 /*
10430  * Check if tree block @eb is tree reloc root.
10431  * Return 0 if it's not or any problem happens
10432  * Return 1 if it's a tree reloc root
10433  */
10434 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10435                                  struct extent_buffer *eb)
10436 {
10437         struct btrfs_root *tree_reloc_root;
10438         struct btrfs_key key;
10439         u64 bytenr = btrfs_header_bytenr(eb);
10440         u64 owner = btrfs_header_owner(eb);
10441         int ret = 0;
10442
10443         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10444         key.offset = owner;
10445         key.type = BTRFS_ROOT_ITEM_KEY;
10446
10447         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10448         if (IS_ERR(tree_reloc_root))
10449                 return 0;
10450
10451         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10452                 ret = 1;
10453         btrfs_free_fs_root(tree_reloc_root);
10454         return ret;
10455 }
10456
10457 /*
10458  * Check referencer for shared block backref
10459  * If level == -1, this function will resolve the level.
10460  */
10461 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10462                                      u64 parent, u64 bytenr, int level)
10463 {
10464         struct extent_buffer *eb;
10465         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10466         u32 nr;
10467         int found_parent = 0;
10468         int i;
10469
10470         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10471         if (!extent_buffer_uptodate(eb))
10472                 goto out;
10473
10474         if (level == -1)
10475                 level = query_tree_block_level(fs_info, bytenr);
10476         if (level < 0)
10477                 goto out;
10478
10479         /* It's possible it's a tree reloc root */
10480         if (parent == bytenr) {
10481                 if (is_tree_reloc_root(fs_info, eb))
10482                         found_parent = 1;
10483                 goto out;
10484         }
10485
10486         if (level + 1 != btrfs_header_level(eb))
10487                 goto out;
10488
10489         nr = btrfs_header_nritems(eb);
10490         for (i = 0; i < nr; i++) {
10491                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10492                         found_parent = 1;
10493                         break;
10494                 }
10495         }
10496 out:
10497         free_extent_buffer(eb);
10498         if (!found_parent) {
10499                 error(
10500         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10501                         bytenr, nodesize, parent, level);
10502                 return REFERENCER_MISSING;
10503         }
10504         return 0;
10505 }
10506
10507 /*
10508  * Check referencer for normal (inlined) data ref
10509  * If len == 0, it will be resolved by searching in extent tree
10510  */
10511 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10512                                      u64 root_id, u64 objectid, u64 offset,
10513                                      u64 bytenr, u64 len, u32 count)
10514 {
10515         struct btrfs_root *root;
10516         struct btrfs_root *extent_root = fs_info->extent_root;
10517         struct btrfs_key key;
10518         struct btrfs_path path;
10519         struct extent_buffer *leaf;
10520         struct btrfs_file_extent_item *fi;
10521         u32 found_count = 0;
10522         int slot;
10523         int ret = 0;
10524
10525         if (!len) {
10526                 key.objectid = bytenr;
10527                 key.type = BTRFS_EXTENT_ITEM_KEY;
10528                 key.offset = (u64)-1;
10529
10530                 btrfs_init_path(&path);
10531                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10532                 if (ret < 0)
10533                         goto out;
10534                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10535                 if (ret)
10536                         goto out;
10537                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10538                 if (key.objectid != bytenr ||
10539                     key.type != BTRFS_EXTENT_ITEM_KEY)
10540                         goto out;
10541                 len = key.offset;
10542                 btrfs_release_path(&path);
10543         }
10544         key.objectid = root_id;
10545         key.type = BTRFS_ROOT_ITEM_KEY;
10546         key.offset = (u64)-1;
10547         btrfs_init_path(&path);
10548
10549         root = btrfs_read_fs_root(fs_info, &key);
10550         if (IS_ERR(root))
10551                 goto out;
10552
10553         key.objectid = objectid;
10554         key.type = BTRFS_EXTENT_DATA_KEY;
10555         /*
10556          * It can be nasty as data backref offset is
10557          * file offset - file extent offset, which is smaller or
10558          * equal to original backref offset.  The only special case is
10559          * overflow.  So we need to special check and do further search.
10560          */
10561         key.offset = offset & (1ULL << 63) ? 0 : offset;
10562
10563         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10564         if (ret < 0)
10565                 goto out;
10566
10567         /*
10568          * Search afterwards to get correct one
10569          * NOTE: As we must do a comprehensive check on the data backref to
10570          * make sure the dref count also matches, we must iterate all file
10571          * extents for that inode.
10572          */
10573         while (1) {
10574                 leaf = path.nodes[0];
10575                 slot = path.slots[0];
10576
10577                 btrfs_item_key_to_cpu(leaf, &key, slot);
10578                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10579                         break;
10580                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10581                 /*
10582                  * Except normal disk bytenr and disk num bytes, we still
10583                  * need to do extra check on dbackref offset as
10584                  * dbackref offset = file_offset - file_extent_offset
10585                  */
10586                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10587                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10588                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10589                     offset)
10590                         found_count++;
10591
10592                 ret = btrfs_next_item(root, &path);
10593                 if (ret)
10594                         break;
10595         }
10596 out:
10597         btrfs_release_path(&path);
10598         if (found_count != count) {
10599                 error(
10600 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10601                         bytenr, len, root_id, objectid, offset, count, found_count);
10602                 return REFERENCER_MISSING;
10603         }
10604         return 0;
10605 }
10606
10607 /*
10608  * Check if the referencer of a shared data backref exists
10609  */
10610 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10611                                      u64 parent, u64 bytenr)
10612 {
10613         struct extent_buffer *eb;
10614         struct btrfs_key key;
10615         struct btrfs_file_extent_item *fi;
10616         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10617         u32 nr;
10618         int found_parent = 0;
10619         int i;
10620
10621         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10622         if (!extent_buffer_uptodate(eb))
10623                 goto out;
10624
10625         nr = btrfs_header_nritems(eb);
10626         for (i = 0; i < nr; i++) {
10627                 btrfs_item_key_to_cpu(eb, &key, i);
10628                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10629                         continue;
10630
10631                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10632                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10633                         continue;
10634
10635                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10636                         found_parent = 1;
10637                         break;
10638                 }
10639         }
10640
10641 out:
10642         free_extent_buffer(eb);
10643         if (!found_parent) {
10644                 error("shared extent %llu referencer lost (parent: %llu)",
10645                         bytenr, parent);
10646                 return REFERENCER_MISSING;
10647         }
10648         return 0;
10649 }
10650
10651 /*
10652  * This function will check a given extent item, including its backref and
10653  * itself (like crossing stripe boundary and type)
10654  *
10655  * Since we don't use extent_record anymore, introduce new error bit
10656  */
10657 static int check_extent_item(struct btrfs_fs_info *fs_info,
10658                              struct extent_buffer *eb, int slot)
10659 {
10660         struct btrfs_extent_item *ei;
10661         struct btrfs_extent_inline_ref *iref;
10662         struct btrfs_extent_data_ref *dref;
10663         unsigned long end;
10664         unsigned long ptr;
10665         int type;
10666         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10667         u32 item_size = btrfs_item_size_nr(eb, slot);
10668         u64 flags;
10669         u64 offset;
10670         int metadata = 0;
10671         int level;
10672         struct btrfs_key key;
10673         int ret;
10674         int err = 0;
10675
10676         btrfs_item_key_to_cpu(eb, &key, slot);
10677         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10678                 bytes_used += key.offset;
10679         else
10680                 bytes_used += nodesize;
10681
10682         if (item_size < sizeof(*ei)) {
10683                 /*
10684                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10685                  * old thing when on disk format is still un-determined.
10686                  * No need to care about it anymore
10687                  */
10688                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10689                 return -ENOTTY;
10690         }
10691
10692         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10693         flags = btrfs_extent_flags(eb, ei);
10694
10695         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10696                 metadata = 1;
10697         if (metadata && check_crossing_stripes(global_info, key.objectid,
10698                                                eb->len)) {
10699                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10700                       key.objectid, key.objectid + nodesize);
10701                 err |= CROSSING_STRIPE_BOUNDARY;
10702         }
10703
10704         ptr = (unsigned long)(ei + 1);
10705
10706         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10707                 /* Old EXTENT_ITEM metadata */
10708                 struct btrfs_tree_block_info *info;
10709
10710                 info = (struct btrfs_tree_block_info *)ptr;
10711                 level = btrfs_tree_block_level(eb, info);
10712                 ptr += sizeof(struct btrfs_tree_block_info);
10713         } else {
10714                 /* New METADATA_ITEM */
10715                 level = key.offset;
10716         }
10717         end = (unsigned long)ei + item_size;
10718
10719         if (ptr >= end) {
10720                 err |= ITEM_SIZE_MISMATCH;
10721                 goto out;
10722         }
10723
10724         /* Now check every backref in this extent item */
10725 next:
10726         iref = (struct btrfs_extent_inline_ref *)ptr;
10727         type = btrfs_extent_inline_ref_type(eb, iref);
10728         offset = btrfs_extent_inline_ref_offset(eb, iref);
10729         switch (type) {
10730         case BTRFS_TREE_BLOCK_REF_KEY:
10731                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10732                                                level);
10733                 err |= ret;
10734                 break;
10735         case BTRFS_SHARED_BLOCK_REF_KEY:
10736                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10737                                                  level);
10738                 err |= ret;
10739                 break;
10740         case BTRFS_EXTENT_DATA_REF_KEY:
10741                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10742                 ret = check_extent_data_backref(fs_info,
10743                                 btrfs_extent_data_ref_root(eb, dref),
10744                                 btrfs_extent_data_ref_objectid(eb, dref),
10745                                 btrfs_extent_data_ref_offset(eb, dref),
10746                                 key.objectid, key.offset,
10747                                 btrfs_extent_data_ref_count(eb, dref));
10748                 err |= ret;
10749                 break;
10750         case BTRFS_SHARED_DATA_REF_KEY:
10751                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10752                 err |= ret;
10753                 break;
10754         default:
10755                 error("extent[%llu %d %llu] has unknown ref type: %d",
10756                         key.objectid, key.type, key.offset, type);
10757                 err |= UNKNOWN_TYPE;
10758                 goto out;
10759         }
10760
10761         ptr += btrfs_extent_inline_ref_size(type);
10762         if (ptr < end)
10763                 goto next;
10764
10765 out:
10766         return err;
10767 }
10768
10769 /*
10770  * Check if a dev extent item is referred correctly by its chunk
10771  */
10772 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10773                                  struct extent_buffer *eb, int slot)
10774 {
10775         struct btrfs_root *chunk_root = fs_info->chunk_root;
10776         struct btrfs_dev_extent *ptr;
10777         struct btrfs_path path;
10778         struct btrfs_key chunk_key;
10779         struct btrfs_key devext_key;
10780         struct btrfs_chunk *chunk;
10781         struct extent_buffer *l;
10782         int num_stripes;
10783         u64 length;
10784         int i;
10785         int found_chunk = 0;
10786         int ret;
10787
10788         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10789         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10790         length = btrfs_dev_extent_length(eb, ptr);
10791
10792         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10793         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10794         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10795
10796         btrfs_init_path(&path);
10797         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10798         if (ret)
10799                 goto out;
10800
10801         l = path.nodes[0];
10802         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10803         if (btrfs_chunk_length(l, chunk) != length)
10804                 goto out;
10805
10806         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10807         for (i = 0; i < num_stripes; i++) {
10808                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10809                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10810
10811                 if (devid == devext_key.objectid &&
10812                     offset == devext_key.offset) {
10813                         found_chunk = 1;
10814                         break;
10815                 }
10816         }
10817 out:
10818         btrfs_release_path(&path);
10819         if (!found_chunk) {
10820                 error(
10821                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10822                         devext_key.objectid, devext_key.offset, length);
10823                 return REFERENCER_MISSING;
10824         }
10825         return 0;
10826 }
10827
10828 /*
10829  * Check if the used space is correct with the dev item
10830  */
10831 static int check_dev_item(struct btrfs_fs_info *fs_info,
10832                           struct extent_buffer *eb, int slot)
10833 {
10834         struct btrfs_root *dev_root = fs_info->dev_root;
10835         struct btrfs_dev_item *dev_item;
10836         struct btrfs_path path;
10837         struct btrfs_key key;
10838         struct btrfs_dev_extent *ptr;
10839         u64 dev_id;
10840         u64 used;
10841         u64 total = 0;
10842         int ret;
10843
10844         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10845         dev_id = btrfs_device_id(eb, dev_item);
10846         used = btrfs_device_bytes_used(eb, dev_item);
10847
10848         key.objectid = dev_id;
10849         key.type = BTRFS_DEV_EXTENT_KEY;
10850         key.offset = 0;
10851
10852         btrfs_init_path(&path);
10853         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10854         if (ret < 0) {
10855                 btrfs_item_key_to_cpu(eb, &key, slot);
10856                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10857                         key.objectid, key.type, key.offset);
10858                 btrfs_release_path(&path);
10859                 return REFERENCER_MISSING;
10860         }
10861
10862         /* Iterate dev_extents to calculate the used space of a device */
10863         while (1) {
10864                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10865
10866                 if (key.objectid > dev_id)
10867                         break;
10868                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10869                         goto next;
10870
10871                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10872                                      struct btrfs_dev_extent);
10873                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10874 next:
10875                 ret = btrfs_next_item(dev_root, &path);
10876                 if (ret)
10877                         break;
10878         }
10879         btrfs_release_path(&path);
10880
10881         if (used != total) {
10882                 btrfs_item_key_to_cpu(eb, &key, slot);
10883                 error(
10884 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10885                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10886                         BTRFS_DEV_EXTENT_KEY, dev_id);
10887                 return ACCOUNTING_MISMATCH;
10888         }
10889         return 0;
10890 }
10891
10892 /*
10893  * Check a block group item with its referener (chunk) and its used space
10894  * with extent/metadata item
10895  */
10896 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10897                                   struct extent_buffer *eb, int slot)
10898 {
10899         struct btrfs_root *extent_root = fs_info->extent_root;
10900         struct btrfs_root *chunk_root = fs_info->chunk_root;
10901         struct btrfs_block_group_item *bi;
10902         struct btrfs_block_group_item bg_item;
10903         struct btrfs_path path;
10904         struct btrfs_key bg_key;
10905         struct btrfs_key chunk_key;
10906         struct btrfs_key extent_key;
10907         struct btrfs_chunk *chunk;
10908         struct extent_buffer *leaf;
10909         struct btrfs_extent_item *ei;
10910         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10911         u64 flags;
10912         u64 bg_flags;
10913         u64 used;
10914         u64 total = 0;
10915         int ret;
10916         int err = 0;
10917
10918         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10919         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10920         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10921         used = btrfs_block_group_used(&bg_item);
10922         bg_flags = btrfs_block_group_flags(&bg_item);
10923
10924         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10925         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10926         chunk_key.offset = bg_key.objectid;
10927
10928         btrfs_init_path(&path);
10929         /* Search for the referencer chunk */
10930         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10931         if (ret) {
10932                 error(
10933                 "block group[%llu %llu] did not find the related chunk item",
10934                         bg_key.objectid, bg_key.offset);
10935                 err |= REFERENCER_MISSING;
10936         } else {
10937                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10938                                         struct btrfs_chunk);
10939                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10940                                                 bg_key.offset) {
10941                         error(
10942         "block group[%llu %llu] related chunk item length does not match",
10943                                 bg_key.objectid, bg_key.offset);
10944                         err |= REFERENCER_MISMATCH;
10945                 }
10946         }
10947         btrfs_release_path(&path);
10948
10949         /* Search from the block group bytenr */
10950         extent_key.objectid = bg_key.objectid;
10951         extent_key.type = 0;
10952         extent_key.offset = 0;
10953
10954         btrfs_init_path(&path);
10955         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10956         if (ret < 0)
10957                 goto out;
10958
10959         /* Iterate extent tree to account used space */
10960         while (1) {
10961                 leaf = path.nodes[0];
10962                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10963                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10964                         break;
10965
10966                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10967                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10968                         goto next;
10969                 if (extent_key.objectid < bg_key.objectid)
10970                         goto next;
10971
10972                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10973                         total += nodesize;
10974                 else
10975                         total += extent_key.offset;
10976
10977                 ei = btrfs_item_ptr(leaf, path.slots[0],
10978                                     struct btrfs_extent_item);
10979                 flags = btrfs_extent_flags(leaf, ei);
10980                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10981                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10982                                 error(
10983                         "bad extent[%llu, %llu) type mismatch with chunk",
10984                                         extent_key.objectid,
10985                                         extent_key.objectid + extent_key.offset);
10986                                 err |= CHUNK_TYPE_MISMATCH;
10987                         }
10988                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10989                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10990                                     BTRFS_BLOCK_GROUP_METADATA))) {
10991                                 error(
10992                         "bad extent[%llu, %llu) type mismatch with chunk",
10993                                         extent_key.objectid,
10994                                         extent_key.objectid + nodesize);
10995                                 err |= CHUNK_TYPE_MISMATCH;
10996                         }
10997                 }
10998 next:
10999                 ret = btrfs_next_item(extent_root, &path);
11000                 if (ret)
11001                         break;
11002         }
11003
11004 out:
11005         btrfs_release_path(&path);
11006
11007         if (total != used) {
11008                 error(
11009                 "block group[%llu %llu] used %llu but extent items used %llu",
11010                         bg_key.objectid, bg_key.offset, used, total);
11011                 err |= ACCOUNTING_MISMATCH;
11012         }
11013         return err;
11014 }
11015
11016 /*
11017  * Check a chunk item.
11018  * Including checking all referred dev_extents and block group
11019  */
11020 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11021                             struct extent_buffer *eb, int slot)
11022 {
11023         struct btrfs_root *extent_root = fs_info->extent_root;
11024         struct btrfs_root *dev_root = fs_info->dev_root;
11025         struct btrfs_path path;
11026         struct btrfs_key chunk_key;
11027         struct btrfs_key bg_key;
11028         struct btrfs_key devext_key;
11029         struct btrfs_chunk *chunk;
11030         struct extent_buffer *leaf;
11031         struct btrfs_block_group_item *bi;
11032         struct btrfs_block_group_item bg_item;
11033         struct btrfs_dev_extent *ptr;
11034         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11035         u64 length;
11036         u64 chunk_end;
11037         u64 type;
11038         u64 profile;
11039         int num_stripes;
11040         u64 offset;
11041         u64 objectid;
11042         int i;
11043         int ret;
11044         int err = 0;
11045
11046         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11047         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11048         length = btrfs_chunk_length(eb, chunk);
11049         chunk_end = chunk_key.offset + length;
11050         if (!IS_ALIGNED(length, sectorsize)) {
11051                 error("chunk[%llu %llu) not aligned to %u",
11052                         chunk_key.offset, chunk_end, sectorsize);
11053                 err |= BYTES_UNALIGNED;
11054                 goto out;
11055         }
11056
11057         type = btrfs_chunk_type(eb, chunk);
11058         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11059         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11060                 error("chunk[%llu %llu) has no chunk type",
11061                         chunk_key.offset, chunk_end);
11062                 err |= UNKNOWN_TYPE;
11063         }
11064         if (profile && (profile & (profile - 1))) {
11065                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11066                         chunk_key.offset, chunk_end, profile);
11067                 err |= UNKNOWN_TYPE;
11068         }
11069
11070         bg_key.objectid = chunk_key.offset;
11071         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11072         bg_key.offset = length;
11073
11074         btrfs_init_path(&path);
11075         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11076         if (ret) {
11077                 error(
11078                 "chunk[%llu %llu) did not find the related block group item",
11079                         chunk_key.offset, chunk_end);
11080                 err |= REFERENCER_MISSING;
11081         } else{
11082                 leaf = path.nodes[0];
11083                 bi = btrfs_item_ptr(leaf, path.slots[0],
11084                                     struct btrfs_block_group_item);
11085                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11086                                    sizeof(bg_item));
11087                 if (btrfs_block_group_flags(&bg_item) != type) {
11088                         error(
11089 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11090                                 chunk_key.offset, chunk_end, type,
11091                                 btrfs_block_group_flags(&bg_item));
11092                         err |= REFERENCER_MISSING;
11093                 }
11094         }
11095
11096         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11097         for (i = 0; i < num_stripes; i++) {
11098                 btrfs_release_path(&path);
11099                 btrfs_init_path(&path);
11100                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11101                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11102                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11103
11104                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11105                                         0, 0);
11106                 if (ret)
11107                         goto not_match_dev;
11108
11109                 leaf = path.nodes[0];
11110                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11111                                      struct btrfs_dev_extent);
11112                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11113                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11114                 if (objectid != chunk_key.objectid ||
11115                     offset != chunk_key.offset ||
11116                     btrfs_dev_extent_length(leaf, ptr) != length)
11117                         goto not_match_dev;
11118                 continue;
11119 not_match_dev:
11120                 err |= BACKREF_MISSING;
11121                 error(
11122                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11123                         chunk_key.objectid, chunk_end, i);
11124                 continue;
11125         }
11126         btrfs_release_path(&path);
11127 out:
11128         return err;
11129 }
11130
11131 /*
11132  * Main entry function to check known items and update related accounting info
11133  */
11134 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11135 {
11136         struct btrfs_fs_info *fs_info = root->fs_info;
11137         struct btrfs_key key;
11138         int slot = 0;
11139         int type;
11140         struct btrfs_extent_data_ref *dref;
11141         int ret;
11142         int err = 0;
11143
11144 next:
11145         btrfs_item_key_to_cpu(eb, &key, slot);
11146         type = key.type;
11147
11148         switch (type) {
11149         case BTRFS_EXTENT_DATA_KEY:
11150                 ret = check_extent_data_item(root, eb, slot);
11151                 err |= ret;
11152                 break;
11153         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11154                 ret = check_block_group_item(fs_info, eb, slot);
11155                 err |= ret;
11156                 break;
11157         case BTRFS_DEV_ITEM_KEY:
11158                 ret = check_dev_item(fs_info, eb, slot);
11159                 err |= ret;
11160                 break;
11161         case BTRFS_CHUNK_ITEM_KEY:
11162                 ret = check_chunk_item(fs_info, eb, slot);
11163                 err |= ret;
11164                 break;
11165         case BTRFS_DEV_EXTENT_KEY:
11166                 ret = check_dev_extent_item(fs_info, eb, slot);
11167                 err |= ret;
11168                 break;
11169         case BTRFS_EXTENT_ITEM_KEY:
11170         case BTRFS_METADATA_ITEM_KEY:
11171                 ret = check_extent_item(fs_info, eb, slot);
11172                 err |= ret;
11173                 break;
11174         case BTRFS_EXTENT_CSUM_KEY:
11175                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11176                 break;
11177         case BTRFS_TREE_BLOCK_REF_KEY:
11178                 ret = check_tree_block_backref(fs_info, key.offset,
11179                                                key.objectid, -1);
11180                 err |= ret;
11181                 break;
11182         case BTRFS_EXTENT_DATA_REF_KEY:
11183                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11184                 ret = check_extent_data_backref(fs_info,
11185                                 btrfs_extent_data_ref_root(eb, dref),
11186                                 btrfs_extent_data_ref_objectid(eb, dref),
11187                                 btrfs_extent_data_ref_offset(eb, dref),
11188                                 key.objectid, 0,
11189                                 btrfs_extent_data_ref_count(eb, dref));
11190                 err |= ret;
11191                 break;
11192         case BTRFS_SHARED_BLOCK_REF_KEY:
11193                 ret = check_shared_block_backref(fs_info, key.offset,
11194                                                  key.objectid, -1);
11195                 err |= ret;
11196                 break;
11197         case BTRFS_SHARED_DATA_REF_KEY:
11198                 ret = check_shared_data_backref(fs_info, key.offset,
11199                                                 key.objectid);
11200                 err |= ret;
11201                 break;
11202         default:
11203                 break;
11204         }
11205
11206         if (++slot < btrfs_header_nritems(eb))
11207                 goto next;
11208
11209         return err;
11210 }
11211
11212 /*
11213  * Helper function for later fs/subvol tree check.  To determine if a tree
11214  * block should be checked.
11215  * This function will ensure only the direct referencer with lowest rootid to
11216  * check a fs/subvolume tree block.
11217  *
11218  * Backref check at extent tree would detect errors like missing subvolume
11219  * tree, so we can do aggressive check to reduce duplicated checks.
11220  */
11221 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11222 {
11223         struct btrfs_root *extent_root = root->fs_info->extent_root;
11224         struct btrfs_key key;
11225         struct btrfs_path path;
11226         struct extent_buffer *leaf;
11227         int slot;
11228         struct btrfs_extent_item *ei;
11229         unsigned long ptr;
11230         unsigned long end;
11231         int type;
11232         u32 item_size;
11233         u64 offset;
11234         struct btrfs_extent_inline_ref *iref;
11235         int ret;
11236
11237         btrfs_init_path(&path);
11238         key.objectid = btrfs_header_bytenr(eb);
11239         key.type = BTRFS_METADATA_ITEM_KEY;
11240         key.offset = (u64)-1;
11241
11242         /*
11243          * Any failure in backref resolving means we can't determine
11244          * whom the tree block belongs to.
11245          * So in that case, we need to check that tree block
11246          */
11247         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11248         if (ret < 0)
11249                 goto need_check;
11250
11251         ret = btrfs_previous_extent_item(extent_root, &path,
11252                                          btrfs_header_bytenr(eb));
11253         if (ret)
11254                 goto need_check;
11255
11256         leaf = path.nodes[0];
11257         slot = path.slots[0];
11258         btrfs_item_key_to_cpu(leaf, &key, slot);
11259         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11260
11261         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11262                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11263         } else {
11264                 struct btrfs_tree_block_info *info;
11265
11266                 info = (struct btrfs_tree_block_info *)(ei + 1);
11267                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11268         }
11269
11270         item_size = btrfs_item_size_nr(leaf, slot);
11271         ptr = (unsigned long)iref;
11272         end = (unsigned long)ei + item_size;
11273         while (ptr < end) {
11274                 iref = (struct btrfs_extent_inline_ref *)ptr;
11275                 type = btrfs_extent_inline_ref_type(leaf, iref);
11276                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11277
11278                 /*
11279                  * We only check the tree block if current root is
11280                  * the lowest referencer of it.
11281                  */
11282                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11283                     offset < root->objectid) {
11284                         btrfs_release_path(&path);
11285                         return 0;
11286                 }
11287
11288                 ptr += btrfs_extent_inline_ref_size(type);
11289         }
11290         /*
11291          * Normally we should also check keyed tree block ref, but that may be
11292          * very time consuming.  Inlined ref should already make us skip a lot
11293          * of refs now.  So skip search keyed tree block ref.
11294          */
11295
11296 need_check:
11297         btrfs_release_path(&path);
11298         return 1;
11299 }
11300
11301 /*
11302  * Traversal function for tree block. We will do:
11303  * 1) Skip shared fs/subvolume tree blocks
11304  * 2) Update related bytes accounting
11305  * 3) Pre-order traversal
11306  */
11307 static int traverse_tree_block(struct btrfs_root *root,
11308                                 struct extent_buffer *node)
11309 {
11310         struct extent_buffer *eb;
11311         struct btrfs_key key;
11312         struct btrfs_key drop_key;
11313         int level;
11314         u64 nr;
11315         int i;
11316         int err = 0;
11317         int ret;
11318
11319         /*
11320          * Skip shared fs/subvolume tree block, in that case they will
11321          * be checked by referencer with lowest rootid
11322          */
11323         if (is_fstree(root->objectid) && !should_check(root, node))
11324                 return 0;
11325
11326         /* Update bytes accounting */
11327         total_btree_bytes += node->len;
11328         if (fs_root_objectid(btrfs_header_owner(node)))
11329                 total_fs_tree_bytes += node->len;
11330         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11331                 total_extent_tree_bytes += node->len;
11332         if (!found_old_backref &&
11333             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11334             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11335             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11336                 found_old_backref = 1;
11337
11338         /* pre-order tranversal, check itself first */
11339         level = btrfs_header_level(node);
11340         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11341                                    btrfs_header_level(node),
11342                                    btrfs_header_owner(node));
11343         err |= ret;
11344         if (err)
11345                 error(
11346         "check %s failed root %llu bytenr %llu level %d, force continue check",
11347                         level ? "node":"leaf", root->objectid,
11348                         btrfs_header_bytenr(node), btrfs_header_level(node));
11349
11350         if (!level) {
11351                 btree_space_waste += btrfs_leaf_free_space(root, node);
11352                 ret = check_leaf_items(root, node);
11353                 err |= ret;
11354                 return err;
11355         }
11356
11357         nr = btrfs_header_nritems(node);
11358         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11359         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11360                 sizeof(struct btrfs_key_ptr);
11361
11362         /* Then check all its children */
11363         for (i = 0; i < nr; i++) {
11364                 u64 blocknr = btrfs_node_blockptr(node, i);
11365
11366                 btrfs_node_key_to_cpu(node, &key, i);
11367                 if (level == root->root_item.drop_level &&
11368                     is_dropped_key(&key, &drop_key))
11369                         continue;
11370
11371                 /*
11372                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11373                  * to call the function itself.
11374                  */
11375                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11376                 if (extent_buffer_uptodate(eb)) {
11377                         ret = traverse_tree_block(root, eb);
11378                         err |= ret;
11379                 }
11380                 free_extent_buffer(eb);
11381         }
11382
11383         return err;
11384 }
11385
11386 /*
11387  * Low memory usage version check_chunks_and_extents.
11388  */
11389 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11390 {
11391         struct btrfs_path path;
11392         struct btrfs_key key;
11393         struct btrfs_root *root1;
11394         struct btrfs_root *cur_root;
11395         int err = 0;
11396         int ret;
11397
11398         root1 = root->fs_info->chunk_root;
11399         ret = traverse_tree_block(root1, root1->node);
11400         err |= ret;
11401
11402         root1 = root->fs_info->tree_root;
11403         ret = traverse_tree_block(root1, root1->node);
11404         err |= ret;
11405
11406         btrfs_init_path(&path);
11407         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11408         key.offset = 0;
11409         key.type = BTRFS_ROOT_ITEM_KEY;
11410
11411         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11412         if (ret) {
11413                 error("cannot find extent treet in tree_root");
11414                 goto out;
11415         }
11416
11417         while (1) {
11418                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11419                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11420                         goto next;
11421                 key.offset = (u64)-1;
11422
11423                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11424                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11425                                         &key);
11426                 else
11427                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11428                 if (IS_ERR(cur_root) || !cur_root) {
11429                         error("failed to read tree: %lld", key.objectid);
11430                         goto next;
11431                 }
11432
11433                 ret = traverse_tree_block(cur_root, cur_root->node);
11434                 err |= ret;
11435
11436                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11437                         btrfs_free_fs_root(cur_root);
11438 next:
11439                 ret = btrfs_next_item(root1, &path);
11440                 if (ret)
11441                         goto out;
11442         }
11443
11444 out:
11445         btrfs_release_path(&path);
11446         return err;
11447 }
11448
11449 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11450                            struct btrfs_root *root, int overwrite)
11451 {
11452         struct extent_buffer *c;
11453         struct extent_buffer *old = root->node;
11454         int level;
11455         int ret;
11456         struct btrfs_disk_key disk_key = {0,0,0};
11457
11458         level = 0;
11459
11460         if (overwrite) {
11461                 c = old;
11462                 extent_buffer_get(c);
11463                 goto init;
11464         }
11465         c = btrfs_alloc_free_block(trans, root,
11466                                    root->nodesize,
11467                                    root->root_key.objectid,
11468                                    &disk_key, level, 0, 0);
11469         if (IS_ERR(c)) {
11470                 c = old;
11471                 extent_buffer_get(c);
11472                 overwrite = 1;
11473         }
11474 init:
11475         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11476         btrfs_set_header_level(c, level);
11477         btrfs_set_header_bytenr(c, c->start);
11478         btrfs_set_header_generation(c, trans->transid);
11479         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11480         btrfs_set_header_owner(c, root->root_key.objectid);
11481
11482         write_extent_buffer(c, root->fs_info->fsid,
11483                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11484
11485         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11486                             btrfs_header_chunk_tree_uuid(c),
11487                             BTRFS_UUID_SIZE);
11488
11489         btrfs_mark_buffer_dirty(c);
11490         /*
11491          * this case can happen in the following case:
11492          *
11493          * 1.overwrite previous root.
11494          *
11495          * 2.reinit reloc data root, this is because we skip pin
11496          * down reloc data tree before which means we can allocate
11497          * same block bytenr here.
11498          */
11499         if (old->start == c->start) {
11500                 btrfs_set_root_generation(&root->root_item,
11501                                           trans->transid);
11502                 root->root_item.level = btrfs_header_level(root->node);
11503                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11504                                         &root->root_key, &root->root_item);
11505                 if (ret) {
11506                         free_extent_buffer(c);
11507                         return ret;
11508                 }
11509         }
11510         free_extent_buffer(old);
11511         root->node = c;
11512         add_root_to_dirty_list(root);
11513         return 0;
11514 }
11515
11516 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11517                                 struct extent_buffer *eb, int tree_root)
11518 {
11519         struct extent_buffer *tmp;
11520         struct btrfs_root_item *ri;
11521         struct btrfs_key key;
11522         u64 bytenr;
11523         u32 nodesize;
11524         int level = btrfs_header_level(eb);
11525         int nritems;
11526         int ret;
11527         int i;
11528
11529         /*
11530          * If we have pinned this block before, don't pin it again.
11531          * This can not only avoid forever loop with broken filesystem
11532          * but also give us some speedups.
11533          */
11534         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11535                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11536                 return 0;
11537
11538         btrfs_pin_extent(fs_info, eb->start, eb->len);
11539
11540         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11541         nritems = btrfs_header_nritems(eb);
11542         for (i = 0; i < nritems; i++) {
11543                 if (level == 0) {
11544                         btrfs_item_key_to_cpu(eb, &key, i);
11545                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11546                                 continue;
11547                         /* Skip the extent root and reloc roots */
11548                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11549                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11550                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11551                                 continue;
11552                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11553                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11554
11555                         /*
11556                          * If at any point we start needing the real root we
11557                          * will have to build a stump root for the root we are
11558                          * in, but for now this doesn't actually use the root so
11559                          * just pass in extent_root.
11560                          */
11561                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11562                                               nodesize, 0);
11563                         if (!extent_buffer_uptodate(tmp)) {
11564                                 fprintf(stderr, "Error reading root block\n");
11565                                 return -EIO;
11566                         }
11567                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11568                         free_extent_buffer(tmp);
11569                         if (ret)
11570                                 return ret;
11571                 } else {
11572                         bytenr = btrfs_node_blockptr(eb, i);
11573
11574                         /* If we aren't the tree root don't read the block */
11575                         if (level == 1 && !tree_root) {
11576                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11577                                 continue;
11578                         }
11579
11580                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11581                                               nodesize, 0);
11582                         if (!extent_buffer_uptodate(tmp)) {
11583                                 fprintf(stderr, "Error reading tree block\n");
11584                                 return -EIO;
11585                         }
11586                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11587                         free_extent_buffer(tmp);
11588                         if (ret)
11589                                 return ret;
11590                 }
11591         }
11592
11593         return 0;
11594 }
11595
11596 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11597 {
11598         int ret;
11599
11600         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11601         if (ret)
11602                 return ret;
11603
11604         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11605 }
11606
11607 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11608 {
11609         struct btrfs_block_group_cache *cache;
11610         struct btrfs_path path;
11611         struct extent_buffer *leaf;
11612         struct btrfs_chunk *chunk;
11613         struct btrfs_key key;
11614         int ret;
11615         u64 start;
11616
11617         btrfs_init_path(&path);
11618         key.objectid = 0;
11619         key.type = BTRFS_CHUNK_ITEM_KEY;
11620         key.offset = 0;
11621         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11622         if (ret < 0) {
11623                 btrfs_release_path(&path);
11624                 return ret;
11625         }
11626
11627         /*
11628          * We do this in case the block groups were screwed up and had alloc
11629          * bits that aren't actually set on the chunks.  This happens with
11630          * restored images every time and could happen in real life I guess.
11631          */
11632         fs_info->avail_data_alloc_bits = 0;
11633         fs_info->avail_metadata_alloc_bits = 0;
11634         fs_info->avail_system_alloc_bits = 0;
11635
11636         /* First we need to create the in-memory block groups */
11637         while (1) {
11638                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11639                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11640                         if (ret < 0) {
11641                                 btrfs_release_path(&path);
11642                                 return ret;
11643                         }
11644                         if (ret) {
11645                                 ret = 0;
11646                                 break;
11647                         }
11648                 }
11649                 leaf = path.nodes[0];
11650                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11651                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11652                         path.slots[0]++;
11653                         continue;
11654                 }
11655
11656                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11657                 btrfs_add_block_group(fs_info, 0,
11658                                       btrfs_chunk_type(leaf, chunk),
11659                                       key.objectid, key.offset,
11660                                       btrfs_chunk_length(leaf, chunk));
11661                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11662                                  key.offset + btrfs_chunk_length(leaf, chunk));
11663                 path.slots[0]++;
11664         }
11665         start = 0;
11666         while (1) {
11667                 cache = btrfs_lookup_first_block_group(fs_info, start);
11668                 if (!cache)
11669                         break;
11670                 cache->cached = 1;
11671                 start = cache->key.objectid + cache->key.offset;
11672         }
11673
11674         btrfs_release_path(&path);
11675         return 0;
11676 }
11677
11678 static int reset_balance(struct btrfs_trans_handle *trans,
11679                          struct btrfs_fs_info *fs_info)
11680 {
11681         struct btrfs_root *root = fs_info->tree_root;
11682         struct btrfs_path path;
11683         struct extent_buffer *leaf;
11684         struct btrfs_key key;
11685         int del_slot, del_nr = 0;
11686         int ret;
11687         int found = 0;
11688
11689         btrfs_init_path(&path);
11690         key.objectid = BTRFS_BALANCE_OBJECTID;
11691         key.type = BTRFS_BALANCE_ITEM_KEY;
11692         key.offset = 0;
11693         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11694         if (ret) {
11695                 if (ret > 0)
11696                         ret = 0;
11697                 if (!ret)
11698                         goto reinit_data_reloc;
11699                 else
11700                         goto out;
11701         }
11702
11703         ret = btrfs_del_item(trans, root, &path);
11704         if (ret)
11705                 goto out;
11706         btrfs_release_path(&path);
11707
11708         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11709         key.type = BTRFS_ROOT_ITEM_KEY;
11710         key.offset = 0;
11711         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11712         if (ret < 0)
11713                 goto out;
11714         while (1) {
11715                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11716                         if (!found)
11717                                 break;
11718
11719                         if (del_nr) {
11720                                 ret = btrfs_del_items(trans, root, &path,
11721                                                       del_slot, del_nr);
11722                                 del_nr = 0;
11723                                 if (ret)
11724                                         goto out;
11725                         }
11726                         key.offset++;
11727                         btrfs_release_path(&path);
11728
11729                         found = 0;
11730                         ret = btrfs_search_slot(trans, root, &key, &path,
11731                                                 -1, 1);
11732                         if (ret < 0)
11733                                 goto out;
11734                         continue;
11735                 }
11736                 found = 1;
11737                 leaf = path.nodes[0];
11738                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11739                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11740                         break;
11741                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11742                         path.slots[0]++;
11743                         continue;
11744                 }
11745                 if (!del_nr) {
11746                         del_slot = path.slots[0];
11747                         del_nr = 1;
11748                 } else {
11749                         del_nr++;
11750                 }
11751                 path.slots[0]++;
11752         }
11753
11754         if (del_nr) {
11755                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11756                 if (ret)
11757                         goto out;
11758         }
11759         btrfs_release_path(&path);
11760
11761 reinit_data_reloc:
11762         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11763         key.type = BTRFS_ROOT_ITEM_KEY;
11764         key.offset = (u64)-1;
11765         root = btrfs_read_fs_root(fs_info, &key);
11766         if (IS_ERR(root)) {
11767                 fprintf(stderr, "Error reading data reloc tree\n");
11768                 ret = PTR_ERR(root);
11769                 goto out;
11770         }
11771         record_root_in_trans(trans, root);
11772         ret = btrfs_fsck_reinit_root(trans, root, 0);
11773         if (ret)
11774                 goto out;
11775         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11776 out:
11777         btrfs_release_path(&path);
11778         return ret;
11779 }
11780
11781 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11782                               struct btrfs_fs_info *fs_info)
11783 {
11784         u64 start = 0;
11785         int ret;
11786
11787         /*
11788          * The only reason we don't do this is because right now we're just
11789          * walking the trees we find and pinning down their bytes, we don't look
11790          * at any of the leaves.  In order to do mixed groups we'd have to check
11791          * the leaves of any fs roots and pin down the bytes for any file
11792          * extents we find.  Not hard but why do it if we don't have to?
11793          */
11794         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11795                 fprintf(stderr, "We don't support re-initing the extent tree "
11796                         "for mixed block groups yet, please notify a btrfs "
11797                         "developer you want to do this so they can add this "
11798                         "functionality.\n");
11799                 return -EINVAL;
11800         }
11801
11802         /*
11803          * first we need to walk all of the trees except the extent tree and pin
11804          * down the bytes that are in use so we don't overwrite any existing
11805          * metadata.
11806          */
11807         ret = pin_metadata_blocks(fs_info);
11808         if (ret) {
11809                 fprintf(stderr, "error pinning down used bytes\n");
11810                 return ret;
11811         }
11812
11813         /*
11814          * Need to drop all the block groups since we're going to recreate all
11815          * of them again.
11816          */
11817         btrfs_free_block_groups(fs_info);
11818         ret = reset_block_groups(fs_info);
11819         if (ret) {
11820                 fprintf(stderr, "error resetting the block groups\n");
11821                 return ret;
11822         }
11823
11824         /* Ok we can allocate now, reinit the extent root */
11825         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11826         if (ret) {
11827                 fprintf(stderr, "extent root initialization failed\n");
11828                 /*
11829                  * When the transaction code is updated we should end the
11830                  * transaction, but for now progs only knows about commit so
11831                  * just return an error.
11832                  */
11833                 return ret;
11834         }
11835
11836         /*
11837          * Now we have all the in-memory block groups setup so we can make
11838          * allocations properly, and the metadata we care about is safe since we
11839          * pinned all of it above.
11840          */
11841         while (1) {
11842                 struct btrfs_block_group_cache *cache;
11843
11844                 cache = btrfs_lookup_first_block_group(fs_info, start);
11845                 if (!cache)
11846                         break;
11847                 start = cache->key.objectid + cache->key.offset;
11848                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11849                                         &cache->key, &cache->item,
11850                                         sizeof(cache->item));
11851                 if (ret) {
11852                         fprintf(stderr, "Error adding block group\n");
11853                         return ret;
11854                 }
11855                 btrfs_extent_post_op(trans, fs_info->extent_root);
11856         }
11857
11858         ret = reset_balance(trans, fs_info);
11859         if (ret)
11860                 fprintf(stderr, "error resetting the pending balance\n");
11861
11862         return ret;
11863 }
11864
11865 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11866 {
11867         struct btrfs_path path;
11868         struct btrfs_trans_handle *trans;
11869         struct btrfs_key key;
11870         int ret;
11871
11872         printf("Recowing metadata block %llu\n", eb->start);
11873         key.objectid = btrfs_header_owner(eb);
11874         key.type = BTRFS_ROOT_ITEM_KEY;
11875         key.offset = (u64)-1;
11876
11877         root = btrfs_read_fs_root(root->fs_info, &key);
11878         if (IS_ERR(root)) {
11879                 fprintf(stderr, "Couldn't find owner root %llu\n",
11880                         key.objectid);
11881                 return PTR_ERR(root);
11882         }
11883
11884         trans = btrfs_start_transaction(root, 1);
11885         if (IS_ERR(trans))
11886                 return PTR_ERR(trans);
11887
11888         btrfs_init_path(&path);
11889         path.lowest_level = btrfs_header_level(eb);
11890         if (path.lowest_level)
11891                 btrfs_node_key_to_cpu(eb, &key, 0);
11892         else
11893                 btrfs_item_key_to_cpu(eb, &key, 0);
11894
11895         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11896         btrfs_commit_transaction(trans, root);
11897         btrfs_release_path(&path);
11898         return ret;
11899 }
11900
11901 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11902 {
11903         struct btrfs_path path;
11904         struct btrfs_trans_handle *trans;
11905         struct btrfs_key key;
11906         int ret;
11907
11908         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11909                bad->key.type, bad->key.offset);
11910         key.objectid = bad->root_id;
11911         key.type = BTRFS_ROOT_ITEM_KEY;
11912         key.offset = (u64)-1;
11913
11914         root = btrfs_read_fs_root(root->fs_info, &key);
11915         if (IS_ERR(root)) {
11916                 fprintf(stderr, "Couldn't find owner root %llu\n",
11917                         key.objectid);
11918                 return PTR_ERR(root);
11919         }
11920
11921         trans = btrfs_start_transaction(root, 1);
11922         if (IS_ERR(trans))
11923                 return PTR_ERR(trans);
11924
11925         btrfs_init_path(&path);
11926         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11927         if (ret) {
11928                 if (ret > 0)
11929                         ret = 0;
11930                 goto out;
11931         }
11932         ret = btrfs_del_item(trans, root, &path);
11933 out:
11934         btrfs_commit_transaction(trans, root);
11935         btrfs_release_path(&path);
11936         return ret;
11937 }
11938
11939 static int zero_log_tree(struct btrfs_root *root)
11940 {
11941         struct btrfs_trans_handle *trans;
11942         int ret;
11943
11944         trans = btrfs_start_transaction(root, 1);
11945         if (IS_ERR(trans)) {
11946                 ret = PTR_ERR(trans);
11947                 return ret;
11948         }
11949         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11950         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11951         ret = btrfs_commit_transaction(trans, root);
11952         return ret;
11953 }
11954
11955 static int populate_csum(struct btrfs_trans_handle *trans,
11956                          struct btrfs_root *csum_root, char *buf, u64 start,
11957                          u64 len)
11958 {
11959         u64 offset = 0;
11960         u64 sectorsize;
11961         int ret = 0;
11962
11963         while (offset < len) {
11964                 sectorsize = csum_root->sectorsize;
11965                 ret = read_extent_data(csum_root, buf, start + offset,
11966                                        &sectorsize, 0);
11967                 if (ret)
11968                         break;
11969                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11970                                             start + offset, buf, sectorsize);
11971                 if (ret)
11972                         break;
11973                 offset += sectorsize;
11974         }
11975         return ret;
11976 }
11977
11978 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11979                                       struct btrfs_root *csum_root,
11980                                       struct btrfs_root *cur_root)
11981 {
11982         struct btrfs_path path;
11983         struct btrfs_key key;
11984         struct extent_buffer *node;
11985         struct btrfs_file_extent_item *fi;
11986         char *buf = NULL;
11987         u64 start = 0;
11988         u64 len = 0;
11989         int slot = 0;
11990         int ret = 0;
11991
11992         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11993         if (!buf)
11994                 return -ENOMEM;
11995
11996         btrfs_init_path(&path);
11997         key.objectid = 0;
11998         key.offset = 0;
11999         key.type = 0;
12000         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12001         if (ret < 0)
12002                 goto out;
12003         /* Iterate all regular file extents and fill its csum */
12004         while (1) {
12005                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12006
12007                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12008                         goto next;
12009                 node = path.nodes[0];
12010                 slot = path.slots[0];
12011                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12012                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12013                         goto next;
12014                 start = btrfs_file_extent_disk_bytenr(node, fi);
12015                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12016
12017                 ret = populate_csum(trans, csum_root, buf, start, len);
12018                 if (ret == -EEXIST)
12019                         ret = 0;
12020                 if (ret < 0)
12021                         goto out;
12022 next:
12023                 /*
12024                  * TODO: if next leaf is corrupted, jump to nearest next valid
12025                  * leaf.
12026                  */
12027                 ret = btrfs_next_item(cur_root, &path);
12028                 if (ret < 0)
12029                         goto out;
12030                 if (ret > 0) {
12031                         ret = 0;
12032                         goto out;
12033                 }
12034         }
12035
12036 out:
12037         btrfs_release_path(&path);
12038         free(buf);
12039         return ret;
12040 }
12041
12042 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12043                                   struct btrfs_root *csum_root)
12044 {
12045         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12046         struct btrfs_path path;
12047         struct btrfs_root *tree_root = fs_info->tree_root;
12048         struct btrfs_root *cur_root;
12049         struct extent_buffer *node;
12050         struct btrfs_key key;
12051         int slot = 0;
12052         int ret = 0;
12053
12054         btrfs_init_path(&path);
12055         key.objectid = BTRFS_FS_TREE_OBJECTID;
12056         key.offset = 0;
12057         key.type = BTRFS_ROOT_ITEM_KEY;
12058         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12059         if (ret < 0)
12060                 goto out;
12061         if (ret > 0) {
12062                 ret = -ENOENT;
12063                 goto out;
12064         }
12065
12066         while (1) {
12067                 node = path.nodes[0];
12068                 slot = path.slots[0];
12069                 btrfs_item_key_to_cpu(node, &key, slot);
12070                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12071                         goto out;
12072                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12073                         goto next;
12074                 if (!is_fstree(key.objectid))
12075                         goto next;
12076                 key.offset = (u64)-1;
12077
12078                 cur_root = btrfs_read_fs_root(fs_info, &key);
12079                 if (IS_ERR(cur_root) || !cur_root) {
12080                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12081                                 key.objectid);
12082                         goto out;
12083                 }
12084                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12085                                 cur_root);
12086                 if (ret < 0)
12087                         goto out;
12088 next:
12089                 ret = btrfs_next_item(tree_root, &path);
12090                 if (ret > 0) {
12091                         ret = 0;
12092                         goto out;
12093                 }
12094                 if (ret < 0)
12095                         goto out;
12096         }
12097
12098 out:
12099         btrfs_release_path(&path);
12100         return ret;
12101 }
12102
12103 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12104                                       struct btrfs_root *csum_root)
12105 {
12106         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12107         struct btrfs_path path;
12108         struct btrfs_extent_item *ei;
12109         struct extent_buffer *leaf;
12110         char *buf;
12111         struct btrfs_key key;
12112         int ret;
12113
12114         btrfs_init_path(&path);
12115         key.objectid = 0;
12116         key.type = BTRFS_EXTENT_ITEM_KEY;
12117         key.offset = 0;
12118         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12119         if (ret < 0) {
12120                 btrfs_release_path(&path);
12121                 return ret;
12122         }
12123
12124         buf = malloc(csum_root->sectorsize);
12125         if (!buf) {
12126                 btrfs_release_path(&path);
12127                 return -ENOMEM;
12128         }
12129
12130         while (1) {
12131                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12132                         ret = btrfs_next_leaf(extent_root, &path);
12133                         if (ret < 0)
12134                                 break;
12135                         if (ret) {
12136                                 ret = 0;
12137                                 break;
12138                         }
12139                 }
12140                 leaf = path.nodes[0];
12141
12142                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12143                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12144                         path.slots[0]++;
12145                         continue;
12146                 }
12147
12148                 ei = btrfs_item_ptr(leaf, path.slots[0],
12149                                     struct btrfs_extent_item);
12150                 if (!(btrfs_extent_flags(leaf, ei) &
12151                       BTRFS_EXTENT_FLAG_DATA)) {
12152                         path.slots[0]++;
12153                         continue;
12154                 }
12155
12156                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12157                                     key.offset);
12158                 if (ret)
12159                         break;
12160                 path.slots[0]++;
12161         }
12162
12163         btrfs_release_path(&path);
12164         free(buf);
12165         return ret;
12166 }
12167
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead; otherwise the extent tree is.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12184
12185 static void free_roots_info_cache(void)
12186 {
12187         if (!roots_info_cache)
12188                 return;
12189
12190         while (!cache_tree_empty(roots_info_cache)) {
12191                 struct cache_extent *entry;
12192                 struct root_item_info *rii;
12193
12194                 entry = first_cache_extent(roots_info_cache);
12195                 if (!entry)
12196                         break;
12197                 remove_cache_extent(roots_info_cache, entry);
12198                 rii = container_of(entry, struct root_item_info, cache_extent);
12199                 free(rii);
12200         }
12201
12202         free(roots_info_cache);
12203         roots_info_cache = NULL;
12204 }
12205
12206 static int build_roots_info_cache(struct btrfs_fs_info *info)
12207 {
12208         int ret = 0;
12209         struct btrfs_key key;
12210         struct extent_buffer *leaf;
12211         struct btrfs_path path;
12212
12213         if (!roots_info_cache) {
12214                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12215                 if (!roots_info_cache)
12216                         return -ENOMEM;
12217                 cache_tree_init(roots_info_cache);
12218         }
12219
12220         btrfs_init_path(&path);
12221         key.objectid = 0;
12222         key.type = BTRFS_EXTENT_ITEM_KEY;
12223         key.offset = 0;
12224         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12225         if (ret < 0)
12226                 goto out;
12227         leaf = path.nodes[0];
12228
12229         while (1) {
12230                 struct btrfs_key found_key;
12231                 struct btrfs_extent_item *ei;
12232                 struct btrfs_extent_inline_ref *iref;
12233                 int slot = path.slots[0];
12234                 int type;
12235                 u64 flags;
12236                 u64 root_id;
12237                 u8 level;
12238                 struct cache_extent *entry;
12239                 struct root_item_info *rii;
12240
12241                 if (slot >= btrfs_header_nritems(leaf)) {
12242                         ret = btrfs_next_leaf(info->extent_root, &path);
12243                         if (ret < 0) {
12244                                 break;
12245                         } else if (ret) {
12246                                 ret = 0;
12247                                 break;
12248                         }
12249                         leaf = path.nodes[0];
12250                         slot = path.slots[0];
12251                 }
12252
12253                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12254
12255                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12256                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12257                         goto next;
12258
12259                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12260                 flags = btrfs_extent_flags(leaf, ei);
12261
12262                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12263                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12264                         goto next;
12265
12266                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12267                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12268                         level = found_key.offset;
12269                 } else {
12270                         struct btrfs_tree_block_info *binfo;
12271
12272                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12273                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12274                         level = btrfs_tree_block_level(leaf, binfo);
12275                 }
12276
12277                 /*
12278                  * For a root extent, it must be of the following type and the
12279                  * first (and only one) iref in the item.
12280                  */
12281                 type = btrfs_extent_inline_ref_type(leaf, iref);
12282                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12283                         goto next;
12284
12285                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12286                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12287                 if (!entry) {
12288                         rii = malloc(sizeof(struct root_item_info));
12289                         if (!rii) {
12290                                 ret = -ENOMEM;
12291                                 goto out;
12292                         }
12293                         rii->cache_extent.start = root_id;
12294                         rii->cache_extent.size = 1;
12295                         rii->level = (u8)-1;
12296                         entry = &rii->cache_extent;
12297                         ret = insert_cache_extent(roots_info_cache, entry);
12298                         ASSERT(ret == 0);
12299                 } else {
12300                         rii = container_of(entry, struct root_item_info,
12301                                            cache_extent);
12302                 }
12303
12304                 ASSERT(rii->cache_extent.start == root_id);
12305                 ASSERT(rii->cache_extent.size == 1);
12306
12307                 if (level > rii->level || rii->level == (u8)-1) {
12308                         rii->level = level;
12309                         rii->bytenr = found_key.objectid;
12310                         rii->gen = btrfs_extent_generation(leaf, ei);
12311                         rii->node_count = 1;
12312                 } else if (level == rii->level) {
12313                         rii->node_count++;
12314                 }
12315 next:
12316                 path.slots[0]++;
12317         }
12318
12319 out:
12320         btrfs_release_path(&path);
12321
12322         return ret;
12323 }
12324
12325 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12326                                   struct btrfs_path *path,
12327                                   const struct btrfs_key *root_key,
12328                                   const int read_only_mode)
12329 {
12330         const u64 root_id = root_key->objectid;
12331         struct cache_extent *entry;
12332         struct root_item_info *rii;
12333         struct btrfs_root_item ri;
12334         unsigned long offset;
12335
12336         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12337         if (!entry) {
12338                 fprintf(stderr,
12339                         "Error: could not find extent items for root %llu\n",
12340                         root_key->objectid);
12341                 return -ENOENT;
12342         }
12343
12344         rii = container_of(entry, struct root_item_info, cache_extent);
12345         ASSERT(rii->cache_extent.start == root_id);
12346         ASSERT(rii->cache_extent.size == 1);
12347
12348         if (rii->node_count != 1) {
12349                 fprintf(stderr,
12350                         "Error: could not find btree root extent for root %llu\n",
12351                         root_id);
12352                 return -ENOENT;
12353         }
12354
12355         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12356         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12357
12358         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12359             btrfs_root_level(&ri) != rii->level ||
12360             btrfs_root_generation(&ri) != rii->gen) {
12361
12362                 /*
12363                  * If we're in repair mode but our caller told us to not update
12364                  * the root item, i.e. just check if it needs to be updated, don't
12365                  * print this message, since the caller will call us again shortly
12366                  * for the same root item without read only mode (the caller will
12367                  * open a transaction first).
12368                  */
12369                 if (!(read_only_mode && repair))
12370                         fprintf(stderr,
12371                                 "%sroot item for root %llu,"
12372                                 " current bytenr %llu, current gen %llu, current level %u,"
12373                                 " new bytenr %llu, new gen %llu, new level %u\n",
12374                                 (read_only_mode ? "" : "fixing "),
12375                                 root_id,
12376                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12377                                 btrfs_root_level(&ri),
12378                                 rii->bytenr, rii->gen, rii->level);
12379
12380                 if (btrfs_root_generation(&ri) > rii->gen) {
12381                         fprintf(stderr,
12382                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12383                                 root_id, btrfs_root_generation(&ri), rii->gen);
12384                         return -EINVAL;
12385                 }
12386
12387                 if (!read_only_mode) {
12388                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12389                         btrfs_set_root_level(&ri, rii->level);
12390                         btrfs_set_root_generation(&ri, rii->gen);
12391                         write_extent_buffer(path->nodes[0], &ri,
12392                                             offset, sizeof(ri));
12393                 }
12394
12395                 return 1;
12396         }
12397
12398         return 0;
12399 }
12400
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup
 * root node instead of the post orphan cleanup root node.
 * This function, and its callees, detect and fix those cases. Even though the
 * regression was for read-only snapshots, this function applies to any
 * snapshot/subvolume root.
 * This must be run before any other repair code. Otherwise the other repair
 * code may, for example, delete or modify backrefs in the extent tree, which
 * would result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative value on error, otherwise the number of root items for
 * which maybe_repair_root_item() reported a mismatch (whether only detected
 * or also fixed).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
        struct btrfs_path path;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
        int bad_roots = 0;
        int need_trans = 0;

        btrfs_init_path(&path);

        /* Collect the candidate root node of every root from the extent tree */
        ret = build_roots_info_cache(info);
        if (ret)
                goto out;

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;

again:
        /*
         * Avoid opening and committing transactions if a leaf doesn't have
         * any root items that need to be fixed, so that we avoid rotating
         * backup roots unnecessarily.
         */
        if (need_trans) {
                trans = btrfs_start_transaction(info->tree_root, 1);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        goto out;
                }
        }

        /* The search only requests COW when a transaction is open */
        ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
                                0, trans ? 1 : 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;

                if (path.slots[0] >= btrfs_header_nritems(leaf)) {
                        /*
                         * End of this leaf.  find_next_key() presumably stores
                         * the key to resume from in 'key' and returns non-zero
                         * when there is none -- confirm against its definition.
                         */
                        int no_more_keys = find_next_key(&path, &key);

                        btrfs_release_path(&path);
                        /* Commit the fixes made in this leaf, if any */
                        if (trans) {
                                ret = btrfs_commit_transaction(trans,
                                                               info->tree_root);
                                trans = NULL;
                                if (ret < 0)
                                        goto out;
                        }
                        need_trans = 0;
                        if (no_more_keys)
                                break;
                        goto again;
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                if (found_key.type != BTRFS_ROOT_ITEM_KEY)
                        goto next;
                /* Root items of the tree reloc tree are not checked */
                if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        goto next;

                /* Check only (read-only) unless a transaction is open */
                ret = maybe_repair_root_item(info, &path, &found_key,
                                             trans ? 0 : 1);
                if (ret < 0)
                        goto out;
                if (ret) {
                        /*
                         * Mismatch found while only checking and repair is
                         * enabled: restart the search at this item with a
                         * transaction so it can be rewritten.
                         */
                        if (!trans && repair) {
                                need_trans = 1;
                                key = found_key;
                                btrfs_release_path(&path);
                                goto again;
                        }
                        bad_roots++;
                }
next:
                path.slots[0]++;
        }
        ret = 0;
out:
        free_roots_info_cache();
        btrfs_release_path(&path);
        /* A transaction can still be open here only on an error path */
        if (trans)
                btrfs_commit_transaction(trans, info->tree_root);
        if (ret < 0)
                return ret;

        return bad_roots;
}
12508
12509 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12510 {
12511         struct btrfs_trans_handle *trans;
12512         struct btrfs_block_group_cache *bg_cache;
12513         u64 current = 0;
12514         int ret = 0;
12515
12516         /* Clear all free space cache inodes and its extent data */
12517         while (1) {
12518                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12519                 if (!bg_cache)
12520                         break;
12521                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12522                 if (ret < 0)
12523                         return ret;
12524                 current = bg_cache->key.objectid + bg_cache->key.offset;
12525         }
12526
12527         /* Don't forget to set cache_generation to -1 */
12528         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12529         if (IS_ERR(trans)) {
12530                 error("failed to update super block cache generation");
12531                 return PTR_ERR(trans);
12532         }
12533         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12534         btrfs_commit_transaction(trans, fs_info->tree_root);
12535
12536         return ret;
12537 }
12538
/*
 * Help text for "btrfs check": a NULL-terminated array of lines that
 * cmd_check() passes to usage().
 *
 * NOTE(review): the layout (synopsis, one-line summary, longer description,
 * empty string, then one entry per option) presumably follows what usage()
 * expects -- confirm against the help code before reordering entries.
 *
 * cmd_check() also accepts and ignores a short option 'a' that is not
 * listed here.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12569
12570 int cmd_check(int argc, char **argv)
12571 {
12572         struct cache_tree root_cache;
12573         struct btrfs_root *root;
12574         struct btrfs_fs_info *info;
12575         u64 bytenr = 0;
12576         u64 subvolid = 0;
12577         u64 tree_root_bytenr = 0;
12578         u64 chunk_root_bytenr = 0;
12579         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12580         int ret;
12581         int err = 0;
12582         u64 num;
12583         int init_csum_tree = 0;
12584         int readonly = 0;
12585         int clear_space_cache = 0;
12586         int qgroup_report = 0;
12587         int qgroups_repaired = 0;
12588         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12589
12590         while(1) {
12591                 int c;
12592                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12593                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12594                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12595                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12596                 static const struct option long_options[] = {
12597                         { "super", required_argument, NULL, 's' },
12598                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12599                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12600                         { "init-csum-tree", no_argument, NULL,
12601                                 GETOPT_VAL_INIT_CSUM },
12602                         { "init-extent-tree", no_argument, NULL,
12603                                 GETOPT_VAL_INIT_EXTENT },
12604                         { "check-data-csum", no_argument, NULL,
12605                                 GETOPT_VAL_CHECK_CSUM },
12606                         { "backup", no_argument, NULL, 'b' },
12607                         { "subvol-extents", required_argument, NULL, 'E' },
12608                         { "qgroup-report", no_argument, NULL, 'Q' },
12609                         { "tree-root", required_argument, NULL, 'r' },
12610                         { "chunk-root", required_argument, NULL,
12611                                 GETOPT_VAL_CHUNK_TREE },
12612                         { "progress", no_argument, NULL, 'p' },
12613                         { "mode", required_argument, NULL,
12614                                 GETOPT_VAL_MODE },
12615                         { "clear-space-cache", required_argument, NULL,
12616                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12617                         { NULL, 0, NULL, 0}
12618                 };
12619
12620                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12621                 if (c < 0)
12622                         break;
12623                 switch(c) {
12624                         case 'a': /* ignored */ break;
12625                         case 'b':
12626                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12627                                 break;
12628                         case 's':
12629                                 num = arg_strtou64(optarg);
12630                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12631                                         error(
12632                                         "super mirror should be less than %d",
12633                                                 BTRFS_SUPER_MIRROR_MAX);
12634                                         exit(1);
12635                                 }
12636                                 bytenr = btrfs_sb_offset(((int)num));
12637                                 printf("using SB copy %llu, bytenr %llu\n", num,
12638                                        (unsigned long long)bytenr);
12639                                 break;
12640                         case 'Q':
12641                                 qgroup_report = 1;
12642                                 break;
12643                         case 'E':
12644                                 subvolid = arg_strtou64(optarg);
12645                                 break;
12646                         case 'r':
12647                                 tree_root_bytenr = arg_strtou64(optarg);
12648                                 break;
12649                         case GETOPT_VAL_CHUNK_TREE:
12650                                 chunk_root_bytenr = arg_strtou64(optarg);
12651                                 break;
12652                         case 'p':
12653                                 ctx.progress_enabled = true;
12654                                 break;
12655                         case '?':
12656                         case 'h':
12657                                 usage(cmd_check_usage);
12658                         case GETOPT_VAL_REPAIR:
12659                                 printf("enabling repair mode\n");
12660                                 repair = 1;
12661                                 ctree_flags |= OPEN_CTREE_WRITES;
12662                                 break;
12663                         case GETOPT_VAL_READONLY:
12664                                 readonly = 1;
12665                                 break;
12666                         case GETOPT_VAL_INIT_CSUM:
12667                                 printf("Creating a new CRC tree\n");
12668                                 init_csum_tree = 1;
12669                                 repair = 1;
12670                                 ctree_flags |= OPEN_CTREE_WRITES;
12671                                 break;
12672                         case GETOPT_VAL_INIT_EXTENT:
12673                                 init_extent_tree = 1;
12674                                 ctree_flags |= (OPEN_CTREE_WRITES |
12675                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12676                                 repair = 1;
12677                                 break;
12678                         case GETOPT_VAL_CHECK_CSUM:
12679                                 check_data_csum = 1;
12680                                 break;
12681                         case GETOPT_VAL_MODE:
12682                                 check_mode = parse_check_mode(optarg);
12683                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12684                                         error("unknown mode: %s", optarg);
12685                                         exit(1);
12686                                 }
12687                                 break;
12688                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12689                                 if (strcmp(optarg, "v1") == 0) {
12690                                         clear_space_cache = 1;
12691                                 } else if (strcmp(optarg, "v2") == 0) {
12692                                         clear_space_cache = 2;
12693                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12694                                 } else {
12695                                         error(
12696                 "invalid argument to --clear-space-cache, must be v1 or v2");
12697                                         exit(1);
12698                                 }
12699                                 ctree_flags |= OPEN_CTREE_WRITES;
12700                                 break;
12701                 }
12702         }
12703
12704         if (check_argc_exact(argc - optind, 1))
12705                 usage(cmd_check_usage);
12706
12707         if (ctx.progress_enabled) {
12708                 ctx.tp = TASK_NOTHING;
12709                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12710         }
12711
12712         /* This check is the only reason for --readonly to exist */
12713         if (readonly && repair) {
12714                 error("repair options are not compatible with --readonly");
12715                 exit(1);
12716         }
12717
12718         /*
12719          * Not supported yet
12720          */
12721         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12722                 error("low memory mode doesn't support repair yet");
12723                 exit(1);
12724         }
12725
12726         radix_tree_init();
12727         cache_tree_init(&root_cache);
12728
12729         if((ret = check_mounted(argv[optind])) < 0) {
12730                 error("could not check mount status: %s", strerror(-ret));
12731                 err |= !!ret;
12732                 goto err_out;
12733         } else if(ret) {
12734                 error("%s is currently mounted, aborting", argv[optind]);
12735                 ret = -EBUSY;
12736                 err |= !!ret;
12737                 goto err_out;
12738         }
12739
12740         /* only allow partial opening under repair mode */
12741         if (repair)
12742                 ctree_flags |= OPEN_CTREE_PARTIAL;
12743
12744         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12745                                   chunk_root_bytenr, ctree_flags);
12746         if (!info) {
12747                 error("cannot open file system");
12748                 ret = -EIO;
12749                 err |= !!ret;
12750                 goto err_out;
12751         }
12752
12753         global_info = info;
12754         root = info->fs_root;
12755         if (clear_space_cache == 1) {
12756                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12757                         error(
12758                 "free space cache v2 detected, use --clear-space-cache v2");
12759                         ret = 1;
12760                         goto close_out;
12761                 }
12762                 printf("Clearing free space cache\n");
12763                 ret = clear_free_space_cache(info);
12764                 if (ret) {
12765                         error("failed to clear free space cache");
12766                         ret = 1;
12767                 } else {
12768                         printf("Free space cache cleared\n");
12769                 }
12770                 goto close_out;
12771         } else if (clear_space_cache == 2) {
12772                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12773                         printf("no free space cache v2 to clear\n");
12774                         ret = 0;
12775                         goto close_out;
12776                 }
12777                 printf("Clear free space cache v2\n");
12778                 ret = btrfs_clear_free_space_tree(info);
12779                 if (ret) {
12780                         error("failed to clear free space cache v2: %d", ret);
12781                         ret = 1;
12782                 } else {
12783                         printf("free space cache v2 cleared\n");
12784                 }
12785                 goto close_out;
12786         }
12787
12788         /*
12789          * repair mode will force us to commit transaction which
12790          * will make us fail to load log tree when mounting.
12791          */
12792         if (repair && btrfs_super_log_root(info->super_copy)) {
12793                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12794                 if (!ret) {
12795                         ret = 1;
12796                         err |= !!ret;
12797                         goto close_out;
12798                 }
12799                 ret = zero_log_tree(root);
12800                 err |= !!ret;
12801                 if (ret) {
12802                         error("failed to zero log tree: %d", ret);
12803                         goto close_out;
12804                 }
12805         }
12806
12807         uuid_unparse(info->super_copy->fsid, uuidbuf);
12808         if (qgroup_report) {
12809                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12810                        uuidbuf);
12811                 ret = qgroup_verify_all(info);
12812                 err |= !!ret;
12813                 if (ret == 0)
12814                         report_qgroups(1);
12815                 goto close_out;
12816         }
12817         if (subvolid) {
12818                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12819                        subvolid, argv[optind], uuidbuf);
12820                 ret = print_extent_state(info, subvolid);
12821                 err |= !!ret;
12822                 goto close_out;
12823         }
12824         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12825
12826         if (!extent_buffer_uptodate(info->tree_root->node) ||
12827             !extent_buffer_uptodate(info->dev_root->node) ||
12828             !extent_buffer_uptodate(info->chunk_root->node)) {
12829                 error("critical roots corrupted, unable to check the filesystem");
12830                 err |= !!ret;
12831                 ret = -EIO;
12832                 goto close_out;
12833         }
12834
12835         if (init_extent_tree || init_csum_tree) {
12836                 struct btrfs_trans_handle *trans;
12837
12838                 trans = btrfs_start_transaction(info->extent_root, 0);
12839                 if (IS_ERR(trans)) {
12840                         error("error starting transaction");
12841                         ret = PTR_ERR(trans);
12842                         err |= !!ret;
12843                         goto close_out;
12844                 }
12845
12846                 if (init_extent_tree) {
12847                         printf("Creating a new extent tree\n");
12848                         ret = reinit_extent_tree(trans, info);
12849                         err |= !!ret;
12850                         if (ret)
12851                                 goto close_out;
12852                 }
12853
12854                 if (init_csum_tree) {
12855                         printf("Reinitialize checksum tree\n");
12856                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12857                         if (ret) {
12858                                 error("checksum tree initialization failed: %d",
12859                                                 ret);
12860                                 ret = -EIO;
12861                                 err |= !!ret;
12862                                 goto close_out;
12863                         }
12864
12865                         ret = fill_csum_tree(trans, info->csum_root,
12866                                              init_extent_tree);
12867                         err |= !!ret;
12868                         if (ret) {
12869                                 error("checksum tree refilling failed: %d", ret);
12870                                 return -EIO;
12871                         }
12872                 }
12873                 /*
12874                  * Ok now we commit and run the normal fsck, which will add
12875                  * extent entries for all of the items it finds.
12876                  */
12877                 ret = btrfs_commit_transaction(trans, info->extent_root);
12878                 err |= !!ret;
12879                 if (ret)
12880                         goto close_out;
12881         }
12882         if (!extent_buffer_uptodate(info->extent_root->node)) {
12883                 error("critical: extent_root, unable to check the filesystem");
12884                 ret = -EIO;
12885                 err |= !!ret;
12886                 goto close_out;
12887         }
12888         if (!extent_buffer_uptodate(info->csum_root->node)) {
12889                 error("critical: csum_root, unable to check the filesystem");
12890                 ret = -EIO;
12891                 err |= !!ret;
12892                 goto close_out;
12893         }
12894
12895         if (!ctx.progress_enabled)
12896                 fprintf(stderr, "checking extents\n");
12897         if (check_mode == CHECK_MODE_LOWMEM)
12898                 ret = check_chunks_and_extents_v2(root);
12899         else
12900                 ret = check_chunks_and_extents(root);
12901         err |= !!ret;
12902         if (ret)
12903                 error(
12904                 "errors found in extent allocation tree or chunk allocation");
12905
12906         ret = repair_root_items(info);
12907         err |= !!ret;
12908         if (ret < 0)
12909                 goto close_out;
12910         if (repair) {
12911                 fprintf(stderr, "Fixed %d roots.\n", ret);
12912                 ret = 0;
12913         } else if (ret > 0) {
12914                 fprintf(stderr,
12915                        "Found %d roots with an outdated root item.\n",
12916                        ret);
12917                 fprintf(stderr,
12918                         "Please run a filesystem check with the option --repair to fix them.\n");
12919                 ret = 1;
12920                 err |= !!ret;
12921                 goto close_out;
12922         }
12923
12924         if (!ctx.progress_enabled) {
12925                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12926                         fprintf(stderr, "checking free space tree\n");
12927                 else
12928                         fprintf(stderr, "checking free space cache\n");
12929         }
12930         ret = check_space_cache(root);
12931         err |= !!ret;
12932         if (ret)
12933                 goto out;
12934
12935         /*
12936          * We used to have to have these hole extents in between our real
12937          * extents so if we don't have this flag set we need to make sure there
12938          * are no gaps in the file extents for inodes, otherwise we can just
12939          * ignore it when this happens.
12940          */
12941         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12942         if (!ctx.progress_enabled)
12943                 fprintf(stderr, "checking fs roots\n");
12944         if (check_mode == CHECK_MODE_LOWMEM)
12945                 ret = check_fs_roots_v2(root->fs_info);
12946         else
12947                 ret = check_fs_roots(root, &root_cache);
12948         err |= !!ret;
12949         if (ret)
12950                 goto out;
12951
12952         fprintf(stderr, "checking csums\n");
12953         ret = check_csums(root);
12954         err |= !!ret;
12955         if (ret)
12956                 goto out;
12957
12958         fprintf(stderr, "checking root refs\n");
12959         /* For low memory mode, check_fs_roots_v2 handles root refs */
12960         if (check_mode != CHECK_MODE_LOWMEM) {
12961                 ret = check_root_refs(root, &root_cache);
12962                 err |= !!ret;
12963                 if (ret)
12964                         goto out;
12965         }
12966
12967         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12968                 struct extent_buffer *eb;
12969
12970                 eb = list_first_entry(&root->fs_info->recow_ebs,
12971                                       struct extent_buffer, recow);
12972                 list_del_init(&eb->recow);
12973                 ret = recow_extent_buffer(root, eb);
12974                 err |= !!ret;
12975                 if (ret)
12976                         break;
12977         }
12978
12979         while (!list_empty(&delete_items)) {
12980                 struct bad_item *bad;
12981
12982                 bad = list_first_entry(&delete_items, struct bad_item, list);
12983                 list_del_init(&bad->list);
12984                 if (repair) {
12985                         ret = delete_bad_item(root, bad);
12986                         err |= !!ret;
12987                 }
12988                 free(bad);
12989         }
12990
12991         if (info->quota_enabled) {
12992                 fprintf(stderr, "checking quota groups\n");
12993                 ret = qgroup_verify_all(info);
12994                 err |= !!ret;
12995                 if (ret)
12996                         goto out;
12997                 report_qgroups(0);
12998                 ret = repair_qgroups(info, &qgroups_repaired);
12999                 err |= !!ret;
13000                 if (err)
13001                         goto out;
13002                 ret = 0;
13003         }
13004
13005         if (!list_empty(&root->fs_info->recow_ebs)) {
13006                 error("transid errors in file system");
13007                 ret = 1;
13008                 err |= !!ret;
13009         }
13010 out:
13011         if (found_old_backref) { /*
13012                  * there was a disk format change when mixed
13013                  * backref was in testing tree. The old format
13014                  * existed about one week.
13015                  */
13016                 printf("\n * Found old mixed backref format. "
13017                        "The old format is not supported! *"
13018                        "\n * Please mount the FS in readonly mode, "
13019                        "backup data and re-format the FS. *\n\n");
13020                 err |= 1;
13021         }
13022         printf("found %llu bytes used err is %d\n",
13023                (unsigned long long)bytes_used, ret);
13024         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13025         printf("total tree bytes: %llu\n",
13026                (unsigned long long)total_btree_bytes);
13027         printf("total fs tree bytes: %llu\n",
13028                (unsigned long long)total_fs_tree_bytes);
13029         printf("total extent tree bytes: %llu\n",
13030                (unsigned long long)total_extent_tree_bytes);
13031         printf("btree space waste bytes: %llu\n",
13032                (unsigned long long)btree_space_waste);
13033         printf("file data blocks allocated: %llu\n referenced %llu\n",
13034                 (unsigned long long)data_bytes_allocated,
13035                 (unsigned long long)data_bytes_referenced);
13036
13037         free_qgroup_counts();
13038         free_root_recs_tree(&root_cache);
13039 close_out:
13040         close_ctree(root);
13041 err_out:
13042         if (ctx.progress_enabled)
13043                 task_deinit(ctx.info);
13044
13045         return err;
13046 }