7e9492774e5cc72320ed8c952580fc79bd73286d
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress indicator.  The first three values are
 * used as indexes into the message table in print_status_check(), so
 * TASK_NOTHING has to stay the final enumerator.
 */
enum task_position {
        TASK_EXTENTS = 0,
        TASK_FREE_SPACE = 1,
        TASK_FS_ROOTS = 2,
        TASK_NOTHING = 3,       /* sentinel, keep last */
};
53
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        int progress_enabled;
        /* Selects the status message printed by print_status_check(). */
        enum task_position tp;

        /* Handle passed to task_period_start() / task_period_wait(). */
        struct task_info *info;
};
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Checker implementation to run.  parse_check_mode() maps a user supplied
 * string to one of these; CHECK_MODE_UNKNOWN marks an unrecognized string.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL = 0,
        CHECK_MODE_LOWMEM = 1,
        CHECK_MODE_UNKNOWN = 2,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header of a backref record.  It is embedded as the first member
 * ("node") of both struct data_backref and struct tree_backref and is
 * linked into a list through "list".
 */
struct extent_backref {
        struct list_head list;
        /* NOTE(review): presumably set for data_backref, clear for tree_backref - confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Backref record carrying data-extent specific fields.  The common header
 * is embedded as "node", which is what to_data_backref() converts from.
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share storage; only one of them is valid at a time */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* Separate from the 1-bit node.found_ref flag; NOTE(review): presumably a count - confirm */
        u32 found_ref;
};
116
/* Error bits, one bit per condition; bit 0 is not assigned in this set. */
#define ROOT_DIR_ERROR          (1 << 1)        /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1 << 2)        /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1 << 3)        /* DIR_ITEM found but does not match */
#define INODE_REF_MISSING       (1 << 4)        /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1 << 5)        /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1 << 6)        /* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR       (1 << 7)        /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1 << 8)        /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)        /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1 << 10)       /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1 << 11)       /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1 << 12)       /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1 << 13)       /* INODE_ITEM without a reference */
#define NO_INODE_ITEM           (1 << 14)       /* no inode_item */
#define LAST_ITEM               (1 << 15)       /* tree traversal is complete */
#define ROOT_REF_MISSING        (1 << 16)       /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1 << 17)       /* ROOT_REF found but does not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* Printed as "inode" by print_orphan_data_extents(). */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Backref record without the data-extent fields.  The common header is
 * embedded as "node", which is what to_tree_backref() converts from.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage; only one of them is valid at a time */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization value for
 * extent_record::flag_block_full_backref.
 */
enum {
        FLAG_UNSET = 2,
};
170
/*
 * Per-extent bookkeeping record.  It owns a list of backrefs and a list of
 * duplicates, can itself be linked into a list through "list" (see
 * to_extent_record()), and embeds a cache_extent.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can also hold FLAG_UNSET (2). */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/* List-linked record of values describing one root item. */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/* Backref error bits; print_ref_error() turns them into text. */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole, covering [start, start + len), stored as a node of the
 * rb-tree rooted at inode_record::holes.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode.  Records are handed out by
 * get_inode_rec() and released with free_inode_rec().
 */
struct inode_record {
        /* List of struct inode_backref. */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Bitmask of I_ERR_* values, reported by print_inode_error(). */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* Set to (u64)-1 by get_inode_rec() when a record is created. */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole. */
        struct rb_root holes;
        /* List of struct orphan_data_extent. */
        struct list_head orphan_extents;

        /*
         * Share count: get_inode_rec() clones the record before handing it
         * out for modification while refs > 1, and free_inode_rec() only
         * frees it once the count drops to zero.
         */
        u32 refs;
};
280
/* Inode error bits; print_inode_error() turns them into text. */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/* Per-root bookkeeping record; embeds a cache_extent for cache_tree storage. */
struct root_record {
        /* List of struct root_backref. */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache entry that carries an untyped pointer.  get_inode_rec() stores a
 * struct inode_record pointer in "data" and keys the entry by inode number
 * (cache.start = ino, cache.size = 1).
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Cache entry holding its own root cache and inode cache.  Instances are
 * referenced from walk_control::nodes.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/* A block described by its start and size. */
struct block_info {
        u64 start;
        u32 size;
};
342
/* State kept while walking a tree. */
struct walk_control {
        struct cache_tree shared;
        /* One slot per tree level, BTRFS_MAX_LEVEL slots in total. */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
349
/* List entry identifying an item by its key and the id of its root. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/* List entry describing an extent (bytenr, bytes) with two counters. */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/* Facts recorded about one root; embeds a cache_extent for cache_tree storage. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every condition has its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two indistinguishable;
 * it now uses the first free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct btrfs_root *root,
1482                             struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (name_len <= BTRFS_NAME_LEN) {
1517                         len = name_len;
1518                         error = 0;
1519                 } else {
1520                         len = BTRFS_NAME_LEN;
1521                         error = REF_ERR_NAME_TOO_LONG;
1522                 }
1523                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524
1525                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1526                         add_inode_backref(inode_cache, location.objectid,
1527                                           key->objectid, key->offset, namebuf,
1528                                           len, filetype, key->type, error);
1529                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1530                         add_inode_backref(root_cache, location.objectid,
1531                                           key->objectid, key->offset,
1532                                           namebuf, len, filetype,
1533                                           key->type, error);
1534                 } else {
1535                         fprintf(stderr, "invalid location in dir item %u\n",
1536                                 location.type);
1537                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1538                                           key->objectid, key->offset, namebuf,
1539                                           len, filetype, key->type, error);
1540                 }
1541
1542                 len = sizeof(*di) + name_len + data_len;
1543                 di = (struct btrfs_dir_item *)((char *)di + len);
1544                 cur += len;
1545         }
1546         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1547                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1548
1549         return 0;
1550 }
1551
1552 static int process_inode_ref(struct extent_buffer *eb,
1553                              int slot, struct btrfs_key *key,
1554                              struct shared_node *active_node)
1555 {
1556         u32 total;
1557         u32 cur = 0;
1558         u32 len;
1559         u32 name_len;
1560         u64 index;
1561         int error;
1562         struct cache_tree *inode_cache;
1563         struct btrfs_inode_ref *ref;
1564         char namebuf[BTRFS_NAME_LEN];
1565
1566         inode_cache = &active_node->inode_cache;
1567
1568         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1569         total = btrfs_item_size_nr(eb, slot);
1570         while (cur < total) {
1571                 name_len = btrfs_inode_ref_name_len(eb, ref);
1572                 index = btrfs_inode_ref_index(eb, ref);
1573                 if (name_len <= BTRFS_NAME_LEN) {
1574                         len = name_len;
1575                         error = 0;
1576                 } else {
1577                         len = BTRFS_NAME_LEN;
1578                         error = REF_ERR_NAME_TOO_LONG;
1579                 }
1580                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1581                 add_inode_backref(inode_cache, key->objectid, key->offset,
1582                                   index, namebuf, len, 0, key->type, error);
1583
1584                 len = sizeof(*ref) + name_len;
1585                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1586                 cur += len;
1587         }
1588         return 0;
1589 }
1590
1591 static int process_inode_extref(struct extent_buffer *eb,
1592                                 int slot, struct btrfs_key *key,
1593                                 struct shared_node *active_node)
1594 {
1595         u32 total;
1596         u32 cur = 0;
1597         u32 len;
1598         u32 name_len;
1599         u64 index;
1600         u64 parent;
1601         int error;
1602         struct cache_tree *inode_cache;
1603         struct btrfs_inode_extref *extref;
1604         char namebuf[BTRFS_NAME_LEN];
1605
1606         inode_cache = &active_node->inode_cache;
1607
1608         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1609         total = btrfs_item_size_nr(eb, slot);
1610         while (cur < total) {
1611                 name_len = btrfs_inode_extref_name_len(eb, extref);
1612                 index = btrfs_inode_extref_index(eb, extref);
1613                 parent = btrfs_inode_extref_parent(eb, extref);
1614                 if (name_len <= BTRFS_NAME_LEN) {
1615                         len = name_len;
1616                         error = 0;
1617                 } else {
1618                         len = BTRFS_NAME_LEN;
1619                         error = REF_ERR_NAME_TOO_LONG;
1620                 }
1621                 read_extent_buffer(eb, namebuf,
1622                                    (unsigned long)(extref + 1), len);
1623                 add_inode_backref(inode_cache, key->objectid, parent,
1624                                   index, namebuf, len, 0, key->type, error);
1625
1626                 len = sizeof(*extref) + name_len;
1627                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1628                 cur += len;
1629         }
1630         return 0;
1631
1632 }
1633
1634 static int count_csum_range(struct btrfs_root *root, u64 start,
1635                             u64 len, u64 *found)
1636 {
1637         struct btrfs_key key;
1638         struct btrfs_path path;
1639         struct extent_buffer *leaf;
1640         int ret;
1641         size_t size;
1642         *found = 0;
1643         u64 csum_end;
1644         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645
1646         btrfs_init_path(&path);
1647
1648         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649         key.offset = start;
1650         key.type = BTRFS_EXTENT_CSUM_KEY;
1651
1652         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1653                                 &key, &path, 0, 0);
1654         if (ret < 0)
1655                 goto out;
1656         if (ret > 0 && path.slots[0] > 0) {
1657                 leaf = path.nodes[0];
1658                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1659                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1660                     key.type == BTRFS_EXTENT_CSUM_KEY)
1661                         path.slots[0]--;
1662         }
1663
1664         while (len > 0) {
1665                 leaf = path.nodes[0];
1666                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1667                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1668                         if (ret > 0)
1669                                 break;
1670                         else if (ret < 0)
1671                                 goto out;
1672                         leaf = path.nodes[0];
1673                 }
1674
1675                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1676                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1677                     key.type != BTRFS_EXTENT_CSUM_KEY)
1678                         break;
1679
1680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1681                 if (key.offset >= start + len)
1682                         break;
1683
1684                 if (key.offset > start)
1685                         start = key.offset;
1686
1687                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1688                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1689                 if (csum_end > start) {
1690                         size = min(csum_end - start, len);
1691                         len -= size;
1692                         start += size;
1693                         *found += size;
1694                 }
1695
1696                 path.slots[0]++;
1697         }
1698 out:
1699         btrfs_release_path(&path);
1700         if (ret < 0)
1701                 return ret;
1702         return 0;
1703 }
1704
1705 static int process_file_extent(struct btrfs_root *root,
1706                                 struct extent_buffer *eb,
1707                                 int slot, struct btrfs_key *key,
1708                                 struct shared_node *active_node)
1709 {
1710         struct inode_record *rec;
1711         struct btrfs_file_extent_item *fi;
1712         u64 num_bytes = 0;
1713         u64 disk_bytenr = 0;
1714         u64 extent_offset = 0;
1715         u64 mask = root->sectorsize - 1;
1716         int extent_type;
1717         int ret;
1718
1719         rec = active_node->current;
1720         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1721         rec->found_file_extent = 1;
1722
1723         if (rec->extent_start == (u64)-1) {
1724                 rec->extent_start = key->offset;
1725                 rec->extent_end = key->offset;
1726         }
1727
1728         if (rec->extent_end > key->offset)
1729                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1730         else if (rec->extent_end < key->offset) {
1731                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1732                                            key->offset - rec->extent_end);
1733                 if (ret < 0)
1734                         return ret;
1735         }
1736
1737         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1738         extent_type = btrfs_file_extent_type(eb, fi);
1739
1740         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1741                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742                 if (num_bytes == 0)
1743                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1744                 rec->found_size += num_bytes;
1745                 num_bytes = (num_bytes + mask) & ~mask;
1746         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1747                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1748                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1749                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1750                 extent_offset = btrfs_file_extent_offset(eb, fi);
1751                 if (num_bytes == 0 || (num_bytes & mask))
1752                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1753                 if (num_bytes + extent_offset >
1754                     btrfs_file_extent_ram_bytes(eb, fi))
1755                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1756                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1757                     (btrfs_file_extent_compression(eb, fi) ||
1758                      btrfs_file_extent_encryption(eb, fi) ||
1759                      btrfs_file_extent_other_encoding(eb, fi)))
1760                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1761                 if (disk_bytenr > 0)
1762                         rec->found_size += num_bytes;
1763         } else {
1764                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765         }
1766         rec->extent_end = key->offset + num_bytes;
1767
1768         /*
1769          * The data reloc tree will copy full extents into its inode and then
1770          * copy the corresponding csums.  Because the extent it copied could be
1771          * a preallocated extent that hasn't been written to yet there may be no
1772          * csums to copy, ergo we won't have csums for our file extent.  This is
1773          * ok so just don't bother checking csums if the inode belongs to the
1774          * data reloc tree.
1775          */
1776         if (disk_bytenr > 0 &&
1777             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1778                 u64 found;
1779                 if (btrfs_file_extent_compression(eb, fi))
1780                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781                 else
1782                         disk_bytenr += extent_offset;
1783
1784                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1785                 if (ret < 0)
1786                         return ret;
1787                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788                         if (found > 0)
1789                                 rec->found_csum_item = 1;
1790                         if (found < num_bytes)
1791                                 rec->some_csum_missing = 1;
1792                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793                         if (found > 0)
1794                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1795                 }
1796         }
1797         return 0;
1798 }
1799
1800 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1801                             struct walk_control *wc)
1802 {
1803         struct btrfs_key key;
1804         u32 nritems;
1805         int i;
1806         int ret = 0;
1807         struct cache_tree *inode_cache;
1808         struct shared_node *active_node;
1809
1810         if (wc->root_level == wc->active_node &&
1811             btrfs_root_refs(&root->root_item) == 0)
1812                 return 0;
1813
1814         active_node = wc->nodes[wc->active_node];
1815         inode_cache = &active_node->inode_cache;
1816         nritems = btrfs_header_nritems(eb);
1817         for (i = 0; i < nritems; i++) {
1818                 btrfs_item_key_to_cpu(eb, &key, i);
1819
1820                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821                         continue;
1822                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1823                         continue;
1824
1825                 if (active_node->current == NULL ||
1826                     active_node->current->ino < key.objectid) {
1827                         if (active_node->current) {
1828                                 active_node->current->checked = 1;
1829                                 maybe_free_inode_rec(inode_cache,
1830                                                      active_node->current);
1831                         }
1832                         active_node->current = get_inode_rec(inode_cache,
1833                                                              key.objectid, 1);
1834                         BUG_ON(IS_ERR(active_node->current));
1835                 }
1836                 switch (key.type) {
1837                 case BTRFS_DIR_ITEM_KEY:
1838                 case BTRFS_DIR_INDEX_KEY:
1839                         ret = process_dir_item(root, eb, i, &key, active_node);
1840                         break;
1841                 case BTRFS_INODE_REF_KEY:
1842                         ret = process_inode_ref(eb, i, &key, active_node);
1843                         break;
1844                 case BTRFS_INODE_EXTREF_KEY:
1845                         ret = process_inode_extref(eb, i, &key, active_node);
1846                         break;
1847                 case BTRFS_INODE_ITEM_KEY:
1848                         ret = process_inode_item(eb, i, &key, active_node);
1849                         break;
1850                 case BTRFS_EXTENT_DATA_KEY:
1851                         ret = process_file_extent(root, eb, i, &key,
1852                                                   active_node);
1853                         break;
1854                 default:
1855                         break;
1856                 };
1857         }
1858         return ret;
1859 }
1860
1861 struct node_refs {
1862         u64 bytenr[BTRFS_MAX_LEVEL];
1863         u64 refs[BTRFS_MAX_LEVEL];
1864         int need_check[BTRFS_MAX_LEVEL];
1865 };
1866
1867 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1868                              struct node_refs *nrefs, u64 level);
1869 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1870                             unsigned int ext_ref);
1871
1872 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1873                                struct node_refs *nrefs, int *level, int ext_ref)
1874 {
1875         struct extent_buffer *cur = path->nodes[0];
1876         struct btrfs_key key;
1877         u64 cur_bytenr;
1878         u32 nritems;
1879         u64 first_ino = 0;
1880         int root_level = btrfs_header_level(root->node);
1881         int i;
1882         int ret = 0; /* Final return value */
1883         int err = 0; /* Positive error bitmap */
1884
1885         cur_bytenr = cur->start;
1886
1887         /* skip to first inode item or the first inode number change */
1888         nritems = btrfs_header_nritems(cur);
1889         for (i = 0; i < nritems; i++) {
1890                 btrfs_item_key_to_cpu(cur, &key, i);
1891                 if (i == 0)
1892                         first_ino = key.objectid;
1893                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1894                     (first_ino && first_ino != key.objectid))
1895                         break;
1896         }
1897         if (i == nritems) {
1898                 path->slots[0] = nritems;
1899                 return 0;
1900         }
1901         path->slots[0] = i;
1902
1903 again:
1904         err |= check_inode_item(root, path, ext_ref);
1905
1906         if (err & LAST_ITEM)
1907                 goto out;
1908
1909         /* still have inode items in thie leaf */
1910         if (cur->start == cur_bytenr)
1911                 goto again;
1912
1913         /*
1914          * we have switched to another leaf, above nodes may
1915          * have changed, here walk down the path, if a node
1916          * or leaf is shared, check whether we can skip this
1917          * node or leaf.
1918          */
1919         for (i = root_level; i >= 0; i--) {
1920                 if (path->nodes[i]->start == nrefs->bytenr[i])
1921                         continue;
1922
1923                 ret = update_nodes_refs(root,
1924                                 path->nodes[i]->start,
1925                                 nrefs, i);
1926                 if (ret)
1927                         goto out;
1928
1929                 if (!nrefs->need_check[i]) {
1930                         *level += 1;
1931                         break;
1932                 }
1933         }
1934
1935         for (i = 0; i < *level; i++) {
1936                 free_extent_buffer(path->nodes[i]);
1937                 path->nodes[i] = NULL;
1938         }
1939 out:
1940         err &= ~LAST_ITEM;
1941         /*
1942          * Convert any error bitmap to -EIO, as we should avoid
1943          * mixing positive and negative return value to represent
1944          * error
1945          */
1946         if (err && !ret)
1947                 ret = -EIO;
1948         return ret;
1949 }
1950
1951 static void reada_walk_down(struct btrfs_root *root,
1952                             struct extent_buffer *node, int slot)
1953 {
1954         u64 bytenr;
1955         u64 ptr_gen;
1956         u32 nritems;
1957         u32 blocksize;
1958         int i;
1959         int level;
1960
1961         level = btrfs_header_level(node);
1962         if (level != 1)
1963                 return;
1964
1965         nritems = btrfs_header_nritems(node);
1966         blocksize = root->nodesize;
1967         for (i = slot; i < nritems; i++) {
1968                 bytenr = btrfs_node_blockptr(node, i);
1969                 ptr_gen = btrfs_node_ptr_generation(node, i);
1970                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1971         }
1972 }
1973
1974 /*
1975  * Check the child node/leaf by the following condition:
1976  * 1. the first item key of the node/leaf should be the same with the one
1977  *    in parent.
1978  * 2. block in parent node should match the child node/leaf.
1979  * 3. generation of parent node and child's header should be consistent.
1980  *
1981  * Or the child node/leaf pointed by the key in parent is not valid.
1982  *
1983  * We hope to check leaf owner too, but since subvol may share leaves,
1984  * which makes leaf owner check not so strong, key check should be
1985  * sufficient enough for that case.
1986  */
1987 static int check_child_node(struct btrfs_root *root,
1988                             struct extent_buffer *parent, int slot,
1989                             struct extent_buffer *child)
1990 {
1991         struct btrfs_key parent_key;
1992         struct btrfs_key child_key;
1993         int ret = 0;
1994
1995         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1996         if (btrfs_header_level(child) == 0)
1997                 btrfs_item_key_to_cpu(child, &child_key, 0);
1998         else
1999                 btrfs_node_key_to_cpu(child, &child_key, 0);
2000
2001         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002                 ret = -EINVAL;
2003                 fprintf(stderr,
2004                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2005                         parent_key.objectid, parent_key.type, parent_key.offset,
2006                         child_key.objectid, child_key.type, child_key.offset);
2007         }
2008         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2009                 ret = -EINVAL;
2010                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2011                         btrfs_node_blockptr(parent, slot),
2012                         btrfs_header_bytenr(child));
2013         }
2014         if (btrfs_node_ptr_generation(parent, slot) !=
2015             btrfs_header_generation(child)) {
2016                 ret = -EINVAL;
2017                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2018                         btrfs_header_generation(child),
2019                         btrfs_node_ptr_generation(parent, slot));
2020         }
2021         return ret;
2022 }
2023
2024 /*
2025  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2026  * in every fs or file tree check. Here we find its all root ids, and only check
2027  * it in the fs or file tree which has the smallest root id.
2028  */
2029 static int need_check(struct btrfs_root *root, struct ulist *roots)
2030 {
2031         struct rb_node *node;
2032         struct ulist_node *u;
2033
2034         if (roots->nnodes == 1)
2035                 return 1;
2036
2037         node = rb_first(&roots->root);
2038         u = rb_entry(node, struct ulist_node, rb_node);
2039         /*
2040          * current root id is not smallest, we skip it and let it be checked
2041          * in the fs or file tree who hash the smallest root id.
2042          */
2043         if (root->objectid != u->val)
2044                 return 0;
2045
2046         return 1;
2047 }
2048
2049 /*
2050  * for a tree node or leaf, we record its reference count, so later if we still
2051  * process this node or leaf, don't need to compute its reference count again.
2052  */
2053 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2054                              struct node_refs *nrefs, u64 level)
2055 {
2056         int check, ret;
2057         u64 refs;
2058         struct ulist *roots;
2059
2060         if (nrefs->bytenr[level] != bytenr) {
2061                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2062                                        level, 1, &refs, NULL);
2063                 if (ret < 0)
2064                         return ret;
2065
2066                 nrefs->bytenr[level] = bytenr;
2067                 nrefs->refs[level] = refs;
2068                 if (refs > 1) {
2069                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2070                                                    0, &roots);
2071                         if (ret)
2072                                 return -EIO;
2073
2074                         check = need_check(root, roots);
2075                         ulist_free(roots);
2076                         nrefs->need_check[level] = check;
2077                 } else {
2078                         nrefs->need_check[level] = 1;
2079                 }
2080         }
2081
2082         return 0;
2083 }
2084
2085 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2086                           struct walk_control *wc, int *level,
2087                           struct node_refs *nrefs)
2088 {
2089         enum btrfs_tree_block_status status;
2090         u64 bytenr;
2091         u64 ptr_gen;
2092         struct extent_buffer *next;
2093         struct extent_buffer *cur;
2094         u32 blocksize;
2095         int ret, err = 0;
2096         u64 refs;
2097
2098         WARN_ON(*level < 0);
2099         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2100
2101         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2102                 refs = nrefs->refs[*level];
2103                 ret = 0;
2104         } else {
2105                 ret = btrfs_lookup_extent_info(NULL, root,
2106                                        path->nodes[*level]->start,
2107                                        *level, 1, &refs, NULL);
2108                 if (ret < 0) {
2109                         err = ret;
2110                         goto out;
2111                 }
2112                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2113                 nrefs->refs[*level] = refs;
2114         }
2115
2116         if (refs > 1) {
2117                 ret = enter_shared_node(root, path->nodes[*level]->start,
2118                                         refs, wc, *level);
2119                 if (ret > 0) {
2120                         err = ret;
2121                         goto out;
2122                 }
2123         }
2124
2125         while (*level >= 0) {
2126                 WARN_ON(*level < 0);
2127                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2128                 cur = path->nodes[*level];
2129
2130                 if (btrfs_header_level(cur) != *level)
2131                         WARN_ON(1);
2132
2133                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134                         break;
2135                 if (*level == 0) {
2136                         ret = process_one_leaf(root, cur, wc);
2137                         if (ret < 0)
2138                                 err = ret;
2139                         break;
2140                 }
2141                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2142                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2143                 blocksize = root->nodesize;
2144
2145                 if (bytenr == nrefs->bytenr[*level - 1]) {
2146                         refs = nrefs->refs[*level - 1];
2147                 } else {
2148                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2149                                         *level - 1, 1, &refs, NULL);
2150                         if (ret < 0) {
2151                                 refs = 0;
2152                         } else {
2153                                 nrefs->bytenr[*level - 1] = bytenr;
2154                                 nrefs->refs[*level - 1] = refs;
2155                         }
2156                 }
2157
2158                 if (refs > 1) {
2159                         ret = enter_shared_node(root, bytenr, refs,
2160                                                 wc, *level - 1);
2161                         if (ret > 0) {
2162                                 path->slots[*level]++;
2163                                 continue;
2164                         }
2165                 }
2166
2167                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2168                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2169                         free_extent_buffer(next);
2170                         reada_walk_down(root, cur, path->slots[*level]);
2171                         next = read_tree_block(root, bytenr, blocksize,
2172                                                ptr_gen);
2173                         if (!extent_buffer_uptodate(next)) {
2174                                 struct btrfs_key node_key;
2175
2176                                 btrfs_node_key_to_cpu(path->nodes[*level],
2177                                                       &node_key,
2178                                                       path->slots[*level]);
2179                                 btrfs_add_corrupt_extent_record(root->fs_info,
2180                                                 &node_key,
2181                                                 path->nodes[*level]->start,
2182                                                 root->nodesize, *level);
2183                                 err = -EIO;
2184                                 goto out;
2185                         }
2186                 }
2187
2188                 ret = check_child_node(root, cur, path->slots[*level], next);
2189                 if (ret) {
2190                         err = ret;
2191                         goto out;
2192                 }
2193
2194                 if (btrfs_is_leaf(next))
2195                         status = btrfs_check_leaf(root, NULL, next);
2196                 else
2197                         status = btrfs_check_node(root, NULL, next);
2198                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2199                         free_extent_buffer(next);
2200                         err = -EIO;
2201                         goto out;
2202                 }
2203
2204                 *level = *level - 1;
2205                 free_extent_buffer(path->nodes[*level]);
2206                 path->nodes[*level] = next;
2207                 path->slots[*level] = 0;
2208         }
2209 out:
2210         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2211         return err;
2212 }
2213
2214 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2215                             unsigned int ext_ref);
2216
2217 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2218                              int *level, struct node_refs *nrefs, int ext_ref)
2219 {
2220         enum btrfs_tree_block_status status;
2221         u64 bytenr;
2222         u64 ptr_gen;
2223         struct extent_buffer *next;
2224         struct extent_buffer *cur;
2225         u32 blocksize;
2226         int ret;
2227
2228         WARN_ON(*level < 0);
2229         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2230
2231         ret = update_nodes_refs(root, path->nodes[*level]->start,
2232                                 nrefs, *level);
2233         if (ret < 0)
2234                 return ret;
2235
2236         while (*level >= 0) {
2237                 WARN_ON(*level < 0);
2238                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2239                 cur = path->nodes[*level];
2240
2241                 if (btrfs_header_level(cur) != *level)
2242                         WARN_ON(1);
2243
2244                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2245                         break;
2246                 /* Don't forgot to check leaf/node validation */
2247                 if (*level == 0) {
2248                         ret = btrfs_check_leaf(root, NULL, cur);
2249                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2250                                 ret = -EIO;
2251                                 break;
2252                         }
2253                         ret = process_one_leaf_v2(root, path, nrefs,
2254                                                   level, ext_ref);
2255                         break;
2256                 } else {
2257                         ret = btrfs_check_node(root, NULL, cur);
2258                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2259                                 ret = -EIO;
2260                                 break;
2261                         }
2262                 }
2263                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2264                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2265                 blocksize = root->nodesize;
2266
2267                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2268                 if (ret)
2269                         break;
2270                 if (!nrefs->need_check[*level - 1]) {
2271                         path->slots[*level]++;
2272                         continue;
2273                 }
2274
2275                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2276                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2277                         free_extent_buffer(next);
2278                         reada_walk_down(root, cur, path->slots[*level]);
2279                         next = read_tree_block(root, bytenr, blocksize,
2280                                                ptr_gen);
2281                         if (!extent_buffer_uptodate(next)) {
2282                                 struct btrfs_key node_key;
2283
2284                                 btrfs_node_key_to_cpu(path->nodes[*level],
2285                                                       &node_key,
2286                                                       path->slots[*level]);
2287                                 btrfs_add_corrupt_extent_record(root->fs_info,
2288                                                 &node_key,
2289                                                 path->nodes[*level]->start,
2290                                                 root->nodesize, *level);
2291                                 ret = -EIO;
2292                                 break;
2293                         }
2294                 }
2295
2296                 ret = check_child_node(root, cur, path->slots[*level], next);
2297                 if (ret < 0) 
2298                         break;
2299
2300                 if (btrfs_is_leaf(next))
2301                         status = btrfs_check_leaf(root, NULL, next);
2302                 else
2303                         status = btrfs_check_node(root, NULL, next);
2304                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2305                         free_extent_buffer(next);
2306                         ret = -EIO;
2307                         break;
2308                 }
2309
2310                 *level = *level - 1;
2311                 free_extent_buffer(path->nodes[*level]);
2312                 path->nodes[*level] = next;
2313                 path->slots[*level] = 0;
2314         }
2315         return ret;
2316 }
2317
2318 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2319                         struct walk_control *wc, int *level)
2320 {
2321         int i;
2322         struct extent_buffer *leaf;
2323
2324         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2325                 leaf = path->nodes[i];
2326                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2327                         path->slots[i]++;
2328                         *level = i;
2329                         return 0;
2330                 } else {
2331                         free_extent_buffer(path->nodes[*level]);
2332                         path->nodes[*level] = NULL;
2333                         BUG_ON(*level > wc->active_node);
2334                         if (*level == wc->active_node)
2335                                 leave_shared_node(root, wc, *level);
2336                         *level = i + 1;
2337                 }
2338         }
2339         return 1;
2340 }
2341
2342 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2343                            int *level)
2344 {
2345         int i;
2346         struct extent_buffer *leaf;
2347
2348         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349                 leaf = path->nodes[i];
2350                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2351                         path->slots[i]++;
2352                         *level = i;
2353                         return 0;
2354                 } else {
2355                         free_extent_buffer(path->nodes[*level]);
2356                         path->nodes[*level] = NULL;
2357                         *level = i + 1;
2358                 }
2359         }
2360         return 1;
2361 }
2362
2363 static int check_root_dir(struct inode_record *rec)
2364 {
2365         struct inode_backref *backref;
2366         int ret = -1;
2367
2368         if (!rec->found_inode_item || rec->errors)
2369                 goto out;
2370         if (rec->nlink != 1 || rec->found_link != 0)
2371                 goto out;
2372         if (list_empty(&rec->backrefs))
2373                 goto out;
2374         backref = to_inode_backref(rec->backrefs.next);
2375         if (!backref->found_inode_ref)
2376                 goto out;
2377         if (backref->index != 0 || backref->namelen != 2 ||
2378             memcmp(backref->name, "..", 2))
2379                 goto out;
2380         if (backref->found_dir_index || backref->found_dir_item)
2381                 goto out;
2382         ret = 0;
2383 out:
2384         return ret;
2385 }
2386
2387 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2388                               struct btrfs_root *root, struct btrfs_path *path,
2389                               struct inode_record *rec)
2390 {
2391         struct btrfs_inode_item *ei;
2392         struct btrfs_key key;
2393         int ret;
2394
2395         key.objectid = rec->ino;
2396         key.type = BTRFS_INODE_ITEM_KEY;
2397         key.offset = (u64)-1;
2398
2399         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2400         if (ret < 0)
2401                 goto out;
2402         if (ret) {
2403                 if (!path->slots[0]) {
2404                         ret = -ENOENT;
2405                         goto out;
2406                 }
2407                 path->slots[0]--;
2408                 ret = 0;
2409         }
2410         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2411         if (key.objectid != rec->ino) {
2412                 ret = -ENOENT;
2413                 goto out;
2414         }
2415
2416         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2417                             struct btrfs_inode_item);
2418         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2419         btrfs_mark_buffer_dirty(path->nodes[0]);
2420         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2421         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2422                root->root_key.objectid);
2423 out:
2424         btrfs_release_path(path);
2425         return ret;
2426 }
2427
2428 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2429                                     struct btrfs_root *root,
2430                                     struct btrfs_path *path,
2431                                     struct inode_record *rec)
2432 {
2433         int ret;
2434
2435         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2436         btrfs_release_path(path);
2437         if (!ret)
2438                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2439         return ret;
2440 }
2441
2442 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2443                                struct btrfs_root *root,
2444                                struct btrfs_path *path,
2445                                struct inode_record *rec)
2446 {
2447         struct btrfs_inode_item *ei;
2448         struct btrfs_key key;
2449         int ret = 0;
2450
2451         key.objectid = rec->ino;
2452         key.type = BTRFS_INODE_ITEM_KEY;
2453         key.offset = 0;
2454
2455         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2456         if (ret) {
2457                 if (ret > 0)
2458                         ret = -ENOENT;
2459                 goto out;
2460         }
2461
2462         /* Since ret == 0, no need to check anything */
2463         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2464                             struct btrfs_inode_item);
2465         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2466         btrfs_mark_buffer_dirty(path->nodes[0]);
2467         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2468         printf("reset nbytes for ino %llu root %llu\n",
2469                rec->ino, root->root_key.objectid);
2470 out:
2471         btrfs_release_path(path);
2472         return ret;
2473 }
2474
2475 static int add_missing_dir_index(struct btrfs_root *root,
2476                                  struct cache_tree *inode_cache,
2477                                  struct inode_record *rec,
2478                                  struct inode_backref *backref)
2479 {
2480         struct btrfs_path path;
2481         struct btrfs_trans_handle *trans;
2482         struct btrfs_dir_item *dir_item;
2483         struct extent_buffer *leaf;
2484         struct btrfs_key key;
2485         struct btrfs_disk_key disk_key;
2486         struct inode_record *dir_rec;
2487         unsigned long name_ptr;
2488         u32 data_size = sizeof(*dir_item) + backref->namelen;
2489         int ret;
2490
2491         trans = btrfs_start_transaction(root, 1);
2492         if (IS_ERR(trans))
2493                 return PTR_ERR(trans);
2494
2495         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2496                 (unsigned long long)rec->ino);
2497
2498         btrfs_init_path(&path);
2499         key.objectid = backref->dir;
2500         key.type = BTRFS_DIR_INDEX_KEY;
2501         key.offset = backref->index;
2502         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2503         BUG_ON(ret);
2504
2505         leaf = path.nodes[0];
2506         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2507
2508         disk_key.objectid = cpu_to_le64(rec->ino);
2509         disk_key.type = BTRFS_INODE_ITEM_KEY;
2510         disk_key.offset = 0;
2511
2512         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2513         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2514         btrfs_set_dir_data_len(leaf, dir_item, 0);
2515         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2516         name_ptr = (unsigned long)(dir_item + 1);
2517         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2518         btrfs_mark_buffer_dirty(leaf);
2519         btrfs_release_path(&path);
2520         btrfs_commit_transaction(trans, root);
2521
2522         backref->found_dir_index = 1;
2523         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2524         BUG_ON(IS_ERR(dir_rec));
2525         if (!dir_rec)
2526                 return 0;
2527         dir_rec->found_size += backref->namelen;
2528         if (dir_rec->found_size == dir_rec->isize &&
2529             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2530                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2531         if (dir_rec->found_size != dir_rec->isize)
2532                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2533
2534         return 0;
2535 }
2536
2537 static int delete_dir_index(struct btrfs_root *root,
2538                             struct cache_tree *inode_cache,
2539                             struct inode_record *rec,
2540                             struct inode_backref *backref)
2541 {
2542         struct btrfs_trans_handle *trans;
2543         struct btrfs_dir_item *di;
2544         struct btrfs_path path;
2545         int ret = 0;
2546
2547         trans = btrfs_start_transaction(root, 1);
2548         if (IS_ERR(trans))
2549                 return PTR_ERR(trans);
2550
2551         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2552                 (unsigned long long)backref->dir,
2553                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2554                 (unsigned long long)root->objectid);
2555
2556         btrfs_init_path(&path);
2557         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2558                                     backref->name, backref->namelen,
2559                                     backref->index, -1);
2560         if (IS_ERR(di)) {
2561                 ret = PTR_ERR(di);
2562                 btrfs_release_path(&path);
2563                 btrfs_commit_transaction(trans, root);
2564                 if (ret == -ENOENT)
2565                         return 0;
2566                 return ret;
2567         }
2568
2569         if (!di)
2570                 ret = btrfs_del_item(trans, root, &path);
2571         else
2572                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2573         BUG_ON(ret);
2574         btrfs_release_path(&path);
2575         btrfs_commit_transaction(trans, root);
2576         return ret;
2577 }
2578
2579 static int create_inode_item(struct btrfs_root *root,
2580                              struct inode_record *rec,
2581                              struct inode_backref *backref, int root_dir)
2582 {
2583         struct btrfs_trans_handle *trans;
2584         struct btrfs_inode_item inode_item;
2585         time_t now = time(NULL);
2586         int ret;
2587
2588         trans = btrfs_start_transaction(root, 1);
2589         if (IS_ERR(trans)) {
2590                 ret = PTR_ERR(trans);
2591                 return ret;
2592         }
2593
2594         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2595                 "be incomplete, please check permissions and content after "
2596                 "the fsck completes.\n", (unsigned long long)root->objectid,
2597                 (unsigned long long)rec->ino);
2598
2599         memset(&inode_item, 0, sizeof(inode_item));
2600         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2601         if (root_dir)
2602                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2603         else
2604                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2605         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2606         if (rec->found_dir_item) {
2607                 if (rec->found_file_extent)
2608                         fprintf(stderr, "root %llu inode %llu has both a dir "
2609                                 "item and extents, unsure if it is a dir or a "
2610                                 "regular file so setting it as a directory\n",
2611                                 (unsigned long long)root->objectid,
2612                                 (unsigned long long)rec->ino);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2614                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2615         } else if (!rec->found_dir_item) {
2616                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2617                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2618         }
2619         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2626         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2627
2628         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2629         BUG_ON(ret);
2630         btrfs_commit_transaction(trans, root);
2631         return 0;
2632 }
2633
2634 static int repair_inode_backrefs(struct btrfs_root *root,
2635                                  struct inode_record *rec,
2636                                  struct cache_tree *inode_cache,
2637                                  int delete)
2638 {
2639         struct inode_backref *tmp, *backref;
2640         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2641         int ret = 0;
2642         int repaired = 0;
2643
2644         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2645                 if (!delete && rec->ino == root_dirid) {
2646                         if (!rec->found_inode_item) {
2647                                 ret = create_inode_item(root, rec, backref, 1);
2648                                 if (ret)
2649                                         break;
2650                                 repaired++;
2651                         }
2652                 }
2653
2654                 /* Index 0 for root dir's are special, don't mess with it */
2655                 if (rec->ino == root_dirid && backref->index == 0)
2656                         continue;
2657
2658                 if (delete &&
2659                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2660                      (backref->found_dir_index && backref->found_inode_ref &&
2661                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2662                         ret = delete_dir_index(root, inode_cache, rec, backref);
2663                         if (ret)
2664                                 break;
2665                         repaired++;
2666                         list_del(&backref->list);
2667                         free(backref);
2668                 }
2669
2670                 if (!delete && !backref->found_dir_index &&
2671                     backref->found_dir_item && backref->found_inode_ref) {
2672                         ret = add_missing_dir_index(root, inode_cache, rec,
2673                                                     backref);
2674                         if (ret)
2675                                 break;
2676                         repaired++;
2677                         if (backref->found_dir_item &&
2678                             backref->found_dir_index &&
2679                             backref->found_dir_index) {
2680                                 if (!backref->errors &&
2681                                     backref->found_inode_ref) {
2682                                         list_del(&backref->list);
2683                                         free(backref);
2684                                 }
2685                         }
2686                 }
2687
2688                 if (!delete && (!backref->found_dir_index &&
2689                                 !backref->found_dir_item &&
2690                                 backref->found_inode_ref)) {
2691                         struct btrfs_trans_handle *trans;
2692                         struct btrfs_key location;
2693
2694                         ret = check_dir_conflict(root, backref->name,
2695                                                  backref->namelen,
2696                                                  backref->dir,
2697                                                  backref->index);
2698                         if (ret) {
2699                                 /*
2700                                  * let nlink fixing routine to handle it,
2701                                  * which can do it better.
2702                                  */
2703                                 ret = 0;
2704                                 break;
2705                         }
2706                         location.objectid = rec->ino;
2707                         location.type = BTRFS_INODE_ITEM_KEY;
2708                         location.offset = 0;
2709
2710                         trans = btrfs_start_transaction(root, 1);
2711                         if (IS_ERR(trans)) {
2712                                 ret = PTR_ERR(trans);
2713                                 break;
2714                         }
2715                         fprintf(stderr, "adding missing dir index/item pair "
2716                                 "for inode %llu\n",
2717                                 (unsigned long long)rec->ino);
2718                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2719                                                     backref->namelen,
2720                                                     backref->dir, &location,
2721                                                     imode_to_type(rec->imode),
2722                                                     backref->index);
2723                         BUG_ON(ret);
2724                         btrfs_commit_transaction(trans, root);
2725                         repaired++;
2726                 }
2727
2728                 if (!delete && (backref->found_inode_ref &&
2729                                 backref->found_dir_index &&
2730                                 backref->found_dir_item &&
2731                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2732                                 !rec->found_inode_item)) {
2733                         ret = create_inode_item(root, rec, backref, 0);
2734                         if (ret)
2735                                 break;
2736                         repaired++;
2737                 }
2738
2739         }
2740         return ret ? ret : repaired;
2741 }
2742
2743 /*
2744  * To determine the file type for nlink/inode_item repair
2745  *
2746  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2747  * Return -ENOENT if file type is not found.
2748  */
2749 static int find_file_type(struct inode_record *rec, u8 *type)
2750 {
2751         struct inode_backref *backref;
2752
2753         /* For inode item recovered case */
2754         if (rec->found_inode_item) {
2755                 *type = imode_to_type(rec->imode);
2756                 return 0;
2757         }
2758
2759         list_for_each_entry(backref, &rec->backrefs, list) {
2760                 if (backref->found_dir_index || backref->found_dir_item) {
2761                         *type = backref->filetype;
2762                         return 0;
2763                 }
2764         }
2765         return -ENOENT;
2766 }
2767
2768 /*
2769  * To determine the file name for nlink repair
2770  *
2771  * Return 0 if file name is found, set name and namelen.
2772  * Return -ENOENT if file name is not found.
2773  */
2774 static int find_file_name(struct inode_record *rec,
2775                           char *name, int *namelen)
2776 {
2777         struct inode_backref *backref;
2778
2779         list_for_each_entry(backref, &rec->backrefs, list) {
2780                 if (backref->found_dir_index || backref->found_dir_item ||
2781                     backref->found_inode_ref) {
2782                         memcpy(name, backref->name, backref->namelen);
2783                         *namelen = backref->namelen;
2784                         return 0;
2785                 }
2786         }
2787         return -ENOENT;
2788 }
2789
2790 /* Reset the nlink of the inode to the correct one */
2791 static int reset_nlink(struct btrfs_trans_handle *trans,
2792                        struct btrfs_root *root,
2793                        struct btrfs_path *path,
2794                        struct inode_record *rec)
2795 {
2796         struct inode_backref *backref;
2797         struct inode_backref *tmp;
2798         struct btrfs_key key;
2799         struct btrfs_inode_item *inode_item;
2800         int ret = 0;
2801
2802         /* We don't believe this either, reset it and iterate backref */
2803         rec->found_link = 0;
2804
2805         /* Remove all backref including the valid ones */
2806         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2807                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2808                                    backref->index, backref->name,
2809                                    backref->namelen, 0);
2810                 if (ret < 0)
2811                         goto out;
2812
2813                 /* remove invalid backref, so it won't be added back */
2814                 if (!(backref->found_dir_index &&
2815                       backref->found_dir_item &&
2816                       backref->found_inode_ref)) {
2817                         list_del(&backref->list);
2818                         free(backref);
2819                 } else {
2820                         rec->found_link++;
2821                 }
2822         }
2823
2824         /* Set nlink to 0 */
2825         key.objectid = rec->ino;
2826         key.type = BTRFS_INODE_ITEM_KEY;
2827         key.offset = 0;
2828         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2829         if (ret < 0)
2830                 goto out;
2831         if (ret > 0) {
2832                 ret = -ENOENT;
2833                 goto out;
2834         }
2835         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2836                                     struct btrfs_inode_item);
2837         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2838         btrfs_mark_buffer_dirty(path->nodes[0]);
2839         btrfs_release_path(path);
2840
2841         /*
2842          * Add back valid inode_ref/dir_item/dir_index,
2843          * add_link() will handle the nlink inc, so new nlink must be correct
2844          */
2845         list_for_each_entry(backref, &rec->backrefs, list) {
2846                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2847                                      backref->name, backref->namelen,
2848                                      backref->filetype, &backref->index, 1);
2849                 if (ret < 0)
2850                         goto out;
2851         }
2852 out:
2853         btrfs_release_path(path);
2854         return ret;
2855 }
2856
2857 static int get_highest_inode(struct btrfs_trans_handle *trans,
2858                                 struct btrfs_root *root,
2859                                 struct btrfs_path *path,
2860                                 u64 *highest_ino)
2861 {
2862         struct btrfs_key key, found_key;
2863         int ret;
2864
2865         btrfs_init_path(path);
2866         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2867         key.offset = -1;
2868         key.type = BTRFS_INODE_ITEM_KEY;
2869         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2870         if (ret == 1) {
2871                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2872                                 path->slots[0] - 1);
2873                 *highest_ino = found_key.objectid;
2874                 ret = 0;
2875         }
2876         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2877                 ret = -EOVERFLOW;
2878         btrfs_release_path(path);
2879         return ret;
2880 }
2881
2882 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2883                                struct btrfs_root *root,
2884                                struct btrfs_path *path,
2885                                struct inode_record *rec)
2886 {
2887         char *dir_name = "lost+found";
2888         char namebuf[BTRFS_NAME_LEN] = {0};
2889         u64 lost_found_ino;
2890         u32 mode = 0700;
2891         u8 type = 0;
2892         int namelen = 0;
2893         int name_recovered = 0;
2894         int type_recovered = 0;
2895         int ret = 0;
2896
2897         /*
2898          * Get file name and type first before these invalid inode ref
2899          * are deleted by remove_all_invalid_backref()
2900          */
2901         name_recovered = !find_file_name(rec, namebuf, &namelen);
2902         type_recovered = !find_file_type(rec, &type);
2903
2904         if (!name_recovered) {
2905                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2906                        rec->ino, rec->ino);
2907                 namelen = count_digits(rec->ino);
2908                 sprintf(namebuf, "%llu", rec->ino);
2909                 name_recovered = 1;
2910         }
2911         if (!type_recovered) {
2912                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2913                        rec->ino);
2914                 type = BTRFS_FT_REG_FILE;
2915                 type_recovered = 1;
2916         }
2917
2918         ret = reset_nlink(trans, root, path, rec);
2919         if (ret < 0) {
2920                 fprintf(stderr,
2921                         "Failed to reset nlink for inode %llu: %s\n",
2922                         rec->ino, strerror(-ret));
2923                 goto out;
2924         }
2925
2926         if (rec->found_link == 0) {
2927                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2928                 if (ret < 0)
2929                         goto out;
2930                 lost_found_ino++;
2931                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2932                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2933                                   mode);
2934                 if (ret < 0) {
2935                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2936                                 dir_name, strerror(-ret));
2937                         goto out;
2938                 }
2939                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2940                                      namebuf, namelen, type, NULL, 1);
2941                 /*
2942                  * Add ".INO" suffix several times to handle case where
2943                  * "FILENAME.INO" is already taken by another file.
2944                  */
2945                 while (ret == -EEXIST) {
2946                         /*
2947                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2948                          */
2949                         if (namelen + count_digits(rec->ino) + 1 >
2950                             BTRFS_NAME_LEN) {
2951                                 ret = -EFBIG;
2952                                 goto out;
2953                         }
2954                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2955                                  ".%llu", rec->ino);
2956                         namelen += count_digits(rec->ino) + 1;
2957                         ret = btrfs_add_link(trans, root, rec->ino,
2958                                              lost_found_ino, namebuf,
2959                                              namelen, type, NULL, 1);
2960                 }
2961                 if (ret < 0) {
2962                         fprintf(stderr,
2963                                 "Failed to link the inode %llu to %s dir: %s\n",
2964                                 rec->ino, dir_name, strerror(-ret));
2965                         goto out;
2966                 }
2967                 /*
2968                  * Just increase the found_link, don't actually add the
2969                  * backref. This will make things easier and this inode
2970                  * record will be freed after the repair is done.
2971                  * So fsck will not report problem about this inode.
2972                  */
2973                 rec->found_link++;
2974                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2975                        namelen, namebuf, dir_name);
2976         }
2977         printf("Fixed the nlink of inode %llu\n", rec->ino);
2978 out:
2979         /*
2980          * Clear the flag anyway, or we will loop forever for the same inode
2981          * as it will not be removed from the bad inode list and the dead loop
2982          * happens.
2983          */
2984         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2985         btrfs_release_path(path);
2986         return ret;
2987 }
2988
2989 /*
2990  * Check if there is any normal(reg or prealloc) file extent for given
2991  * ino.
2992  * This is used to determine the file type when neither its dir_index/item or
2993  * inode_item exists.
2994  *
2995  * This will *NOT* report error, if any error happens, just consider it does
2996  * not have any normal file extent.
2997  */
2998 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2999 {
3000         struct btrfs_path path;
3001         struct btrfs_key key;
3002         struct btrfs_key found_key;
3003         struct btrfs_file_extent_item *fi;
3004         u8 type;
3005         int ret = 0;
3006
3007         btrfs_init_path(&path);
3008         key.objectid = ino;
3009         key.type = BTRFS_EXTENT_DATA_KEY;
3010         key.offset = 0;
3011
3012         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3013         if (ret < 0) {
3014                 ret = 0;
3015                 goto out;
3016         }
3017         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3018                 ret = btrfs_next_leaf(root, &path);
3019                 if (ret) {
3020                         ret = 0;
3021                         goto out;
3022                 }
3023         }
3024         while (1) {
3025                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3026                                       path.slots[0]);
3027                 if (found_key.objectid != ino ||
3028                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3029                         break;
3030                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3031                                     struct btrfs_file_extent_item);
3032                 type = btrfs_file_extent_type(path.nodes[0], fi);
3033                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3034                         ret = 1;
3035                         goto out;
3036                 }
3037         }
3038 out:
3039         btrfs_release_path(&path);
3040         return ret;
3041 }
3042
3043 static u32 btrfs_type_to_imode(u8 type)
3044 {
3045         static u32 imode_by_btrfs_type[] = {
3046                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3047                 [BTRFS_FT_DIR]          = S_IFDIR,
3048                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3049                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3050                 [BTRFS_FT_FIFO]         = S_IFIFO,
3051                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3052                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3053         };
3054
3055         return imode_by_btrfs_type[(type)];
3056 }
3057
3058 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3059                                 struct btrfs_root *root,
3060                                 struct btrfs_path *path,
3061                                 struct inode_record *rec)
3062 {
3063         u8 filetype;
3064         u32 mode = 0700;
3065         int type_recovered = 0;
3066         int ret = 0;
3067
3068         printf("Trying to rebuild inode:%llu\n", rec->ino);
3069
3070         type_recovered = !find_file_type(rec, &filetype);
3071
3072         /*
3073          * Try to determine inode type if type not found.
3074          *
3075          * For found regular file extent, it must be FILE.
3076          * For found dir_item/index, it must be DIR.
3077          *
3078          * For undetermined one, use FILE as fallback.
3079          *
3080          * TODO:
3081          * 1. If found backref(inode_index/item is already handled) to it,
3082          *    it must be DIR.
3083          *    Need new inode-inode ref structure to allow search for that.
3084          */
3085         if (!type_recovered) {
3086                 if (rec->found_file_extent &&
3087                     find_normal_file_extent(root, rec->ino)) {
3088                         type_recovered = 1;
3089                         filetype = BTRFS_FT_REG_FILE;
3090                 } else if (rec->found_dir_item) {
3091                         type_recovered = 1;
3092                         filetype = BTRFS_FT_DIR;
3093                 } else if (!list_empty(&rec->orphan_extents)) {
3094                         type_recovered = 1;
3095                         filetype = BTRFS_FT_REG_FILE;
3096                 } else{
3097                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3098                                rec->ino);
3099                         type_recovered = 1;
3100                         filetype = BTRFS_FT_REG_FILE;
3101                 }
3102         }
3103
3104         ret = btrfs_new_inode(trans, root, rec->ino,
3105                               mode | btrfs_type_to_imode(filetype));
3106         if (ret < 0)
3107                 goto out;
3108
3109         /*
3110          * Here inode rebuild is done, we only rebuild the inode item,
3111          * don't repair the nlink(like move to lost+found).
3112          * That is the job of nlink repair.
3113          *
3114          * We just fill the record and return
3115          */
3116         rec->found_dir_item = 1;
3117         rec->imode = mode | btrfs_type_to_imode(filetype);
3118         rec->nlink = 0;
3119         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3120         /* Ensure the inode_nlinks repair function will be called */
3121         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3122 out:
3123         return ret;
3124 }
3125
3126 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3127                                       struct btrfs_root *root,
3128                                       struct btrfs_path *path,
3129                                       struct inode_record *rec)
3130 {
3131         struct orphan_data_extent *orphan;
3132         struct orphan_data_extent *tmp;
3133         int ret = 0;
3134
3135         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3136                 /*
3137                  * Check for conflicting file extents
3138                  *
3139                  * Here we don't know whether the extents is compressed or not,
3140                  * so we can only assume it not compressed nor data offset,
3141                  * and use its disk_len as extent length.
3142                  */
3143                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3144                                        orphan->offset, orphan->disk_len, 0);
3145                 btrfs_release_path(path);
3146                 if (ret < 0)
3147                         goto out;
3148                 if (!ret) {
3149                         fprintf(stderr,
3150                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3151                                 orphan->disk_bytenr, orphan->disk_len);
3152                         ret = btrfs_free_extent(trans,
3153                                         root->fs_info->extent_root,
3154                                         orphan->disk_bytenr, orphan->disk_len,
3155                                         0, root->objectid, orphan->objectid,
3156                                         orphan->offset);
3157                         if (ret < 0)
3158                                 goto out;
3159                 }
3160                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3161                                 orphan->offset, orphan->disk_bytenr,
3162                                 orphan->disk_len, orphan->disk_len);
3163                 if (ret < 0)
3164                         goto out;
3165
3166                 /* Update file size info */
3167                 rec->found_size += orphan->disk_len;
3168                 if (rec->found_size == rec->nbytes)
3169                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3170
3171                 /* Update the file extent hole info too */
3172                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3173                                            orphan->disk_len);
3174                 if (ret < 0)
3175                         goto out;
3176                 if (RB_EMPTY_ROOT(&rec->holes))
3177                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3178
3179                 list_del(&orphan->list);
3180                 free(orphan);
3181         }
3182         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3183 out:
3184         return ret;
3185 }
3186
3187 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3188                                         struct btrfs_root *root,
3189                                         struct btrfs_path *path,
3190                                         struct inode_record *rec)
3191 {
3192         struct rb_node *node;
3193         struct file_extent_hole *hole;
3194         int found = 0;
3195         int ret = 0;
3196
3197         node = rb_first(&rec->holes);
3198
3199         while (node) {
3200                 found = 1;
3201                 hole = rb_entry(node, struct file_extent_hole, node);
3202                 ret = btrfs_punch_hole(trans, root, rec->ino,
3203                                        hole->start, hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 ret = del_file_extent_hole(&rec->holes, hole->start,
3207                                            hole->len);
3208                 if (ret < 0)
3209                         goto out;
3210                 if (RB_EMPTY_ROOT(&rec->holes))
3211                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3212                 node = rb_first(&rec->holes);
3213         }
3214         /* special case for a file losing all its file extent */
3215         if (!found) {
3216                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3217                                        round_up(rec->isize, root->sectorsize));
3218                 if (ret < 0)
3219                         goto out;
3220         }
3221         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3222                rec->ino, root->objectid);
3223 out:
3224         return ret;
3225 }
3226
3227 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3228 {
3229         struct btrfs_trans_handle *trans;
3230         struct btrfs_path path;
3231         int ret = 0;
3232
3233         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3234                              I_ERR_NO_ORPHAN_ITEM |
3235                              I_ERR_LINK_COUNT_WRONG |
3236                              I_ERR_NO_INODE_ITEM |
3237                              I_ERR_FILE_EXTENT_ORPHAN |
3238                              I_ERR_FILE_EXTENT_DISCOUNT|
3239                              I_ERR_FILE_NBYTES_WRONG)))
3240                 return rec->errors;
3241
3242         /*
3243          * For nlink repair, it may create a dir and add link, so
3244          * 2 for parent(256)'s dir_index and dir_item
3245          * 2 for lost+found dir's inode_item and inode_ref
3246          * 1 for the new inode_ref of the file
3247          * 2 for lost+found dir's dir_index and dir_item for the file
3248          */
3249         trans = btrfs_start_transaction(root, 7);
3250         if (IS_ERR(trans))
3251                 return PTR_ERR(trans);
3252
3253         btrfs_init_path(&path);
3254         if (rec->errors & I_ERR_NO_INODE_ITEM)
3255                 ret = repair_inode_no_item(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3257                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3259                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3261                 ret = repair_inode_isize(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3263                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3265                 ret = repair_inode_nlinks(trans, root, &path, rec);
3266         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3267                 ret = repair_inode_nbytes(trans, root, &path, rec);
3268         btrfs_commit_transaction(trans, root);
3269         btrfs_release_path(&path);
3270         return ret;
3271 }
3272
3273 static int check_inode_recs(struct btrfs_root *root,
3274                             struct cache_tree *inode_cache)
3275 {
3276         struct cache_extent *cache;
3277         struct ptr_node *node;
3278         struct inode_record *rec;
3279         struct inode_backref *backref;
3280         int stage = 0;
3281         int ret = 0;
3282         int err = 0;
3283         u64 error = 0;
3284         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3285
3286         if (btrfs_root_refs(&root->root_item) == 0) {
3287                 if (!cache_tree_empty(inode_cache))
3288                         fprintf(stderr, "warning line %d\n", __LINE__);
3289                 return 0;
3290         }
3291
3292         /*
3293          * We need to repair backrefs first because we could change some of the
3294          * errors in the inode recs.
3295          *
3296          * We also need to go through and delete invalid backrefs first and then
3297          * add the correct ones second.  We do this because we may get EEXIST
3298          * when adding back the correct index because we hadn't yet deleted the
3299          * invalid index.
3300          *
3301          * For example, if we were missing a dir index then the directories
3302          * isize would be wrong, so if we fixed the isize to what we thought it
3303          * would be and then fixed the backref we'd still have a invalid fs, so
3304          * we need to add back the dir index and then check to see if the isize
3305          * is still wrong.
3306          */
3307         while (stage < 3) {
3308                 stage++;
3309                 if (stage == 3 && !err)
3310                         break;
3311
3312                 cache = search_cache_extent(inode_cache, 0);
3313                 while (repair && cache) {
3314                         node = container_of(cache, struct ptr_node, cache);
3315                         rec = node->data;
3316                         cache = next_cache_extent(cache);
3317
3318                         /* Need to free everything up and rescan */
3319                         if (stage == 3) {
3320                                 remove_cache_extent(inode_cache, &node->cache);
3321                                 free(node);
3322                                 free_inode_rec(rec);
3323                                 continue;
3324                         }
3325
3326                         if (list_empty(&rec->backrefs))
3327                                 continue;
3328
3329                         ret = repair_inode_backrefs(root, rec, inode_cache,
3330                                                     stage == 1);
3331                         if (ret < 0) {
3332                                 err = ret;
3333                                 stage = 2;
3334                                 break;
3335                         } if (ret > 0) {
3336                                 err = -EAGAIN;
3337                         }
3338                 }
3339         }
3340         if (err)
3341                 return err;
3342
3343         rec = get_inode_rec(inode_cache, root_dirid, 0);
3344         BUG_ON(IS_ERR(rec));
3345         if (rec) {
3346                 ret = check_root_dir(rec);
3347                 if (ret) {
3348                         fprintf(stderr, "root %llu root dir %llu error\n",
3349                                 (unsigned long long)root->root_key.objectid,
3350                                 (unsigned long long)root_dirid);
3351                         print_inode_error(root, rec);
3352                         error++;
3353                 }
3354         } else {
3355                 if (repair) {
3356                         struct btrfs_trans_handle *trans;
3357
3358                         trans = btrfs_start_transaction(root, 1);
3359                         if (IS_ERR(trans)) {
3360                                 err = PTR_ERR(trans);
3361                                 return err;
3362                         }
3363
3364                         fprintf(stderr,
3365                                 "root %llu missing its root dir, recreating\n",
3366                                 (unsigned long long)root->objectid);
3367
3368                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3369                         BUG_ON(ret);
3370
3371                         btrfs_commit_transaction(trans, root);
3372                         return -EAGAIN;
3373                 }
3374
3375                 fprintf(stderr, "root %llu root dir %llu not found\n",
3376                         (unsigned long long)root->root_key.objectid,
3377                         (unsigned long long)root_dirid);
3378         }
3379
3380         while (1) {
3381                 cache = search_cache_extent(inode_cache, 0);
3382                 if (!cache)
3383                         break;
3384                 node = container_of(cache, struct ptr_node, cache);
3385                 rec = node->data;
3386                 remove_cache_extent(inode_cache, &node->cache);
3387                 free(node);
3388                 if (rec->ino == root_dirid ||
3389                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3390                         free_inode_rec(rec);
3391                         continue;
3392                 }
3393
3394                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3395                         ret = check_orphan_item(root, rec->ino);
3396                         if (ret == 0)
3397                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3398                         if (can_free_inode_rec(rec)) {
3399                                 free_inode_rec(rec);
3400                                 continue;
3401                         }
3402                 }
3403
3404                 if (!rec->found_inode_item)
3405                         rec->errors |= I_ERR_NO_INODE_ITEM;
3406                 if (rec->found_link != rec->nlink)
3407                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3408                 if (repair) {
3409                         ret = try_repair_inode(root, rec);
3410                         if (ret == 0 && can_free_inode_rec(rec)) {
3411                                 free_inode_rec(rec);
3412                                 continue;
3413                         }
3414                         ret = 0;
3415                 }
3416
3417                 if (!(repair && ret == 0))
3418                         error++;
3419                 print_inode_error(root, rec);
3420                 list_for_each_entry(backref, &rec->backrefs, list) {
3421                         if (!backref->found_dir_item)
3422                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3423                         if (!backref->found_dir_index)
3424                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3425                         if (!backref->found_inode_ref)
3426                                 backref->errors |= REF_ERR_NO_INODE_REF;
3427                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3428                                 " namelen %u name %s filetype %d errors %x",
3429                                 (unsigned long long)backref->dir,
3430                                 (unsigned long long)backref->index,
3431                                 backref->namelen, backref->name,
3432                                 backref->filetype, backref->errors);
3433                         print_ref_error(backref->errors);
3434                 }
3435                 free_inode_rec(rec);
3436         }
3437         return (error > 0) ? -1 : 0;
3438 }
3439
3440 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3441                                         u64 objectid)
3442 {
3443         struct cache_extent *cache;
3444         struct root_record *rec = NULL;
3445         int ret;
3446
3447         cache = lookup_cache_extent(root_cache, objectid, 1);
3448         if (cache) {
3449                 rec = container_of(cache, struct root_record, cache);
3450         } else {
3451                 rec = calloc(1, sizeof(*rec));
3452                 if (!rec)
3453                         return ERR_PTR(-ENOMEM);
3454                 rec->objectid = objectid;
3455                 INIT_LIST_HEAD(&rec->backrefs);
3456                 rec->cache.start = objectid;
3457                 rec->cache.size = 1;
3458
3459                 ret = insert_cache_extent(root_cache, &rec->cache);
3460                 if (ret)
3461                         return ERR_PTR(-EEXIST);
3462         }
3463         return rec;
3464 }
3465
3466 static struct root_backref *get_root_backref(struct root_record *rec,
3467                                              u64 ref_root, u64 dir, u64 index,
3468                                              const char *name, int namelen)
3469 {
3470         struct root_backref *backref;
3471
3472         list_for_each_entry(backref, &rec->backrefs, list) {
3473                 if (backref->ref_root != ref_root || backref->dir != dir ||
3474                     backref->namelen != namelen)
3475                         continue;
3476                 if (memcmp(name, backref->name, namelen))
3477                         continue;
3478                 return backref;
3479         }
3480
3481         backref = calloc(1, sizeof(*backref) + namelen + 1);
3482         if (!backref)
3483                 return NULL;
3484         backref->ref_root = ref_root;
3485         backref->dir = dir;
3486         backref->index = index;
3487         backref->namelen = namelen;
3488         memcpy(backref->name, name, namelen);
3489         backref->name[namelen] = '\0';
3490         list_add_tail(&backref->list, &rec->backrefs);
3491         return backref;
3492 }
3493
3494 static void free_root_record(struct cache_extent *cache)
3495 {
3496         struct root_record *rec;
3497         struct root_backref *backref;
3498
3499         rec = container_of(cache, struct root_record, cache);
3500         while (!list_empty(&rec->backrefs)) {
3501                 backref = to_root_backref(rec->backrefs.next);
3502                 list_del(&backref->list);
3503                 free(backref);
3504         }
3505
3506         free(rec);
3507 }
3508
3509 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3510
3511 static int add_root_backref(struct cache_tree *root_cache,
3512                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3513                             const char *name, int namelen,
3514                             int item_type, int errors)
3515 {
3516         struct root_record *rec;
3517         struct root_backref *backref;
3518
3519         rec = get_root_rec(root_cache, root_id);
3520         BUG_ON(IS_ERR(rec));
3521         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3522         BUG_ON(!backref);
3523
3524         backref->errors |= errors;
3525
3526         if (item_type != BTRFS_DIR_ITEM_KEY) {
3527                 if (backref->found_dir_index || backref->found_back_ref ||
3528                     backref->found_forward_ref) {
3529                         if (backref->index != index)
3530                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3531                 } else {
3532                         backref->index = index;
3533                 }
3534         }
3535
3536         if (item_type == BTRFS_DIR_ITEM_KEY) {
3537                 if (backref->found_forward_ref)
3538                         rec->found_ref++;
3539                 backref->found_dir_item = 1;
3540         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3541                 backref->found_dir_index = 1;
3542         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3543                 if (backref->found_forward_ref)
3544                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3545                 else if (backref->found_dir_item)
3546                         rec->found_ref++;
3547                 backref->found_forward_ref = 1;
3548         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3549                 if (backref->found_back_ref)
3550                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3551                 backref->found_back_ref = 1;
3552         } else {
3553                 BUG_ON(1);
3554         }
3555
3556         if (backref->found_forward_ref && backref->found_dir_item)
3557                 backref->reachable = 1;
3558         return 0;
3559 }
3560
3561 static int merge_root_recs(struct btrfs_root *root,
3562                            struct cache_tree *src_cache,
3563                            struct cache_tree *dst_cache)
3564 {
3565         struct cache_extent *cache;
3566         struct ptr_node *node;
3567         struct inode_record *rec;
3568         struct inode_backref *backref;
3569         int ret = 0;
3570
3571         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3572                 free_inode_recs_tree(src_cache);
3573                 return 0;
3574         }
3575
3576         while (1) {
3577                 cache = search_cache_extent(src_cache, 0);
3578                 if (!cache)
3579                         break;
3580                 node = container_of(cache, struct ptr_node, cache);
3581                 rec = node->data;
3582                 remove_cache_extent(src_cache, &node->cache);
3583                 free(node);
3584
3585                 ret = is_child_root(root, root->objectid, rec->ino);
3586                 if (ret < 0)
3587                         break;
3588                 else if (ret == 0)
3589                         goto skip;
3590
3591                 list_for_each_entry(backref, &rec->backrefs, list) {
3592                         BUG_ON(backref->found_inode_ref);
3593                         if (backref->found_dir_item)
3594                                 add_root_backref(dst_cache, rec->ino,
3595                                         root->root_key.objectid, backref->dir,
3596                                         backref->index, backref->name,
3597                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3598                                         backref->errors);
3599                         if (backref->found_dir_index)
3600                                 add_root_backref(dst_cache, rec->ino,
3601                                         root->root_key.objectid, backref->dir,
3602                                         backref->index, backref->name,
3603                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3604                                         backref->errors);
3605                 }
3606 skip:
3607                 free_inode_rec(rec);
3608         }
3609         if (ret < 0)
3610                 return ret;
3611         return 0;
3612 }
3613
3614 static int check_root_refs(struct btrfs_root *root,
3615                            struct cache_tree *root_cache)
3616 {
3617         struct root_record *rec;
3618         struct root_record *ref_root;
3619         struct root_backref *backref;
3620         struct cache_extent *cache;
3621         int loop = 1;
3622         int ret;
3623         int error;
3624         int errors = 0;
3625
3626         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3627         BUG_ON(IS_ERR(rec));
3628         rec->found_ref = 1;
3629
3630         /* fixme: this can not detect circular references */
3631         while (loop) {
3632                 loop = 0;
3633                 cache = search_cache_extent(root_cache, 0);
3634                 while (1) {
3635                         if (!cache)
3636                                 break;
3637                         rec = container_of(cache, struct root_record, cache);
3638                         cache = next_cache_extent(cache);
3639
3640                         if (rec->found_ref == 0)
3641                                 continue;
3642
3643                         list_for_each_entry(backref, &rec->backrefs, list) {
3644                                 if (!backref->reachable)
3645                                         continue;
3646
3647                                 ref_root = get_root_rec(root_cache,
3648                                                         backref->ref_root);
3649                                 BUG_ON(IS_ERR(ref_root));
3650                                 if (ref_root->found_ref > 0)
3651                                         continue;
3652
3653                                 backref->reachable = 0;
3654                                 rec->found_ref--;
3655                                 if (rec->found_ref == 0)
3656                                         loop = 1;
3657                         }
3658                 }
3659         }
3660
3661         cache = search_cache_extent(root_cache, 0);
3662         while (1) {
3663                 if (!cache)
3664                         break;
3665                 rec = container_of(cache, struct root_record, cache);
3666                 cache = next_cache_extent(cache);
3667
3668                 if (rec->found_ref == 0 &&
3669                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3670                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3671                         ret = check_orphan_item(root->fs_info->tree_root,
3672                                                 rec->objectid);
3673                         if (ret == 0)
3674                                 continue;
3675
3676                         /*
3677                          * If we don't have a root item then we likely just have
3678                          * a dir item in a snapshot for this root but no actual
3679                          * ref key or anything so it's meaningless.
3680                          */
3681                         if (!rec->found_root_item)
3682                                 continue;
3683                         errors++;
3684                         fprintf(stderr, "fs tree %llu not referenced\n",
3685                                 (unsigned long long)rec->objectid);
3686                 }
3687
3688                 error = 0;
3689                 if (rec->found_ref > 0 && !rec->found_root_item)
3690                         error = 1;
3691                 list_for_each_entry(backref, &rec->backrefs, list) {
3692                         if (!backref->found_dir_item)
3693                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3694                         if (!backref->found_dir_index)
3695                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3696                         if (!backref->found_back_ref)
3697                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3698                         if (!backref->found_forward_ref)
3699                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3700                         if (backref->reachable && backref->errors)
3701                                 error = 1;
3702                 }
3703                 if (!error)
3704                         continue;
3705
3706                 errors++;
3707                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3708                         (unsigned long long)rec->objectid, rec->found_ref,
3709                          rec->found_root_item ? "" : "not found");
3710
3711                 list_for_each_entry(backref, &rec->backrefs, list) {
3712                         if (!backref->reachable)
3713                                 continue;
3714                         if (!backref->errors && rec->found_root_item)
3715                                 continue;
3716                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3717                                 " index %llu namelen %u name %s errors %x\n",
3718                                 (unsigned long long)backref->ref_root,
3719                                 (unsigned long long)backref->dir,
3720                                 (unsigned long long)backref->index,
3721                                 backref->namelen, backref->name,
3722                                 backref->errors);
3723                         print_ref_error(backref->errors);
3724                 }
3725         }
3726         return errors > 0 ? 1 : 0;
3727 }
3728
3729 static int process_root_ref(struct extent_buffer *eb, int slot,
3730                             struct btrfs_key *key,
3731                             struct cache_tree *root_cache)
3732 {
3733         u64 dirid;
3734         u64 index;
3735         u32 len;
3736         u32 name_len;
3737         struct btrfs_root_ref *ref;
3738         char namebuf[BTRFS_NAME_LEN];
3739         int error;
3740
3741         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3742
3743         dirid = btrfs_root_ref_dirid(eb, ref);
3744         index = btrfs_root_ref_sequence(eb, ref);
3745         name_len = btrfs_root_ref_name_len(eb, ref);
3746
3747         if (name_len <= BTRFS_NAME_LEN) {
3748                 len = name_len;
3749                 error = 0;
3750         } else {
3751                 len = BTRFS_NAME_LEN;
3752                 error = REF_ERR_NAME_TOO_LONG;
3753         }
3754         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3755
3756         if (key->type == BTRFS_ROOT_REF_KEY) {
3757                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3758                                  index, namebuf, len, key->type, error);
3759         } else {
3760                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3761                                  index, namebuf, len, key->type, error);
3762         }
3763         return 0;
3764 }
3765
3766 static void free_corrupt_block(struct cache_extent *cache)
3767 {
3768         struct btrfs_corrupt_block *corrupt;
3769
3770         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3771         free(corrupt);
3772 }
3773
3774 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3775
3776 /*
3777  * Repair the btree of the given root.
3778  *
3779  * The fix is to remove the node key in corrupt_blocks cache_tree.
3780  * and rebalance the tree.
3781  * After the fix, the btree should be writeable.
3782  */
3783 static int repair_btree(struct btrfs_root *root,
3784                         struct cache_tree *corrupt_blocks)
3785 {
3786         struct btrfs_trans_handle *trans;
3787         struct btrfs_path path;
3788         struct btrfs_corrupt_block *corrupt;
3789         struct cache_extent *cache;
3790         struct btrfs_key key;
3791         u64 offset;
3792         int level;
3793         int ret = 0;
3794
3795         if (cache_tree_empty(corrupt_blocks))
3796                 return 0;
3797
3798         trans = btrfs_start_transaction(root, 1);
3799         if (IS_ERR(trans)) {
3800                 ret = PTR_ERR(trans);
3801                 fprintf(stderr, "Error starting transaction: %s\n",
3802                         strerror(-ret));
3803                 return ret;
3804         }
3805         btrfs_init_path(&path);
3806         cache = first_cache_extent(corrupt_blocks);
3807         while (cache) {
3808                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3809                                        cache);
3810                 level = corrupt->level;
3811                 path.lowest_level = level;
3812                 key.objectid = corrupt->key.objectid;
3813                 key.type = corrupt->key.type;
3814                 key.offset = corrupt->key.offset;
3815
3816                 /*
3817                  * Here we don't want to do any tree balance, since it may
3818                  * cause a balance with corrupted brother leaf/node,
3819                  * so ins_len set to 0 here.
3820                  * Balance will be done after all corrupt node/leaf is deleted.
3821                  */
3822                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3823                 if (ret < 0)
3824                         goto out;
3825                 offset = btrfs_node_blockptr(path.nodes[level],
3826                                              path.slots[level]);
3827
3828                 /* Remove the ptr */
3829                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3830                 if (ret < 0)
3831                         goto out;
3832                 /*
3833                  * Remove the corresponding extent
3834                  * return value is not concerned.
3835                  */
3836                 btrfs_release_path(&path);
3837                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3838                                         0, root->root_key.objectid,
3839                                         level - 1, 0);
3840                 cache = next_cache_extent(cache);
3841         }
3842
3843         /* Balance the btree using btrfs_search_slot() */
3844         cache = first_cache_extent(corrupt_blocks);
3845         while (cache) {
3846                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3847                                        cache);
3848                 memcpy(&key, &corrupt->key, sizeof(key));
3849                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3850                 if (ret < 0)
3851                         goto out;
3852                 /* return will always >0 since it won't find the item */
3853                 ret = 0;
3854                 btrfs_release_path(&path);
3855                 cache = next_cache_extent(cache);
3856         }
3857 out:
3858         btrfs_commit_transaction(trans, root);
3859         btrfs_release_path(&path);
3860         return ret;
3861 }
3862
3863 static int check_fs_root(struct btrfs_root *root,
3864                          struct cache_tree *root_cache,
3865                          struct walk_control *wc)
3866 {
3867         int ret = 0;
3868         int err = 0;
3869         int wret;
3870         int level;
3871         struct btrfs_path path;
3872         struct shared_node root_node;
3873         struct root_record *rec;
3874         struct btrfs_root_item *root_item = &root->root_item;
3875         struct cache_tree corrupt_blocks;
3876         struct orphan_data_extent *orphan;
3877         struct orphan_data_extent *tmp;
3878         enum btrfs_tree_block_status status;
3879         struct node_refs nrefs;
3880
3881         /*
3882          * Reuse the corrupt_block cache tree to record corrupted tree block
3883          *
3884          * Unlike the usage in extent tree check, here we do it in a per
3885          * fs/subvol tree base.
3886          */
3887         cache_tree_init(&corrupt_blocks);
3888         root->fs_info->corrupt_blocks = &corrupt_blocks;
3889
3890         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3891                 rec = get_root_rec(root_cache, root->root_key.objectid);
3892                 BUG_ON(IS_ERR(rec));
3893                 if (btrfs_root_refs(root_item) > 0)
3894                         rec->found_root_item = 1;
3895         }
3896
3897         btrfs_init_path(&path);
3898         memset(&root_node, 0, sizeof(root_node));
3899         cache_tree_init(&root_node.root_cache);
3900         cache_tree_init(&root_node.inode_cache);
3901         memset(&nrefs, 0, sizeof(nrefs));
3902
3903         /* Move the orphan extent record to corresponding inode_record */
3904         list_for_each_entry_safe(orphan, tmp,
3905                                  &root->orphan_data_extents, list) {
3906                 struct inode_record *inode;
3907
3908                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3909                                       1);
3910                 BUG_ON(IS_ERR(inode));
3911                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3912                 list_move(&orphan->list, &inode->orphan_extents);
3913         }
3914
3915         level = btrfs_header_level(root->node);
3916         memset(wc->nodes, 0, sizeof(wc->nodes));
3917         wc->nodes[level] = &root_node;
3918         wc->active_node = level;
3919         wc->root_level = level;
3920
3921         /* We may not have checked the root block, lets do that now */
3922         if (btrfs_is_leaf(root->node))
3923                 status = btrfs_check_leaf(root, NULL, root->node);
3924         else
3925                 status = btrfs_check_node(root, NULL, root->node);
3926         if (status != BTRFS_TREE_BLOCK_CLEAN)
3927                 return -EIO;
3928
3929         if (btrfs_root_refs(root_item) > 0 ||
3930             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3931                 path.nodes[level] = root->node;
3932                 extent_buffer_get(root->node);
3933                 path.slots[level] = 0;
3934         } else {
3935                 struct btrfs_key key;
3936                 struct btrfs_disk_key found_key;
3937
3938                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3939                 level = root_item->drop_level;
3940                 path.lowest_level = level;
3941                 if (level > btrfs_header_level(root->node) ||
3942                     level >= BTRFS_MAX_LEVEL) {
3943                         error("ignoring invalid drop level: %u", level);
3944                         goto skip_walking;
3945                 }
3946                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3947                 if (wret < 0)
3948                         goto skip_walking;
3949                 btrfs_node_key(path.nodes[level], &found_key,
3950                                 path.slots[level]);
3951                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3952                                         sizeof(found_key)));
3953         }
3954
3955         while (1) {
3956                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3957                 if (wret < 0)
3958                         ret = wret;
3959                 if (wret != 0)
3960                         break;
3961
3962                 wret = walk_up_tree(root, &path, wc, &level);
3963                 if (wret < 0)
3964                         ret = wret;
3965                 if (wret != 0)
3966                         break;
3967         }
3968 skip_walking:
3969         btrfs_release_path(&path);
3970
3971         if (!cache_tree_empty(&corrupt_blocks)) {
3972                 struct cache_extent *cache;
3973                 struct btrfs_corrupt_block *corrupt;
3974
3975                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3976                        root->root_key.objectid);
3977                 cache = first_cache_extent(&corrupt_blocks);
3978                 while (cache) {
3979                         corrupt = container_of(cache,
3980                                                struct btrfs_corrupt_block,
3981                                                cache);
3982                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3983                                cache->start, corrupt->level,
3984                                corrupt->key.objectid, corrupt->key.type,
3985                                corrupt->key.offset);
3986                         cache = next_cache_extent(cache);
3987                 }
3988                 if (repair) {
3989                         printf("Try to repair the btree for root %llu\n",
3990                                root->root_key.objectid);
3991                         ret = repair_btree(root, &corrupt_blocks);
3992                         if (ret < 0)
3993                                 fprintf(stderr, "Failed to repair btree: %s\n",
3994                                         strerror(-ret));
3995                         if (!ret)
3996                                 printf("Btree for root %llu is fixed\n",
3997                                        root->root_key.objectid);
3998                 }
3999         }
4000
4001         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4002         if (err < 0)
4003                 ret = err;
4004
4005         if (root_node.current) {
4006                 root_node.current->checked = 1;
4007                 maybe_free_inode_rec(&root_node.inode_cache,
4008                                 root_node.current);
4009         }
4010
4011         err = check_inode_recs(root, &root_node.inode_cache);
4012         if (!ret)
4013                 ret = err;
4014
4015         free_corrupt_blocks_tree(&corrupt_blocks);
4016         root->fs_info->corrupt_blocks = NULL;
4017         free_orphan_data_extents(&root->orphan_data_extents);
4018         return ret;
4019 }
4020
4021 static int fs_root_objectid(u64 objectid)
4022 {
4023         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4024             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4025                 return 1;
4026         return is_fstree(objectid);
4027 }
4028
4029 static int check_fs_roots(struct btrfs_root *root,
4030                           struct cache_tree *root_cache)
4031 {
4032         struct btrfs_path path;
4033         struct btrfs_key key;
4034         struct walk_control wc;
4035         struct extent_buffer *leaf, *tree_node;
4036         struct btrfs_root *tmp_root;
4037         struct btrfs_root *tree_root = root->fs_info->tree_root;
4038         int ret;
4039         int err = 0;
4040
4041         if (ctx.progress_enabled) {
4042                 ctx.tp = TASK_FS_ROOTS;
4043                 task_start(ctx.info);
4044         }
4045
4046         /*
4047          * Just in case we made any changes to the extent tree that weren't
4048          * reflected into the free space cache yet.
4049          */
4050         if (repair)
4051                 reset_cached_block_groups(root->fs_info);
4052         memset(&wc, 0, sizeof(wc));
4053         cache_tree_init(&wc.shared);
4054         btrfs_init_path(&path);
4055
4056 again:
4057         key.offset = 0;
4058         key.objectid = 0;
4059         key.type = BTRFS_ROOT_ITEM_KEY;
4060         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4061         if (ret < 0) {
4062                 err = 1;
4063                 goto out;
4064         }
4065         tree_node = tree_root->node;
4066         while (1) {
4067                 if (tree_node != tree_root->node) {
4068                         free_root_recs_tree(root_cache);
4069                         btrfs_release_path(&path);
4070                         goto again;
4071                 }
4072                 leaf = path.nodes[0];
4073                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4074                         ret = btrfs_next_leaf(tree_root, &path);
4075                         if (ret) {
4076                                 if (ret < 0)
4077                                         err = 1;
4078                                 break;
4079                         }
4080                         leaf = path.nodes[0];
4081                 }
4082                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4083                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4084                     fs_root_objectid(key.objectid)) {
4085                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4086                                 tmp_root = btrfs_read_fs_root_no_cache(
4087                                                 root->fs_info, &key);
4088                         } else {
4089                                 key.offset = (u64)-1;
4090                                 tmp_root = btrfs_read_fs_root(
4091                                                 root->fs_info, &key);
4092                         }
4093                         if (IS_ERR(tmp_root)) {
4094                                 err = 1;
4095                                 goto next;
4096                         }
4097                         ret = check_fs_root(tmp_root, root_cache, &wc);
4098                         if (ret == -EAGAIN) {
4099                                 free_root_recs_tree(root_cache);
4100                                 btrfs_release_path(&path);
4101                                 goto again;
4102                         }
4103                         if (ret)
4104                                 err = 1;
4105                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4106                                 btrfs_free_fs_root(tmp_root);
4107                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4108                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4109                         process_root_ref(leaf, path.slots[0], &key,
4110                                          root_cache);
4111                 }
4112 next:
4113                 path.slots[0]++;
4114         }
4115 out:
4116         btrfs_release_path(&path);
4117         if (err)
4118                 free_extent_cache_tree(&wc.shared);
4119         if (!cache_tree_empty(&wc.shared))
4120                 fprintf(stderr, "warning line %d\n", __LINE__);
4121
4122         task_stop(ctx.info);
4123
4124         return err;
4125 }
4126
4127 /*
4128  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4129  * INODE_REF/INODE_EXTREF match.
4130  *
4131  * @root:       the root of the fs/file tree
4132  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4133  * @key:        the key of the DIR_ITEM/DIR_INDEX
4134  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4135  *              distinguish root_dir between normal dir/file
4136  * @name:       the name in the INODE_REF/INODE_EXTREF
4137  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4138  * @mode:       the st_mode of INODE_ITEM
4139  *
4140  * Return 0 if no error occurred.
4141  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4142  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4143  * dir/file.
4144  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4145  * not match for normal dir/file.
4146  */
4147 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4148                          struct btrfs_key *key, u64 index, char *name,
4149                          u32 namelen, u32 mode)
4150 {
4151         struct btrfs_path path;
4152         struct extent_buffer *node;
4153         struct btrfs_dir_item *di;
4154         struct btrfs_key location;
4155         char namebuf[BTRFS_NAME_LEN] = {0};
4156         u32 total;
4157         u32 cur = 0;
4158         u32 len;
4159         u32 name_len;
4160         u32 data_len;
4161         u8 filetype;
4162         int slot;
4163         int ret;
4164
4165         btrfs_init_path(&path);
4166         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4167         if (ret < 0) {
4168                 ret = DIR_ITEM_MISSING;
4169                 goto out;
4170         }
4171
4172         /* Process root dir and goto out*/
4173         if (index == 0) {
4174                 if (ret == 0) {
4175                         ret = ROOT_DIR_ERROR;
4176                         error(
4177                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4178                                 root->objectid,
4179                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4180                                         "REF" : "EXTREF",
4181                                 ref_key->objectid, ref_key->offset,
4182                                 key->type == BTRFS_DIR_ITEM_KEY ?
4183                                         "DIR_ITEM" : "DIR_INDEX");
4184                 } else {
4185                         ret = 0;
4186                 }
4187
4188                 goto out;
4189         }
4190
4191         /* Process normal file/dir */
4192         if (ret > 0) {
4193                 ret = DIR_ITEM_MISSING;
4194                 error(
4195                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4196                         root->objectid,
4197                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4198                         ref_key->objectid, ref_key->offset,
4199                         key->type == BTRFS_DIR_ITEM_KEY ?
4200                                 "DIR_ITEM" : "DIR_INDEX",
4201                         key->objectid, key->offset, namelen, name,
4202                         imode_to_type(mode));
4203                 goto out;
4204         }
4205
4206         /* Check whether inode_id/filetype/name match */
4207         node = path.nodes[0];
4208         slot = path.slots[0];
4209         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4210         total = btrfs_item_size_nr(node, slot);
4211         while (cur < total) {
4212                 ret = DIR_ITEM_MISMATCH;
4213                 name_len = btrfs_dir_name_len(node, di);
4214                 data_len = btrfs_dir_data_len(node, di);
4215
4216                 btrfs_dir_item_key_to_cpu(node, di, &location);
4217                 if (location.objectid != ref_key->objectid ||
4218                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4219                     location.offset != 0)
4220                         goto next;
4221
4222                 filetype = btrfs_dir_type(node, di);
4223                 if (imode_to_type(mode) != filetype)
4224                         goto next;
4225
4226                 if (name_len <= BTRFS_NAME_LEN) {
4227                         len = name_len;
4228                 } else {
4229                         len = BTRFS_NAME_LEN;
4230                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4231                         root->objectid,
4232                         key->type == BTRFS_DIR_ITEM_KEY ?
4233                         "DIR_ITEM" : "DIR_INDEX",
4234                         key->objectid, key->offset, name_len);
4235                 }
4236                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4237                 if (len != namelen || strncmp(namebuf, name, len))
4238                         goto next;
4239
4240                 ret = 0;
4241                 goto out;
4242 next:
4243                 len = sizeof(*di) + name_len + data_len;
4244                 di = (struct btrfs_dir_item *)((char *)di + len);
4245                 cur += len;
4246         }
4247         if (ret == DIR_ITEM_MISMATCH)
4248                 error(
4249                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4250                         root->objectid,
4251                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4252                         ref_key->objectid, ref_key->offset,
4253                         key->type == BTRFS_DIR_ITEM_KEY ?
4254                                 "DIR_ITEM" : "DIR_INDEX",
4255                         key->objectid, key->offset, namelen, name,
4256                         imode_to_type(mode));
4257 out:
4258         btrfs_release_path(&path);
4259         return ret;
4260 }
4261
4262 /*
4263  * Traverse the given INODE_REF and call find_dir_item() to find related
4264  * DIR_ITEM/DIR_INDEX.
4265  *
4266  * @root:       the root of the fs/file tree
4267  * @ref_key:    the key of the INODE_REF
4268  * @refs:       the count of INODE_REF
4269  * @mode:       the st_mode of INODE_ITEM
4270  *
4271  * Return 0 if no error occurred.
4272  */
4273 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4274                            struct extent_buffer *node, int slot, u64 *refs,
4275                            int mode)
4276 {
4277         struct btrfs_key key;
4278         struct btrfs_inode_ref *ref;
4279         char namebuf[BTRFS_NAME_LEN] = {0};
4280         u32 total;
4281         u32 cur = 0;
4282         u32 len;
4283         u32 name_len;
4284         u64 index;
4285         int ret, err = 0;
4286
4287         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4288         total = btrfs_item_size_nr(node, slot);
4289
4290 next:
4291         /* Update inode ref count */
4292         (*refs)++;
4293
4294         index = btrfs_inode_ref_index(node, ref);
4295         name_len = btrfs_inode_ref_name_len(node, ref);
4296         if (name_len <= BTRFS_NAME_LEN) {
4297                 len = name_len;
4298         } else {
4299                 len = BTRFS_NAME_LEN;
4300                 warning("root %llu INODE_REF[%llu %llu] name too long",
4301                         root->objectid, ref_key->objectid, ref_key->offset);
4302         }
4303
4304         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4305
4306         /* Check root dir ref name */
4307         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4308                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4309                       root->objectid, ref_key->objectid, ref_key->offset,
4310                       namebuf);
4311                 err |= ROOT_DIR_ERROR;
4312         }
4313
4314         /* Find related DIR_INDEX */
4315         key.objectid = ref_key->offset;
4316         key.type = BTRFS_DIR_INDEX_KEY;
4317         key.offset = index;
4318         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4319         err |= ret;
4320
4321         /* Find related dir_item */
4322         key.objectid = ref_key->offset;
4323         key.type = BTRFS_DIR_ITEM_KEY;
4324         key.offset = btrfs_name_hash(namebuf, len);
4325         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4326         err |= ret;
4327
4328         len = sizeof(*ref) + name_len;
4329         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4330         cur += len;
4331         if (cur < total)
4332                 goto next;
4333
4334         return err;
4335 }
4336
4337 /*
4338  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4339  * DIR_ITEM/DIR_INDEX.
4340  *
4341  * @root:       the root of the fs/file tree
4342  * @ref_key:    the key of the INODE_EXTREF
4343  * @refs:       the count of INODE_EXTREF
4344  * @mode:       the st_mode of INODE_ITEM
4345  *
4346  * Return 0 if no error occurred.
4347  */
4348 static int check_inode_extref(struct btrfs_root *root,
4349                               struct btrfs_key *ref_key,
4350                               struct extent_buffer *node, int slot, u64 *refs,
4351                               int mode)
4352 {
4353         struct btrfs_key key;
4354         struct btrfs_inode_extref *extref;
4355         char namebuf[BTRFS_NAME_LEN] = {0};
4356         u32 total;
4357         u32 cur = 0;
4358         u32 len;
4359         u32 name_len;
4360         u64 index;
4361         u64 parent;
4362         int ret;
4363         int err = 0;
4364
4365         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4366         total = btrfs_item_size_nr(node, slot);
4367
4368 next:
4369         /* update inode ref count */
4370         (*refs)++;
4371         name_len = btrfs_inode_extref_name_len(node, extref);
4372         index = btrfs_inode_extref_index(node, extref);
4373         parent = btrfs_inode_extref_parent(node, extref);
4374         if (name_len <= BTRFS_NAME_LEN) {
4375                 len = name_len;
4376         } else {
4377                 len = BTRFS_NAME_LEN;
4378                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4379                         root->objectid, ref_key->objectid, ref_key->offset);
4380         }
4381         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4382
4383         /* Check root dir ref name */
4384         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4385                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4386                       root->objectid, ref_key->objectid, ref_key->offset,
4387                       namebuf);
4388                 err |= ROOT_DIR_ERROR;
4389         }
4390
4391         /* find related dir_index */
4392         key.objectid = parent;
4393         key.type = BTRFS_DIR_INDEX_KEY;
4394         key.offset = index;
4395         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4396         err |= ret;
4397
4398         /* find related dir_item */
4399         key.objectid = parent;
4400         key.type = BTRFS_DIR_ITEM_KEY;
4401         key.offset = btrfs_name_hash(namebuf, len);
4402         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4403         err |= ret;
4404
4405         len = sizeof(*extref) + name_len;
4406         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4407         cur += len;
4408
4409         if (cur < total)
4410                 goto next;
4411
4412         return err;
4413 }
4414
4415 /*
4416  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4417  * DIR_ITEM/DIR_INDEX match.
4418  *
4419  * @root:       the root of the fs/file tree
4420  * @key:        the key of the INODE_REF/INODE_EXTREF
4421  * @name:       the name in the INODE_REF/INODE_EXTREF
4422  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4423  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4424  * to (u64)-1
4425  * @ext_ref:    the EXTENDED_IREF feature
4426  *
4427  * Return 0 if no error occurred.
4428  * Return >0 for error bitmap
4429  */
4430 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4431                           char *name, int namelen, u64 index,
4432                           unsigned int ext_ref)
4433 {
4434         struct btrfs_path path;
4435         struct btrfs_inode_ref *ref;
4436         struct btrfs_inode_extref *extref;
4437         struct extent_buffer *node;
4438         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4439         u32 total;
4440         u32 cur = 0;
4441         u32 len;
4442         u32 ref_namelen;
4443         u64 ref_index;
4444         u64 parent;
4445         u64 dir_id;
4446         int slot;
4447         int ret;
4448
4449         btrfs_init_path(&path);
4450         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4451         if (ret) {
4452                 ret = INODE_REF_MISSING;
4453                 goto extref;
4454         }
4455
4456         node = path.nodes[0];
4457         slot = path.slots[0];
4458
4459         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4460         total = btrfs_item_size_nr(node, slot);
4461
4462         /* Iterate all entry of INODE_REF */
4463         while (cur < total) {
4464                 ret = INODE_REF_MISSING;
4465
4466                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4467                 ref_index = btrfs_inode_ref_index(node, ref);
4468                 if (index != (u64)-1 && index != ref_index)
4469                         goto next_ref;
4470
4471                 if (ref_namelen <= BTRFS_NAME_LEN) {
4472                         len = ref_namelen;
4473                 } else {
4474                         len = BTRFS_NAME_LEN;
4475                         warning("root %llu INODE %s[%llu %llu] name too long",
4476                                 root->objectid,
4477                                 key->type == BTRFS_INODE_REF_KEY ?
4478                                         "REF" : "EXTREF",
4479                                 key->objectid, key->offset);
4480                 }
4481                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4482                                    len);
4483
4484                 if (len != namelen || strncmp(ref_namebuf, name, len))
4485                         goto next_ref;
4486
4487                 ret = 0;
4488                 goto out;
4489 next_ref:
4490                 len = sizeof(*ref) + ref_namelen;
4491                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4492                 cur += len;
4493         }
4494
4495 extref:
4496         /* Skip if not support EXTENDED_IREF feature */
4497         if (!ext_ref)
4498                 goto out;
4499
4500         btrfs_release_path(&path);
4501         btrfs_init_path(&path);
4502
4503         dir_id = key->offset;
4504         key->type = BTRFS_INODE_EXTREF_KEY;
4505         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4506
4507         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4508         if (ret) {
4509                 ret = INODE_REF_MISSING;
4510                 goto out;
4511         }
4512
4513         node = path.nodes[0];
4514         slot = path.slots[0];
4515
4516         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4517         cur = 0;
4518         total = btrfs_item_size_nr(node, slot);
4519
4520         /* Iterate all entry of INODE_EXTREF */
4521         while (cur < total) {
4522                 ret = INODE_REF_MISSING;
4523
4524                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4525                 ref_index = btrfs_inode_extref_index(node, extref);
4526                 parent = btrfs_inode_extref_parent(node, extref);
4527                 if (index != (u64)-1 && index != ref_index)
4528                         goto next_extref;
4529
4530                 if (parent != dir_id)
4531                         goto next_extref;
4532
4533                 if (ref_namelen <= BTRFS_NAME_LEN) {
4534                         len = ref_namelen;
4535                 } else {
4536                         len = BTRFS_NAME_LEN;
4537                         warning("root %llu INODE %s[%llu %llu] name too long",
4538                                 root->objectid,
4539                                 key->type == BTRFS_INODE_REF_KEY ?
4540                                         "REF" : "EXTREF",
4541                                 key->objectid, key->offset);
4542                 }
4543                 read_extent_buffer(node, ref_namebuf,
4544                                    (unsigned long)(extref + 1), len);
4545
4546                 if (len != namelen || strncmp(ref_namebuf, name, len))
4547                         goto next_extref;
4548
4549                 ret = 0;
4550                 goto out;
4551
4552 next_extref:
4553                 len = sizeof(*extref) + ref_namelen;
4554                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4555                 cur += len;
4556
4557         }
4558 out:
4559         btrfs_release_path(&path);
4560         return ret;
4561 }
4562
/*
 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
 *
 * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
 * @size:       the st_size of the INODE_ITEM
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error occurred.
 */
4574 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4575                           struct extent_buffer *node, int slot, u64 *size,
4576                           unsigned int ext_ref)
4577 {
4578         struct btrfs_dir_item *di;
4579         struct btrfs_inode_item *ii;
4580         struct btrfs_path path;
4581         struct btrfs_key location;
4582         char namebuf[BTRFS_NAME_LEN] = {0};
4583         u32 total;
4584         u32 cur = 0;
4585         u32 len;
4586         u32 name_len;
4587         u32 data_len;
4588         u8 filetype;
4589         u32 mode;
4590         u64 index;
4591         int ret;
4592         int err = 0;
4593
4594         /*
4595          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4596          * ignore index check.
4597          */
4598         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4599
4600         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4601         total = btrfs_item_size_nr(node, slot);
4602
4603         while (cur < total) {
4604                 data_len = btrfs_dir_data_len(node, di);
4605                 if (data_len)
4606                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4607                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4608                               "DIR_ITEM" : "DIR_INDEX",
4609                               key->objectid, key->offset, data_len);
4610
4611                 name_len = btrfs_dir_name_len(node, di);
4612                 if (name_len <= BTRFS_NAME_LEN) {
4613                         len = name_len;
4614                 } else {
4615                         len = BTRFS_NAME_LEN;
4616                         warning("root %llu %s[%llu %llu] name too long",
4617                                 root->objectid,
4618                                 key->type == BTRFS_DIR_ITEM_KEY ?
4619                                 "DIR_ITEM" : "DIR_INDEX",
4620                                 key->objectid, key->offset);
4621                 }
4622                 (*size) += name_len;
4623
4624                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4625                 filetype = btrfs_dir_type(node, di);
4626
4627                 btrfs_init_path(&path);
4628                 btrfs_dir_item_key_to_cpu(node, di, &location);
4629
4630                 /* Ignore related ROOT_ITEM check */
4631                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4632                         goto next;
4633
4634                 /* Check relative INODE_ITEM(existence/filetype) */
4635                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4636                 if (ret) {
4637                         err |= INODE_ITEM_MISSING;
4638                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4639                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4640                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4641                               key->offset, location.objectid, name_len,
4642                               namebuf, filetype);
4643                         goto next;
4644                 }
4645
4646                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4647                                     struct btrfs_inode_item);
4648                 mode = btrfs_inode_mode(path.nodes[0], ii);
4649
4650                 if (imode_to_type(mode) != filetype) {
4651                         err |= INODE_ITEM_MISMATCH;
4652                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4653                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4654                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4655                               key->offset, name_len, namebuf, filetype);
4656                 }
4657
4658                 /* Check relative INODE_REF/INODE_EXTREF */
4659                 location.type = BTRFS_INODE_REF_KEY;
4660                 location.offset = key->objectid;
4661                 ret = find_inode_ref(root, &location, namebuf, len,
4662                                        index, ext_ref);
4663                 err |= ret;
4664                 if (ret & INODE_REF_MISSING)
4665                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4666                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4667                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4668                               key->offset, name_len, namebuf, filetype);
4669
4670 next:
4671                 btrfs_release_path(&path);
4672                 len = sizeof(*di) + name_len + data_len;
4673                 di = (struct btrfs_dir_item *)((char *)di + len);
4674                 cur += len;
4675
4676                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4677                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4678                               root->objectid, key->objectid, key->offset);
4679                         break;
4680                 }
4681         }
4682
4683         return err;
4684 }
4685
4686 /*
4687  * Check file extent datasum/hole, update the size of the file extents,
4688  * check and update the last offset of the file extent.
4689  *
4690  * @root:       the root of fs/file tree.
4691  * @fkey:       the key of the file extent.
4692  * @nodatasum:  INODE_NODATASUM feature.
4693  * @size:       the sum of all EXTENT_DATA items size for this inode.
4694  * @end:        the offset of the last extent.
4695  *
4696  * Return 0 if no error occurred.
4697  */
4698 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4699                              struct extent_buffer *node, int slot,
4700                              unsigned int nodatasum, u64 *size, u64 *end)
4701 {
4702         struct btrfs_file_extent_item *fi;
4703         u64 disk_bytenr;
4704         u64 disk_num_bytes;
4705         u64 extent_num_bytes;
4706         u64 found;
4707         unsigned int extent_type;
4708         unsigned int is_hole;
4709         int ret;
4710         int err = 0;
4711
4712         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4713
4714         extent_type = btrfs_file_extent_type(node, fi);
4715         /* Skip if file extent is inline */
4716         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717                 struct btrfs_item *e = btrfs_item_nr(slot);
4718                 u32 item_inline_len;
4719
4720                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722                 if (extent_num_bytes == 0 ||
4723                     extent_num_bytes != item_inline_len)
4724                         err |= FILE_EXTENT_ERROR;
4725                 *size += extent_num_bytes;
4726                 return err;
4727         }
4728
4729         /* Check extent type */
4730         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4731                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4732                 err |= FILE_EXTENT_ERROR;
4733                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4734                       root->objectid, fkey->objectid, fkey->offset);
4735                 return err;
4736         }
4737
4738         /* Check REG_EXTENT/PREALLOC_EXTENT */
4739         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4740         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4741         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4742         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4743
4744         /* Check EXTENT_DATA datasum */
4745         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4746         if (found > 0 && nodatasum) {
4747                 err |= ODD_CSUM_ITEM;
4748                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4749                       root->objectid, fkey->objectid, fkey->offset);
4750         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4751                    !is_hole &&
4752                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4753                 err |= CSUM_ITEM_MISSING;
4754                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4757                 err |= ODD_CSUM_ITEM;
4758                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4759                       root->objectid, fkey->objectid, fkey->offset);
4760         }
4761
4762         /* Check EXTENT_DATA hole */
4763         if (no_holes && is_hole) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         } else if (!no_holes && *end != fkey->offset) {
4768                 err |= FILE_EXTENT_ERROR;
4769                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4770                       root->objectid, fkey->objectid, fkey->offset);
4771         }
4772
4773         *end += extent_num_bytes;
4774         if (!is_hole)
4775                 *size += extent_num_bytes;
4776
4777         return err;
4778 }
4779
4780 /*
4781  * Check INODE_ITEM and related ITEMs (the same inode number)
4782  * 1. check link count
4783  * 2. check inode ref/extref
4784  * 3. check dir item/index
4785  *
4786  * @ext_ref:    the EXTENDED_IREF feature
4787  *
4788  * Return 0 if no error occurred.
4789  * Return >0 for error or hit the traversal is done(by error bitmap)
4790  */
4791 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4792                             unsigned int ext_ref)
4793 {
4794         struct extent_buffer *node;
4795         struct btrfs_inode_item *ii;
4796         struct btrfs_key key;
4797         u64 inode_id;
4798         u32 mode;
4799         u64 nlink;
4800         u64 nbytes;
4801         u64 isize;
4802         u64 size = 0;
4803         u64 refs = 0;
4804         u64 extent_end = 0;
4805         u64 extent_size = 0;
4806         unsigned int dir;
4807         unsigned int nodatasum;
4808         int slot;
4809         int ret;
4810         int err = 0;
4811
4812         node = path->nodes[0];
4813         slot = path->slots[0];
4814
4815         btrfs_item_key_to_cpu(node, &key, slot);
4816         inode_id = key.objectid;
4817
4818         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4819                 ret = btrfs_next_item(root, path);
4820                 if (ret > 0)
4821                         err |= LAST_ITEM;
4822                 return err;
4823         }
4824
4825         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4826         isize = btrfs_inode_size(node, ii);
4827         nbytes = btrfs_inode_nbytes(node, ii);
4828         mode = btrfs_inode_mode(node, ii);
4829         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4830         nlink = btrfs_inode_nlink(node, ii);
4831         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4832
4833         while (1) {
4834                 ret = btrfs_next_item(root, path);
4835                 if (ret < 0) {
4836                         /* out will fill 'err' rusing current statistics */
4837                         goto out;
4838                 } else if (ret > 0) {
4839                         err |= LAST_ITEM;
4840                         goto out;
4841                 }
4842
4843                 node = path->nodes[0];
4844                 slot = path->slots[0];
4845                 btrfs_item_key_to_cpu(node, &key, slot);
4846                 if (key.objectid != inode_id)
4847                         goto out;
4848
4849                 switch (key.type) {
4850                 case BTRFS_INODE_REF_KEY:
4851                         ret = check_inode_ref(root, &key, node, slot, &refs,
4852                                               mode);
4853                         err |= ret;
4854                         break;
4855                 case BTRFS_INODE_EXTREF_KEY:
4856                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4857                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4858                                         root->objectid, key.objectid,
4859                                         key.offset);
4860                         ret = check_inode_extref(root, &key, node, slot, &refs,
4861                                                  mode);
4862                         err |= ret;
4863                         break;
4864                 case BTRFS_DIR_ITEM_KEY:
4865                 case BTRFS_DIR_INDEX_KEY:
4866                         if (!dir) {
4867                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4868                                         root->objectid, inode_id,
4869                                         imode_to_type(mode), key.objectid,
4870                                         key.offset);
4871                         }
4872                         ret = check_dir_item(root, &key, node, slot, &size,
4873                                              ext_ref);
4874                         err |= ret;
4875                         break;
4876                 case BTRFS_EXTENT_DATA_KEY:
4877                         if (dir) {
4878                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4879                                         root->objectid, inode_id, key.objectid,
4880                                         key.offset);
4881                         }
4882                         ret = check_file_extent(root, &key, node, slot,
4883                                                 nodatasum, &extent_size,
4884                                                 &extent_end);
4885                         err |= ret;
4886                         break;
4887                 case BTRFS_XATTR_ITEM_KEY:
4888                         break;
4889                 default:
4890                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4891                               key.objectid, key.type, key.offset);
4892                 }
4893         }
4894
4895 out:
4896         /* verify INODE_ITEM nlink/isize/nbytes */
4897         if (dir) {
4898                 if (nlink != 1) {
4899                         err |= LINK_COUNT_ERROR;
4900                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4901                               root->objectid, inode_id, nlink);
4902                 }
4903
4904                 /*
4905                  * Just a warning, as dir inode nbytes is just an
4906                  * instructive value.
4907                  */
4908                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4909                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4910                                 root->objectid, inode_id, root->nodesize);
4911                 }
4912
4913                 if (isize != size) {
4914                         err |= ISIZE_ERROR;
4915                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4916                               root->objectid, inode_id, isize, size);
4917                 }
4918         } else {
4919                 if (nlink != refs) {
4920                         err |= LINK_COUNT_ERROR;
4921                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4922                               root->objectid, inode_id, nlink, refs);
4923                 } else if (!nlink) {
4924                         err |= ORPHAN_ITEM;
4925                 }
4926
4927                 if (!nbytes && !no_holes && extent_end < isize) {
4928                         err |= NBYTES_ERROR;
4929                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4930                               root->objectid, inode_id, isize);
4931                 }
4932
4933                 if (nbytes != extent_size) {
4934                         err |= NBYTES_ERROR;
4935                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4936                               root->objectid, inode_id, nbytes, extent_size);
4937                 }
4938         }
4939
4940         return err;
4941 }
4942
4943 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4944 {
4945         struct btrfs_path path;
4946         struct btrfs_key key;
4947         int err = 0;
4948         int ret;
4949
4950         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4951         key.type = BTRFS_INODE_ITEM_KEY;
4952         key.offset = 0;
4953
4954         /* For root being dropped, we don't need to check first inode */
4955         if (btrfs_root_refs(&root->root_item) == 0 &&
4956             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4957             key.objectid)
4958                 return 0;
4959
4960         btrfs_init_path(&path);
4961
4962         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4963         if (ret < 0)
4964                 goto out;
4965         if (ret > 0) {
4966                 ret = 0;
4967                 err |= INODE_ITEM_MISSING;
4968         }
4969
4970         err |= check_inode_item(root, &path, ext_ref);
4971         err &= ~LAST_ITEM;
4972         if (err && !ret)
4973                 ret = -EIO;
4974 out:
4975         btrfs_release_path(&path);
4976         return ret;
4977 }
4978
4979 /*
4980  * Iterate all item on the tree and call check_inode_item() to check.
4981  *
4982  * @root:       the root of the tree to be checked.
4983  * @ext_ref:    the EXTENDED_IREF feature
4984  *
4985  * Return 0 if no error found.
4986  * Return <0 for error.
4987  */
4988 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4989 {
4990         struct btrfs_path path;
4991         struct node_refs nrefs;
4992         struct btrfs_root_item *root_item = &root->root_item;
4993         int ret, wret;
4994         int level;
4995
4996         /*
4997          * We need to manually check the first inode item(256)
4998          * As the following traversal function will only start from
4999          * the first inode item in the leaf, if inode item(256) is missing
5000          * we will just skip it forever.
5001          */
5002         ret = check_fs_first_inode(root, ext_ref);
5003         if (ret < 0)
5004                 return ret;
5005
5006         memset(&nrefs, 0, sizeof(nrefs));
5007         level = btrfs_header_level(root->node);
5008         btrfs_init_path(&path);
5009
5010         if (btrfs_root_refs(root_item) > 0 ||
5011             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5012                 path.nodes[level] = root->node;
5013                 path.slots[level] = 0;
5014                 extent_buffer_get(root->node);
5015         } else {
5016                 struct btrfs_key key;
5017
5018                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5019                 level = root_item->drop_level;
5020                 path.lowest_level = level;
5021                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5022                 if (ret < 0)
5023                         goto out;
5024                 ret = 0;
5025         }
5026
5027         while (1) {
5028                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5029                 if (wret < 0)
5030                         ret = wret;
5031                 if (wret != 0)
5032                         break;
5033
5034                 wret = walk_up_tree_v2(root, &path, &level);
5035                 if (wret < 0)
5036                         ret = wret;
5037                 if (wret != 0)
5038                         break;
5039         }
5040
5041 out:
5042         btrfs_release_path(&path);
5043         return ret;
5044 }
5045
5046 /*
5047  * Find the relative ref for root_ref and root_backref.
5048  *
5049  * @root:       the root of the root tree.
5050  * @ref_key:    the key of the root ref.
5051  *
5052  * Return 0 if no error occurred.
5053  */
5054 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5055                           struct extent_buffer *node, int slot)
5056 {
5057         struct btrfs_path path;
5058         struct btrfs_key key;
5059         struct btrfs_root_ref *ref;
5060         struct btrfs_root_ref *backref;
5061         char ref_name[BTRFS_NAME_LEN] = {0};
5062         char backref_name[BTRFS_NAME_LEN] = {0};
5063         u64 ref_dirid;
5064         u64 ref_seq;
5065         u32 ref_namelen;
5066         u64 backref_dirid;
5067         u64 backref_seq;
5068         u32 backref_namelen;
5069         u32 len;
5070         int ret;
5071         int err = 0;
5072
5073         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5074         ref_dirid = btrfs_root_ref_dirid(node, ref);
5075         ref_seq = btrfs_root_ref_sequence(node, ref);
5076         ref_namelen = btrfs_root_ref_name_len(node, ref);
5077
5078         if (ref_namelen <= BTRFS_NAME_LEN) {
5079                 len = ref_namelen;
5080         } else {
5081                 len = BTRFS_NAME_LEN;
5082                 warning("%s[%llu %llu] ref_name too long",
5083                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5084                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5085                         ref_key->offset);
5086         }
5087         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5088
5089         /* Find relative root_ref */
5090         key.objectid = ref_key->offset;
5091         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5092         key.offset = ref_key->objectid;
5093
5094         btrfs_init_path(&path);
5095         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5096         if (ret) {
5097                 err |= ROOT_REF_MISSING;
5098                 error("%s[%llu %llu] couldn't find relative ref",
5099                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5100                       "ROOT_REF" : "ROOT_BACKREF",
5101                       ref_key->objectid, ref_key->offset);
5102                 goto out;
5103         }
5104
5105         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5106                                  struct btrfs_root_ref);
5107         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5108         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5109         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5110
5111         if (backref_namelen <= BTRFS_NAME_LEN) {
5112                 len = backref_namelen;
5113         } else {
5114                 len = BTRFS_NAME_LEN;
5115                 warning("%s[%llu %llu] ref_name too long",
5116                         key.type == BTRFS_ROOT_REF_KEY ?
5117                         "ROOT_REF" : "ROOT_BACKREF",
5118                         key.objectid, key.offset);
5119         }
5120         read_extent_buffer(path.nodes[0], backref_name,
5121                            (unsigned long)(backref + 1), len);
5122
5123         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5124             ref_namelen != backref_namelen ||
5125             strncmp(ref_name, backref_name, len)) {
5126                 err |= ROOT_REF_MISMATCH;
5127                 error("%s[%llu %llu] mismatch relative ref",
5128                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5129                       "ROOT_REF" : "ROOT_BACKREF",
5130                       ref_key->objectid, ref_key->offset);
5131         }
5132 out:
5133         btrfs_release_path(&path);
5134         return err;
5135 }
5136
5137 /*
5138  * Check all fs/file tree in low_memory mode.
5139  *
5140  * 1. for fs tree root item, call check_fs_root_v2()
5141  * 2. for fs tree root ref/backref, call check_root_ref()
5142  *
5143  * Return 0 if no error occurred.
5144  */
5145 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5146 {
5147         struct btrfs_root *tree_root = fs_info->tree_root;
5148         struct btrfs_root *cur_root = NULL;
5149         struct btrfs_path path;
5150         struct btrfs_key key;
5151         struct extent_buffer *node;
5152         unsigned int ext_ref;
5153         int slot;
5154         int ret;
5155         int err = 0;
5156
5157         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5158
5159         btrfs_init_path(&path);
5160         key.objectid = BTRFS_FS_TREE_OBJECTID;
5161         key.offset = 0;
5162         key.type = BTRFS_ROOT_ITEM_KEY;
5163
5164         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5165         if (ret < 0) {
5166                 err = ret;
5167                 goto out;
5168         } else if (ret > 0) {
5169                 err = -ENOENT;
5170                 goto out;
5171         }
5172
5173         while (1) {
5174                 node = path.nodes[0];
5175                 slot = path.slots[0];
5176                 btrfs_item_key_to_cpu(node, &key, slot);
5177                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5178                         goto out;
5179                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5180                     fs_root_objectid(key.objectid)) {
5181                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5182                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5183                                                                        &key);
5184                         } else {
5185                                 key.offset = (u64)-1;
5186                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5187                         }
5188
5189                         if (IS_ERR(cur_root)) {
5190                                 error("Fail to read fs/subvol tree: %lld",
5191                                       key.objectid);
5192                                 err = -EIO;
5193                                 goto next;
5194                         }
5195
5196                         ret = check_fs_root_v2(cur_root, ext_ref);
5197                         err |= ret;
5198
5199                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5200                                 btrfs_free_fs_root(cur_root);
5201                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5202                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5203                         ret = check_root_ref(tree_root, &key, node, slot);
5204                         err |= ret;
5205                 }
5206 next:
5207                 ret = btrfs_next_item(tree_root, &path);
5208                 if (ret > 0)
5209                         goto out;
5210                 if (ret < 0) {
5211                         err = ret;
5212                         goto out;
5213                 }
5214         }
5215
5216 out:
5217         btrfs_release_path(&path);
5218         return err;
5219 }
5220
5221 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5222 {
5223         struct list_head *cur = rec->backrefs.next;
5224         struct extent_backref *back;
5225         struct tree_backref *tback;
5226         struct data_backref *dback;
5227         u64 found = 0;
5228         int err = 0;
5229
5230         while(cur != &rec->backrefs) {
5231                 back = to_extent_backref(cur);
5232                 cur = cur->next;
5233                 if (!back->found_extent_tree) {
5234                         err = 1;
5235                         if (!print_errs)
5236                                 goto out;
5237                         if (back->is_data) {
5238                                 dback = to_data_backref(back);
5239                                 fprintf(stderr, "Backref %llu %s %llu"
5240                                         " owner %llu offset %llu num_refs %lu"
5241                                         " not found in extent tree\n",
5242                                         (unsigned long long)rec->start,
5243                                         back->full_backref ?
5244                                         "parent" : "root",
5245                                         back->full_backref ?
5246                                         (unsigned long long)dback->parent:
5247                                         (unsigned long long)dback->root,
5248                                         (unsigned long long)dback->owner,
5249                                         (unsigned long long)dback->offset,
5250                                         (unsigned long)dback->num_refs);
5251                         } else {
5252                                 tback = to_tree_backref(back);
5253                                 fprintf(stderr, "Backref %llu parent %llu"
5254                                         " root %llu not found in extent tree\n",
5255                                         (unsigned long long)rec->start,
5256                                         (unsigned long long)tback->parent,
5257                                         (unsigned long long)tback->root);
5258                         }
5259                 }
5260                 if (!back->is_data && !back->found_ref) {
5261                         err = 1;
5262                         if (!print_errs)
5263                                 goto out;
5264                         tback = to_tree_backref(back);
5265                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5266                                 (unsigned long long)rec->start,
5267                                 back->full_backref ? "parent" : "root",
5268                                 back->full_backref ?
5269                                 (unsigned long long)tback->parent :
5270                                 (unsigned long long)tback->root, back);
5271                 }
5272                 if (back->is_data) {
5273                         dback = to_data_backref(back);
5274                         if (dback->found_ref != dback->num_refs) {
5275                                 err = 1;
5276                                 if (!print_errs)
5277                                         goto out;
5278                                 fprintf(stderr, "Incorrect local backref count"
5279                                         " on %llu %s %llu owner %llu"
5280                                         " offset %llu found %u wanted %u back %p\n",
5281                                         (unsigned long long)rec->start,
5282                                         back->full_backref ?
5283                                         "parent" : "root",
5284                                         back->full_backref ?
5285                                         (unsigned long long)dback->parent:
5286                                         (unsigned long long)dback->root,
5287                                         (unsigned long long)dback->owner,
5288                                         (unsigned long long)dback->offset,
5289                                         dback->found_ref, dback->num_refs, back);
5290                         }
5291                         if (dback->disk_bytenr != rec->start) {
5292                                 err = 1;
5293                                 if (!print_errs)
5294                                         goto out;
5295                                 fprintf(stderr, "Backref disk bytenr does not"
5296                                         " match extent record, bytenr=%llu, "
5297                                         "ref bytenr=%llu\n",
5298                                         (unsigned long long)rec->start,
5299                                         (unsigned long long)dback->disk_bytenr);
5300                         }
5301
5302                         if (dback->bytes != rec->nr) {
5303                                 err = 1;
5304                                 if (!print_errs)
5305                                         goto out;
5306                                 fprintf(stderr, "Backref bytes do not match "
5307                                         "extent backref, bytenr=%llu, ref "
5308                                         "bytes=%llu, backref bytes=%llu\n",
5309                                         (unsigned long long)rec->start,
5310                                         (unsigned long long)rec->nr,
5311                                         (unsigned long long)dback->bytes);
5312                         }
5313                 }
5314                 if (!back->is_data) {
5315                         found += 1;
5316                 } else {
5317                         dback = to_data_backref(back);
5318                         found += dback->found_ref;
5319                 }
5320         }
5321         if (found != rec->refs) {
5322                 err = 1;
5323                 if (!print_errs)
5324                         goto out;
5325                 fprintf(stderr, "Incorrect global backref count "
5326                         "on %llu found %llu wanted %llu\n",
5327                         (unsigned long long)rec->start,
5328                         (unsigned long long)found,
5329                         (unsigned long long)rec->refs);
5330         }
5331 out:
5332         return err;
5333 }
5334
5335 static int free_all_extent_backrefs(struct extent_record *rec)
5336 {
5337         struct extent_backref *back;
5338         struct list_head *cur;
5339         while (!list_empty(&rec->backrefs)) {
5340                 cur = rec->backrefs.next;
5341                 back = to_extent_backref(cur);
5342                 list_del(cur);
5343                 free(back);
5344         }
5345         return 0;
5346 }
5347
5348 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5349                                      struct cache_tree *extent_cache)
5350 {
5351         struct cache_extent *cache;
5352         struct extent_record *rec;
5353
5354         while (1) {
5355                 cache = first_cache_extent(extent_cache);
5356                 if (!cache)
5357                         break;
5358                 rec = container_of(cache, struct extent_record, cache);
5359                 remove_cache_extent(extent_cache, cache);
5360                 free_all_extent_backrefs(rec);
5361                 free(rec);
5362         }
5363 }
5364
5365 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5366                                  struct extent_record *rec)
5367 {
5368         if (rec->content_checked && rec->owner_ref_checked &&
5369             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5370             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5371             !rec->bad_full_backref && !rec->crossing_stripes &&
5372             !rec->wrong_chunk_type) {
5373                 remove_cache_extent(extent_cache, &rec->cache);
5374                 free_all_extent_backrefs(rec);
5375                 list_del_init(&rec->list);
5376                 free(rec);
5377         }
5378         return 0;
5379 }
5380
5381 static int check_owner_ref(struct btrfs_root *root,
5382                             struct extent_record *rec,
5383                             struct extent_buffer *buf)
5384 {
5385         struct extent_backref *node;
5386         struct tree_backref *back;
5387         struct btrfs_root *ref_root;
5388         struct btrfs_key key;
5389         struct btrfs_path path;
5390         struct extent_buffer *parent;
5391         int level;
5392         int found = 0;
5393         int ret;
5394
5395         list_for_each_entry(node, &rec->backrefs, list) {
5396                 if (node->is_data)
5397                         continue;
5398                 if (!node->found_ref)
5399                         continue;
5400                 if (node->full_backref)
5401                         continue;
5402                 back = to_tree_backref(node);
5403                 if (btrfs_header_owner(buf) == back->root)
5404                         return 0;
5405         }
5406         BUG_ON(rec->is_root);
5407
5408         /* try to find the block by search corresponding fs tree */
5409         key.objectid = btrfs_header_owner(buf);
5410         key.type = BTRFS_ROOT_ITEM_KEY;
5411         key.offset = (u64)-1;
5412
5413         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5414         if (IS_ERR(ref_root))
5415                 return 1;
5416
5417         level = btrfs_header_level(buf);
5418         if (level == 0)
5419                 btrfs_item_key_to_cpu(buf, &key, 0);
5420         else
5421                 btrfs_node_key_to_cpu(buf, &key, 0);
5422
5423         btrfs_init_path(&path);
5424         path.lowest_level = level + 1;
5425         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5426         if (ret < 0)
5427                 return 0;
5428
5429         parent = path.nodes[level + 1];
5430         if (parent && buf->start == btrfs_node_blockptr(parent,
5431                                                         path.slots[level + 1]))
5432                 found = 1;
5433
5434         btrfs_release_path(&path);
5435         return found ? 0 : 1;
5436 }
5437
5438 static int is_extent_tree_record(struct extent_record *rec)
5439 {
5440         struct list_head *cur = rec->backrefs.next;
5441         struct extent_backref *node;
5442         struct tree_backref *back;
5443         int is_extent = 0;
5444
5445         while(cur != &rec->backrefs) {
5446                 node = to_extent_backref(cur);
5447                 cur = cur->next;
5448                 if (node->is_data)
5449                         return 0;
5450                 back = to_tree_backref(node);
5451                 if (node->full_backref)
5452                         return 0;
5453                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5454                         is_extent = 1;
5455         }
5456         return is_extent;
5457 }
5458
5459
5460 static int record_bad_block_io(struct btrfs_fs_info *info,
5461                                struct cache_tree *extent_cache,
5462                                u64 start, u64 len)
5463 {
5464         struct extent_record *rec;
5465         struct cache_extent *cache;
5466         struct btrfs_key key;
5467
5468         cache = lookup_cache_extent(extent_cache, start, len);
5469         if (!cache)
5470                 return 0;
5471
5472         rec = container_of(cache, struct extent_record, cache);
5473         if (!is_extent_tree_record(rec))
5474                 return 0;
5475
5476         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5477         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5478 }
5479
5480 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5481                        struct extent_buffer *buf, int slot)
5482 {
5483         if (btrfs_header_level(buf)) {
5484                 struct btrfs_key_ptr ptr1, ptr2;
5485
5486                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5487                                    sizeof(struct btrfs_key_ptr));
5488                 read_extent_buffer(buf, &ptr2,
5489                                    btrfs_node_key_ptr_offset(slot + 1),
5490                                    sizeof(struct btrfs_key_ptr));
5491                 write_extent_buffer(buf, &ptr1,
5492                                     btrfs_node_key_ptr_offset(slot + 1),
5493                                     sizeof(struct btrfs_key_ptr));
5494                 write_extent_buffer(buf, &ptr2,
5495                                     btrfs_node_key_ptr_offset(slot),
5496                                     sizeof(struct btrfs_key_ptr));
5497                 if (slot == 0) {
5498                         struct btrfs_disk_key key;
5499                         btrfs_node_key(buf, &key, 0);
5500                         btrfs_fixup_low_keys(root, path, &key,
5501                                              btrfs_header_level(buf) + 1);
5502                 }
5503         } else {
5504                 struct btrfs_item *item1, *item2;
5505                 struct btrfs_key k1, k2;
5506                 char *item1_data, *item2_data;
5507                 u32 item1_offset, item2_offset, item1_size, item2_size;
5508
5509                 item1 = btrfs_item_nr(slot);
5510                 item2 = btrfs_item_nr(slot + 1);
5511                 btrfs_item_key_to_cpu(buf, &k1, slot);
5512                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5513                 item1_offset = btrfs_item_offset(buf, item1);
5514                 item2_offset = btrfs_item_offset(buf, item2);
5515                 item1_size = btrfs_item_size(buf, item1);
5516                 item2_size = btrfs_item_size(buf, item2);
5517
5518                 item1_data = malloc(item1_size);
5519                 if (!item1_data)
5520                         return -ENOMEM;
5521                 item2_data = malloc(item2_size);
5522                 if (!item2_data) {
5523                         free(item1_data);
5524                         return -ENOMEM;
5525                 }
5526
5527                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5528                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5529
5530                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5531                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5532                 free(item1_data);
5533                 free(item2_data);
5534
5535                 btrfs_set_item_offset(buf, item1, item2_offset);
5536                 btrfs_set_item_offset(buf, item2, item1_offset);
5537                 btrfs_set_item_size(buf, item1, item2_size);
5538                 btrfs_set_item_size(buf, item2, item1_size);
5539
5540                 path->slots[0] = slot;
5541                 btrfs_set_item_key_unsafe(root, path, &k2);
5542                 path->slots[0] = slot + 1;
5543                 btrfs_set_item_key_unsafe(root, path, &k1);
5544         }
5545         return 0;
5546 }
5547
5548 static int fix_key_order(struct btrfs_trans_handle *trans,
5549                          struct btrfs_root *root,
5550                          struct btrfs_path *path)
5551 {
5552         struct extent_buffer *buf;
5553         struct btrfs_key k1, k2;
5554         int i;
5555         int level = path->lowest_level;
5556         int ret = -EIO;
5557
5558         buf = path->nodes[level];
5559         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5560                 if (level) {
5561                         btrfs_node_key_to_cpu(buf, &k1, i);
5562                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5563                 } else {
5564                         btrfs_item_key_to_cpu(buf, &k1, i);
5565                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5566                 }
5567                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5568                         continue;
5569                 ret = swap_values(root, path, buf, i);
5570                 if (ret)
5571                         break;
5572                 btrfs_mark_buffer_dirty(buf);
5573                 i = 0;
5574         }
5575         return ret;
5576 }
5577
5578 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5579                              struct btrfs_root *root,
5580                              struct btrfs_path *path,
5581                              struct extent_buffer *buf, int slot)
5582 {
5583         struct btrfs_key key;
5584         int nritems = btrfs_header_nritems(buf);
5585
5586         btrfs_item_key_to_cpu(buf, &key, slot);
5587
5588         /* These are all the keys we can deal with missing. */
5589         if (key.type != BTRFS_DIR_INDEX_KEY &&
5590             key.type != BTRFS_EXTENT_ITEM_KEY &&
5591             key.type != BTRFS_METADATA_ITEM_KEY &&
5592             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5593             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5594                 return -1;
5595
5596         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5597                (unsigned long long)key.objectid, key.type,
5598                (unsigned long long)key.offset, slot, buf->start);
5599         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5600                               btrfs_item_nr_offset(slot + 1),
5601                               sizeof(struct btrfs_item) *
5602                               (nritems - slot - 1));
5603         btrfs_set_header_nritems(buf, nritems - 1);
5604         if (slot == 0) {
5605                 struct btrfs_disk_key disk_key;
5606
5607                 btrfs_item_key(buf, &disk_key, 0);
5608                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5609         }
5610         btrfs_mark_buffer_dirty(buf);
5611         return 0;
5612 }
5613
5614 static int fix_item_offset(struct btrfs_trans_handle *trans,
5615                            struct btrfs_root *root,
5616                            struct btrfs_path *path)
5617 {
5618         struct extent_buffer *buf;
5619         int i;
5620         int ret = 0;
5621
5622         /* We should only get this for leaves */
5623         BUG_ON(path->lowest_level);
5624         buf = path->nodes[0];
5625 again:
5626         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5627                 unsigned int shift = 0, offset;
5628
5629                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5630                     BTRFS_LEAF_DATA_SIZE(root)) {
5631                         if (btrfs_item_end_nr(buf, i) >
5632                             BTRFS_LEAF_DATA_SIZE(root)) {
5633                                 ret = delete_bogus_item(trans, root, path,
5634                                                         buf, i);
5635                                 if (!ret)
5636                                         goto again;
5637                                 fprintf(stderr, "item is off the end of the "
5638                                         "leaf, can't fix\n");
5639                                 ret = -EIO;
5640                                 break;
5641                         }
5642                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5643                                 btrfs_item_end_nr(buf, i);
5644                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5645                            btrfs_item_offset_nr(buf, i - 1)) {
5646                         if (btrfs_item_end_nr(buf, i) >
5647                             btrfs_item_offset_nr(buf, i - 1)) {
5648                                 ret = delete_bogus_item(trans, root, path,
5649                                                         buf, i);
5650                                 if (!ret)
5651                                         goto again;
5652                                 fprintf(stderr, "items overlap, can't fix\n");
5653                                 ret = -EIO;
5654                                 break;
5655                         }
5656                         shift = btrfs_item_offset_nr(buf, i - 1) -
5657                                 btrfs_item_end_nr(buf, i);
5658                 }
5659                 if (!shift)
5660                         continue;
5661
5662                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5663                        i, shift, (unsigned long long)buf->start);
5664                 offset = btrfs_item_offset_nr(buf, i);
5665                 memmove_extent_buffer(buf,
5666                                       btrfs_leaf_data(buf) + offset + shift,
5667                                       btrfs_leaf_data(buf) + offset,
5668                                       btrfs_item_size_nr(buf, i));
5669                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5670                                       offset + shift);
5671                 btrfs_mark_buffer_dirty(buf);
5672         }
5673
5674         /*
5675          * We may have moved things, in which case we want to exit so we don't
5676          * write those changes out.  Once we have proper abort functionality in
5677          * progs this can be changed to something nicer.
5678          */
5679         BUG_ON(ret);
5680         return ret;
5681 }
5682
5683 /*
5684  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5685  * then just return -EIO.
5686  */
5687 static int try_to_fix_bad_block(struct btrfs_root *root,
5688                                 struct extent_buffer *buf,
5689                                 enum btrfs_tree_block_status status)
5690 {
5691         struct btrfs_trans_handle *trans;
5692         struct ulist *roots;
5693         struct ulist_node *node;
5694         struct btrfs_root *search_root;
5695         struct btrfs_path path;
5696         struct ulist_iterator iter;
5697         struct btrfs_key root_key, key;
5698         int ret;
5699
5700         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5701             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5702                 return -EIO;
5703
5704         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5705         if (ret)
5706                 return -EIO;
5707
5708         btrfs_init_path(&path);
5709         ULIST_ITER_INIT(&iter);
5710         while ((node = ulist_next(roots, &iter))) {
5711                 root_key.objectid = node->val;
5712                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5713                 root_key.offset = (u64)-1;
5714
5715                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5716                 if (IS_ERR(root)) {
5717                         ret = -EIO;
5718                         break;
5719                 }
5720
5721
5722                 trans = btrfs_start_transaction(search_root, 0);
5723                 if (IS_ERR(trans)) {
5724                         ret = PTR_ERR(trans);
5725                         break;
5726                 }
5727
5728                 path.lowest_level = btrfs_header_level(buf);
5729                 path.skip_check_block = 1;
5730                 if (path.lowest_level)
5731                         btrfs_node_key_to_cpu(buf, &key, 0);
5732                 else
5733                         btrfs_item_key_to_cpu(buf, &key, 0);
5734                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5735                 if (ret) {
5736                         ret = -EIO;
5737                         btrfs_commit_transaction(trans, search_root);
5738                         break;
5739                 }
5740                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5741                         ret = fix_key_order(trans, search_root, &path);
5742                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5743                         ret = fix_item_offset(trans, search_root, &path);
5744                 if (ret) {
5745                         btrfs_commit_transaction(trans, search_root);
5746                         break;
5747                 }
5748                 btrfs_release_path(&path);
5749                 btrfs_commit_transaction(trans, search_root);
5750         }
5751         ulist_free(roots);
5752         btrfs_release_path(&path);
5753         return ret;
5754 }
5755
5756 static int check_block(struct btrfs_root *root,
5757                        struct cache_tree *extent_cache,
5758                        struct extent_buffer *buf, u64 flags)
5759 {
5760         struct extent_record *rec;
5761         struct cache_extent *cache;
5762         struct btrfs_key key;
5763         enum btrfs_tree_block_status status;
5764         int ret = 0;
5765         int level;
5766
5767         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5768         if (!cache)
5769                 return 1;
5770         rec = container_of(cache, struct extent_record, cache);
5771         rec->generation = btrfs_header_generation(buf);
5772
5773         level = btrfs_header_level(buf);
5774         if (btrfs_header_nritems(buf) > 0) {
5775
5776                 if (level == 0)
5777                         btrfs_item_key_to_cpu(buf, &key, 0);
5778                 else
5779                         btrfs_node_key_to_cpu(buf, &key, 0);
5780
5781                 rec->info_objectid = key.objectid;
5782         }
5783         rec->info_level = level;
5784
5785         if (btrfs_is_leaf(buf))
5786                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5787         else
5788                 status = btrfs_check_node(root, &rec->parent_key, buf);
5789
5790         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5791                 if (repair)
5792                         status = try_to_fix_bad_block(root, buf, status);
5793                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5794                         ret = -EIO;
5795                         fprintf(stderr, "bad block %llu\n",
5796                                 (unsigned long long)buf->start);
5797                 } else {
5798                         /*
5799                          * Signal to callers we need to start the scan over
5800                          * again since we'll have cowed blocks.
5801                          */
5802                         ret = -EAGAIN;
5803                 }
5804         } else {
5805                 rec->content_checked = 1;
5806                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5807                         rec->owner_ref_checked = 1;
5808                 else {
5809                         ret = check_owner_ref(root, rec, buf);
5810                         if (!ret)
5811                                 rec->owner_ref_checked = 1;
5812                 }
5813         }
5814         if (!ret)
5815                 maybe_free_extent_rec(extent_cache, rec);
5816         return ret;
5817 }
5818
5819 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5820                                                 u64 parent, u64 root)
5821 {
5822         struct list_head *cur = rec->backrefs.next;
5823         struct extent_backref *node;
5824         struct tree_backref *back;
5825
5826         while(cur != &rec->backrefs) {
5827                 node = to_extent_backref(cur);
5828                 cur = cur->next;
5829                 if (node->is_data)
5830                         continue;
5831                 back = to_tree_backref(node);
5832                 if (parent > 0) {
5833                         if (!node->full_backref)
5834                                 continue;
5835                         if (parent == back->parent)
5836                                 return back;
5837                 } else {
5838                         if (node->full_backref)
5839                                 continue;
5840                         if (back->root == root)
5841                                 return back;
5842                 }
5843         }
5844         return NULL;
5845 }
5846
5847 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5848                                                 u64 parent, u64 root)
5849 {
5850         struct tree_backref *ref = malloc(sizeof(*ref));
5851
5852         if (!ref)
5853                 return NULL;
5854         memset(&ref->node, 0, sizeof(ref->node));
5855         if (parent > 0) {
5856                 ref->parent = parent;
5857                 ref->node.full_backref = 1;
5858         } else {
5859                 ref->root = root;
5860                 ref->node.full_backref = 0;
5861         }
5862         list_add_tail(&ref->node.list, &rec->backrefs);
5863
5864         return ref;
5865 }
5866
5867 static struct data_backref *find_data_backref(struct extent_record *rec,
5868                                                 u64 parent, u64 root,
5869                                                 u64 owner, u64 offset,
5870                                                 int found_ref,
5871                                                 u64 disk_bytenr, u64 bytes)
5872 {
5873         struct list_head *cur = rec->backrefs.next;
5874         struct extent_backref *node;
5875         struct data_backref *back;
5876
5877         while(cur != &rec->backrefs) {
5878                 node = to_extent_backref(cur);
5879                 cur = cur->next;
5880                 if (!node->is_data)
5881                         continue;
5882                 back = to_data_backref(node);
5883                 if (parent > 0) {
5884                         if (!node->full_backref)
5885                                 continue;
5886                         if (parent == back->parent)
5887                                 return back;
5888                 } else {
5889                         if (node->full_backref)
5890                                 continue;
5891                         if (back->root == root && back->owner == owner &&
5892                             back->offset == offset) {
5893                                 if (found_ref && node->found_ref &&
5894                                     (back->bytes != bytes ||
5895                                     back->disk_bytenr != disk_bytenr))
5896                                         continue;
5897                                 return back;
5898                         }
5899                 }
5900         }
5901         return NULL;
5902 }
5903
5904 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5905                                                 u64 parent, u64 root,
5906                                                 u64 owner, u64 offset,
5907                                                 u64 max_size)
5908 {
5909         struct data_backref *ref = malloc(sizeof(*ref));
5910
5911         if (!ref)
5912                 return NULL;
5913         memset(&ref->node, 0, sizeof(ref->node));
5914         ref->node.is_data = 1;
5915
5916         if (parent > 0) {
5917                 ref->parent = parent;
5918                 ref->owner = 0;
5919                 ref->offset = 0;
5920                 ref->node.full_backref = 1;
5921         } else {
5922                 ref->root = root;
5923                 ref->owner = owner;
5924                 ref->offset = offset;
5925                 ref->node.full_backref = 0;
5926         }
5927         ref->bytes = max_size;
5928         ref->found_ref = 0;
5929         ref->num_refs = 0;
5930         list_add_tail(&ref->node.list, &rec->backrefs);
5931         if (max_size > rec->max_size)
5932                 rec->max_size = max_size;
5933         return ref;
5934 }
5935
5936 /* Check if the type of extent matches with its chunk */
5937 static void check_extent_type(struct extent_record *rec)
5938 {
5939         struct btrfs_block_group_cache *bg_cache;
5940
5941         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5942         if (!bg_cache)
5943                 return;
5944
5945         /* data extent, check chunk directly*/
5946         if (!rec->metadata) {
5947                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5948                         rec->wrong_chunk_type = 1;
5949                 return;
5950         }
5951
5952         /* metadata extent, check the obvious case first */
5953         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5954                                  BTRFS_BLOCK_GROUP_METADATA))) {
5955                 rec->wrong_chunk_type = 1;
5956                 return;
5957         }
5958
5959         /*
5960          * Check SYSTEM extent, as it's also marked as metadata, we can only
5961          * make sure it's a SYSTEM extent by its backref
5962          */
5963         if (!list_empty(&rec->backrefs)) {
5964                 struct extent_backref *node;
5965                 struct tree_backref *tback;
5966                 u64 bg_type;
5967
5968                 node = to_extent_backref(rec->backrefs.next);
5969                 if (node->is_data) {
5970                         /* tree block shouldn't have data backref */
5971                         rec->wrong_chunk_type = 1;
5972                         return;
5973                 }
5974                 tback = container_of(node, struct tree_backref, node);
5975
5976                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5977                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5978                 else
5979                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5980                 if (!(bg_cache->flags & bg_type))
5981                         rec->wrong_chunk_type = 1;
5982         }
5983 }
5984
5985 /*
5986  * Allocate a new extent record, fill default values from @tmpl and insert int
5987  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5988  * the cache, otherwise it fails.
5989  */
5990 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5991                 struct extent_record *tmpl)
5992 {
5993         struct extent_record *rec;
5994         int ret = 0;
5995
5996         rec = malloc(sizeof(*rec));
5997         if (!rec)
5998                 return -ENOMEM;
5999         rec->start = tmpl->start;
6000         rec->max_size = tmpl->max_size;
6001         rec->nr = max(tmpl->nr, tmpl->max_size);
6002         rec->found_rec = tmpl->found_rec;
6003         rec->content_checked = tmpl->content_checked;
6004         rec->owner_ref_checked = tmpl->owner_ref_checked;
6005         rec->num_duplicates = 0;
6006         rec->metadata = tmpl->metadata;
6007         rec->flag_block_full_backref = FLAG_UNSET;
6008         rec->bad_full_backref = 0;
6009         rec->crossing_stripes = 0;
6010         rec->wrong_chunk_type = 0;
6011         rec->is_root = tmpl->is_root;
6012         rec->refs = tmpl->refs;
6013         rec->extent_item_refs = tmpl->extent_item_refs;
6014         rec->parent_generation = tmpl->parent_generation;
6015         INIT_LIST_HEAD(&rec->backrefs);
6016         INIT_LIST_HEAD(&rec->dups);
6017         INIT_LIST_HEAD(&rec->list);
6018         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6019         rec->cache.start = tmpl->start;
6020         rec->cache.size = tmpl->nr;
6021         ret = insert_cache_extent(extent_cache, &rec->cache);
6022         if (ret) {
6023                 free(rec);
6024                 return ret;
6025         }
6026         bytes_used += rec->nr;
6027
6028         if (tmpl->metadata)
6029                 rec->crossing_stripes = check_crossing_stripes(global_info,
6030                                 rec->start, global_info->tree_root->nodesize);
6031         check_extent_type(rec);
6032         return ret;
6033 }
6034
6035 /*
6036  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6037  * some are hints:
6038  * - refs              - if found, increase refs
6039  * - is_root           - if found, set
6040  * - content_checked   - if found, set
6041  * - owner_ref_checked - if found, set
6042  *
6043  * If not found, create a new one, initialize and insert.
6044  */
6045 static int add_extent_rec(struct cache_tree *extent_cache,
6046                 struct extent_record *tmpl)
6047 {
6048         struct extent_record *rec;
6049         struct cache_extent *cache;
6050         int ret = 0;
6051         int dup = 0;
6052
6053         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6054         if (cache) {
6055                 rec = container_of(cache, struct extent_record, cache);
6056                 if (tmpl->refs)
6057                         rec->refs++;
6058                 if (rec->nr == 1)
6059                         rec->nr = max(tmpl->nr, tmpl->max_size);
6060
6061                 /*
6062                  * We need to make sure to reset nr to whatever the extent
6063                  * record says was the real size, this way we can compare it to
6064                  * the backrefs.
6065                  */
6066                 if (tmpl->found_rec) {
6067                         if (tmpl->start != rec->start || rec->found_rec) {
6068                                 struct extent_record *tmp;
6069
6070                                 dup = 1;
6071                                 if (list_empty(&rec->list))
6072                                         list_add_tail(&rec->list,
6073                                                       &duplicate_extents);
6074
6075                                 /*
6076                                  * We have to do this song and dance in case we
6077                                  * find an extent record that falls inside of
6078                                  * our current extent record but does not have
6079                                  * the same objectid.
6080                                  */
6081                                 tmp = malloc(sizeof(*tmp));
6082                                 if (!tmp)
6083                                         return -ENOMEM;
6084                                 tmp->start = tmpl->start;
6085                                 tmp->max_size = tmpl->max_size;
6086                                 tmp->nr = tmpl->nr;
6087                                 tmp->found_rec = 1;
6088                                 tmp->metadata = tmpl->metadata;
6089                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6090                                 INIT_LIST_HEAD(&tmp->list);
6091                                 list_add_tail(&tmp->list, &rec->dups);
6092                                 rec->num_duplicates++;
6093                         } else {
6094                                 rec->nr = tmpl->nr;
6095                                 rec->found_rec = 1;
6096                         }
6097                 }
6098
6099                 if (tmpl->extent_item_refs && !dup) {
6100                         if (rec->extent_item_refs) {
6101                                 fprintf(stderr, "block %llu rec "
6102                                         "extent_item_refs %llu, passed %llu\n",
6103                                         (unsigned long long)tmpl->start,
6104                                         (unsigned long long)
6105                                                         rec->extent_item_refs,
6106                                         (unsigned long long)tmpl->extent_item_refs);
6107                         }
6108                         rec->extent_item_refs = tmpl->extent_item_refs;
6109                 }
6110                 if (tmpl->is_root)
6111                         rec->is_root = 1;
6112                 if (tmpl->content_checked)
6113                         rec->content_checked = 1;
6114                 if (tmpl->owner_ref_checked)
6115                         rec->owner_ref_checked = 1;
6116                 memcpy(&rec->parent_key, &tmpl->parent_key,
6117                                 sizeof(tmpl->parent_key));
6118                 if (tmpl->parent_generation)
6119                         rec->parent_generation = tmpl->parent_generation;
6120                 if (rec->max_size < tmpl->max_size)
6121                         rec->max_size = tmpl->max_size;
6122
6123                 /*
6124                  * A metadata extent can't cross stripe_len boundary, otherwise
6125                  * kernel scrub won't be able to handle it.
6126                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6127                  * it.
6128                  */
6129                 if (tmpl->metadata)
6130                         rec->crossing_stripes = check_crossing_stripes(
6131                                         global_info, rec->start,
6132                                         global_info->tree_root->nodesize);
6133                 check_extent_type(rec);
6134                 maybe_free_extent_rec(extent_cache, rec);
6135                 return ret;
6136         }
6137
6138         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6139
6140         return ret;
6141 }
6142
6143 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6144                             u64 parent, u64 root, int found_ref)
6145 {
6146         struct extent_record *rec;
6147         struct tree_backref *back;
6148         struct cache_extent *cache;
6149         int ret;
6150
6151         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6152         if (!cache) {
6153                 struct extent_record tmpl;
6154
6155                 memset(&tmpl, 0, sizeof(tmpl));
6156                 tmpl.start = bytenr;
6157                 tmpl.nr = 1;
6158                 tmpl.metadata = 1;
6159
6160                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6161                 if (ret)
6162                         return ret;
6163
6164                 /* really a bug in cache_extent implement now */
6165                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6166                 if (!cache)
6167                         return -ENOENT;
6168         }
6169
6170         rec = container_of(cache, struct extent_record, cache);
6171         if (rec->start != bytenr) {
6172                 /*
6173                  * Several cause, from unaligned bytenr to over lapping extents
6174                  */
6175                 return -EEXIST;
6176         }
6177
6178         back = find_tree_backref(rec, parent, root);
6179         if (!back) {
6180                 back = alloc_tree_backref(rec, parent, root);
6181                 if (!back)
6182                         return -ENOMEM;
6183         }
6184
6185         if (found_ref) {
6186                 if (back->node.found_ref) {
6187                         fprintf(stderr, "Extent back ref already exists "
6188                                 "for %llu parent %llu root %llu \n",
6189                                 (unsigned long long)bytenr,
6190                                 (unsigned long long)parent,
6191                                 (unsigned long long)root);
6192                 }
6193                 back->node.found_ref = 1;
6194         } else {
6195                 if (back->node.found_extent_tree) {
6196                         fprintf(stderr, "Extent back ref already exists "
6197                                 "for %llu parent %llu root %llu \n",
6198                                 (unsigned long long)bytenr,
6199                                 (unsigned long long)parent,
6200                                 (unsigned long long)root);
6201                 }
6202                 back->node.found_extent_tree = 1;
6203         }
6204         check_extent_type(rec);
6205         maybe_free_extent_rec(extent_cache, rec);
6206         return 0;
6207 }
6208
6209 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6210                             u64 parent, u64 root, u64 owner, u64 offset,
6211                             u32 num_refs, int found_ref, u64 max_size)
6212 {
6213         struct extent_record *rec;
6214         struct data_backref *back;
6215         struct cache_extent *cache;
6216         int ret;
6217
6218         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6219         if (!cache) {
6220                 struct extent_record tmpl;
6221
6222                 memset(&tmpl, 0, sizeof(tmpl));
6223                 tmpl.start = bytenr;
6224                 tmpl.nr = 1;
6225                 tmpl.max_size = max_size;
6226
6227                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6228                 if (ret)
6229                         return ret;
6230
6231                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6232                 if (!cache)
6233                         abort();
6234         }
6235
6236         rec = container_of(cache, struct extent_record, cache);
6237         if (rec->max_size < max_size)
6238                 rec->max_size = max_size;
6239
6240         /*
6241          * If found_ref is set then max_size is the real size and must match the
6242          * existing refs.  So if we have already found a ref then we need to
6243          * make sure that this ref matches the existing one, otherwise we need
6244          * to add a new backref so we can notice that the backrefs don't match
6245          * and we need to figure out who is telling the truth.  This is to
6246          * account for that awful fsync bug I introduced where we'd end up with
6247          * a btrfs_file_extent_item that would have its length include multiple
6248          * prealloc extents or point inside of a prealloc extent.
6249          */
6250         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6251                                  bytenr, max_size);
6252         if (!back) {
6253                 back = alloc_data_backref(rec, parent, root, owner, offset,
6254                                           max_size);
6255                 BUG_ON(!back);
6256         }
6257
6258         if (found_ref) {
6259                 BUG_ON(num_refs != 1);
6260                 if (back->node.found_ref)
6261                         BUG_ON(back->bytes != max_size);
6262                 back->node.found_ref = 1;
6263                 back->found_ref += 1;
6264                 back->bytes = max_size;
6265                 back->disk_bytenr = bytenr;
6266                 rec->refs += 1;
6267                 rec->content_checked = 1;
6268                 rec->owner_ref_checked = 1;
6269         } else {
6270                 if (back->node.found_extent_tree) {
6271                         fprintf(stderr, "Extent back ref already exists "
6272                                 "for %llu parent %llu root %llu "
6273                                 "owner %llu offset %llu num_refs %lu\n",
6274                                 (unsigned long long)bytenr,
6275                                 (unsigned long long)parent,
6276                                 (unsigned long long)root,
6277                                 (unsigned long long)owner,
6278                                 (unsigned long long)offset,
6279                                 (unsigned long)num_refs);
6280                 }
6281                 back->num_refs = num_refs;
6282                 back->node.found_extent_tree = 1;
6283         }
6284         maybe_free_extent_rec(extent_cache, rec);
6285         return 0;
6286 }
6287
6288 static int add_pending(struct cache_tree *pending,
6289                        struct cache_tree *seen, u64 bytenr, u32 size)
6290 {
6291         int ret;
6292         ret = add_cache_extent(seen, bytenr, size);
6293         if (ret)
6294                 return ret;
6295         add_cache_extent(pending, bytenr, size);
6296         return 0;
6297 }
6298
6299 static int pick_next_pending(struct cache_tree *pending,
6300                         struct cache_tree *reada,
6301                         struct cache_tree *nodes,
6302                         u64 last, struct block_info *bits, int bits_nr,
6303                         int *reada_bits)
6304 {
6305         unsigned long node_start = last;
6306         struct cache_extent *cache;
6307         int ret;
6308
6309         cache = search_cache_extent(reada, 0);
6310         if (cache) {
6311                 bits[0].start = cache->start;
6312                 bits[0].size = cache->size;
6313                 *reada_bits = 1;
6314                 return 1;
6315         }
6316         *reada_bits = 0;
6317         if (node_start > 32768)
6318                 node_start -= 32768;
6319
6320         cache = search_cache_extent(nodes, node_start);
6321         if (!cache)
6322                 cache = search_cache_extent(nodes, 0);
6323
6324         if (!cache) {
6325                  cache = search_cache_extent(pending, 0);
6326                  if (!cache)
6327                          return 0;
6328                  ret = 0;
6329                  do {
6330                          bits[ret].start = cache->start;
6331                          bits[ret].size = cache->size;
6332                          cache = next_cache_extent(cache);
6333                          ret++;
6334                  } while (cache && ret < bits_nr);
6335                  return ret;
6336         }
6337
6338         ret = 0;
6339         do {
6340                 bits[ret].start = cache->start;
6341                 bits[ret].size = cache->size;
6342                 cache = next_cache_extent(cache);
6343                 ret++;
6344         } while (cache && ret < bits_nr);
6345
6346         if (bits_nr - ret > 8) {
6347                 u64 lookup = bits[0].start + bits[0].size;
6348                 struct cache_extent *next;
6349                 next = search_cache_extent(pending, lookup);
6350                 while(next) {
6351                         if (next->start - lookup > 32768)
6352                                 break;
6353                         bits[ret].start = next->start;
6354                         bits[ret].size = next->size;
6355                         lookup = next->start + next->size;
6356                         ret++;
6357                         if (ret == bits_nr)
6358                                 break;
6359                         next = next_cache_extent(next);
6360                         if (!next)
6361                                 break;
6362                 }
6363         }
6364         return ret;
6365 }
6366
6367 static void free_chunk_record(struct cache_extent *cache)
6368 {
6369         struct chunk_record *rec;
6370
6371         rec = container_of(cache, struct chunk_record, cache);
6372         list_del_init(&rec->list);
6373         list_del_init(&rec->dextents);
6374         free(rec);
6375 }
6376
6377 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6378 {
6379         cache_tree_free_extents(chunk_cache, free_chunk_record);
6380 }
6381
6382 static void free_device_record(struct rb_node *node)
6383 {
6384         struct device_record *rec;
6385
6386         rec = container_of(node, struct device_record, node);
6387         free(rec);
6388 }
6389
6390 FREE_RB_BASED_TREE(device_cache, free_device_record);
6391
6392 int insert_block_group_record(struct block_group_tree *tree,
6393                               struct block_group_record *bg_rec)
6394 {
6395         int ret;
6396
6397         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6398         if (ret)
6399                 return ret;
6400
6401         list_add_tail(&bg_rec->list, &tree->block_groups);
6402         return 0;
6403 }
6404
6405 static void free_block_group_record(struct cache_extent *cache)
6406 {
6407         struct block_group_record *rec;
6408
6409         rec = container_of(cache, struct block_group_record, cache);
6410         list_del_init(&rec->list);
6411         free(rec);
6412 }
6413
6414 void free_block_group_tree(struct block_group_tree *tree)
6415 {
6416         cache_tree_free_extents(&tree->tree, free_block_group_record);
6417 }
6418
6419 int insert_device_extent_record(struct device_extent_tree *tree,
6420                                 struct device_extent_record *de_rec)
6421 {
6422         int ret;
6423
6424         /*
6425          * Device extent is a bit different from the other extents, because
6426          * the extents which belong to the different devices may have the
6427          * same start and size, so we need use the special extent cache
6428          * search/insert functions.
6429          */
6430         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6431         if (ret)
6432                 return ret;
6433
6434         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6435         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6436         return 0;
6437 }
6438
6439 static void free_device_extent_record(struct cache_extent *cache)
6440 {
6441         struct device_extent_record *rec;
6442
6443         rec = container_of(cache, struct device_extent_record, cache);
6444         if (!list_empty(&rec->chunk_list))
6445                 list_del_init(&rec->chunk_list);
6446         if (!list_empty(&rec->device_list))
6447                 list_del_init(&rec->device_list);
6448         free(rec);
6449 }
6450
6451 void free_device_extent_tree(struct device_extent_tree *tree)
6452 {
6453         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6454 }
6455
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found at @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref; anything else is recorded as a data backref
 * carrying the item's ref count.  Both are recorded as extent-tree sightings
 * (found_ref == 0).
 *
 * Returns whatever add_tree_backref() or add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6476
6477 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6478                                             struct btrfs_key *key,
6479                                             int slot)
6480 {
6481         struct btrfs_chunk *ptr;
6482         struct chunk_record *rec;
6483         int num_stripes, i;
6484
6485         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6486         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6487
6488         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6489         if (!rec) {
6490                 fprintf(stderr, "memory allocation failed\n");
6491                 exit(-1);
6492         }
6493
6494         INIT_LIST_HEAD(&rec->list);
6495         INIT_LIST_HEAD(&rec->dextents);
6496         rec->bg_rec = NULL;
6497
6498         rec->cache.start = key->offset;
6499         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6500
6501         rec->generation = btrfs_header_generation(leaf);
6502
6503         rec->objectid = key->objectid;
6504         rec->type = key->type;
6505         rec->offset = key->offset;
6506
6507         rec->length = rec->cache.size;
6508         rec->owner = btrfs_chunk_owner(leaf, ptr);
6509         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6510         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6511         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6512         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6513         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6514         rec->num_stripes = num_stripes;
6515         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6516
6517         for (i = 0; i < rec->num_stripes; ++i) {
6518                 rec->stripes[i].devid =
6519                         btrfs_stripe_devid_nr(leaf, ptr, i);
6520                 rec->stripes[i].offset =
6521                         btrfs_stripe_offset_nr(leaf, ptr, i);
6522                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6523                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6524                                 BTRFS_UUID_SIZE);
6525         }
6526
6527         return rec;
6528 }
6529
6530 static int process_chunk_item(struct cache_tree *chunk_cache,
6531                               struct btrfs_key *key, struct extent_buffer *eb,
6532                               int slot)
6533 {
6534         struct chunk_record *rec;
6535         struct btrfs_chunk *chunk;
6536         int ret = 0;
6537
6538         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6539         /*
6540          * Do extra check for this chunk item,
6541          *
6542          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6543          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6544          * and owner<->key_type check.
6545          */
6546         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6547                                       key->offset);
6548         if (ret < 0) {
6549                 error("chunk(%llu, %llu) is not valid, ignore it",
6550                       key->offset, btrfs_chunk_length(eb, chunk));
6551                 return 0;
6552         }
6553         rec = btrfs_new_chunk_record(eb, key, slot);
6554         ret = insert_cache_extent(chunk_cache, &rec->cache);
6555         if (ret) {
6556                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6557                         rec->offset, rec->length);
6558                 free(rec);
6559         }
6560
6561         return ret;
6562 }
6563
6564 static int process_device_item(struct rb_root *dev_cache,
6565                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6566 {
6567         struct btrfs_dev_item *ptr;
6568         struct device_record *rec;
6569         int ret = 0;
6570
6571         ptr = btrfs_item_ptr(eb,
6572                 slot, struct btrfs_dev_item);
6573
6574         rec = malloc(sizeof(*rec));
6575         if (!rec) {
6576                 fprintf(stderr, "memory allocation failed\n");
6577                 return -ENOMEM;
6578         }
6579
6580         rec->devid = key->offset;
6581         rec->generation = btrfs_header_generation(eb);
6582
6583         rec->objectid = key->objectid;
6584         rec->type = key->type;
6585         rec->offset = key->offset;
6586
6587         rec->devid = btrfs_device_id(eb, ptr);
6588         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6589         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6590
6591         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6592         if (ret) {
6593                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6594                 free(rec);
6595         }
6596
6597         return ret;
6598 }
6599
6600 struct block_group_record *
6601 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6602                              int slot)
6603 {
6604         struct btrfs_block_group_item *ptr;
6605         struct block_group_record *rec;
6606
6607         rec = calloc(1, sizeof(*rec));
6608         if (!rec) {
6609                 fprintf(stderr, "memory allocation failed\n");
6610                 exit(-1);
6611         }
6612
6613         rec->cache.start = key->objectid;
6614         rec->cache.size = key->offset;
6615
6616         rec->generation = btrfs_header_generation(leaf);
6617
6618         rec->objectid = key->objectid;
6619         rec->type = key->type;
6620         rec->offset = key->offset;
6621
6622         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6623         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6624
6625         INIT_LIST_HEAD(&rec->list);
6626
6627         return rec;
6628 }
6629
6630 static int process_block_group_item(struct block_group_tree *block_group_cache,
6631                                     struct btrfs_key *key,
6632                                     struct extent_buffer *eb, int slot)
6633 {
6634         struct block_group_record *rec;
6635         int ret = 0;
6636
6637         rec = btrfs_new_block_group_record(eb, key, slot);
6638         ret = insert_block_group_record(block_group_cache, rec);
6639         if (ret) {
6640                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6641                         rec->objectid, rec->offset);
6642                 free(rec);
6643         }
6644
6645         return ret;
6646 }
6647
6648 struct device_extent_record *
6649 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6650                                struct btrfs_key *key, int slot)
6651 {
6652         struct device_extent_record *rec;
6653         struct btrfs_dev_extent *ptr;
6654
6655         rec = calloc(1, sizeof(*rec));
6656         if (!rec) {
6657                 fprintf(stderr, "memory allocation failed\n");
6658                 exit(-1);
6659         }
6660
6661         rec->cache.objectid = key->objectid;
6662         rec->cache.start = key->offset;
6663
6664         rec->generation = btrfs_header_generation(leaf);
6665
6666         rec->objectid = key->objectid;
6667         rec->type = key->type;
6668         rec->offset = key->offset;
6669
6670         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6671         rec->chunk_objecteid =
6672                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6673         rec->chunk_offset =
6674                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6675         rec->length = btrfs_dev_extent_length(leaf, ptr);
6676         rec->cache.size = rec->length;
6677
6678         INIT_LIST_HEAD(&rec->chunk_list);
6679         INIT_LIST_HEAD(&rec->device_list);
6680
6681         return rec;
6682 }
6683
6684 static int
6685 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6686                            struct btrfs_key *key, struct extent_buffer *eb,
6687                            int slot)
6688 {
6689         struct device_extent_record *rec;
6690         int ret;
6691
6692         rec = btrfs_new_device_extent_record(eb, key, slot);
6693         ret = insert_device_extent_record(dev_extent_cache, rec);
6694         if (ret) {
6695                 fprintf(stderr,
6696                         "Device extent[%llu, %llu, %llu] existed.\n",
6697                         rec->objectid, rec->offset, rec->length);
6698                 free(rec);
6699         }
6700
6701         return ret;
6702 }
6703
6704 static int process_extent_item(struct btrfs_root *root,
6705                                struct cache_tree *extent_cache,
6706                                struct extent_buffer *eb, int slot)
6707 {
6708         struct btrfs_extent_item *ei;
6709         struct btrfs_extent_inline_ref *iref;
6710         struct btrfs_extent_data_ref *dref;
6711         struct btrfs_shared_data_ref *sref;
6712         struct btrfs_key key;
6713         struct extent_record tmpl;
6714         unsigned long end;
6715         unsigned long ptr;
6716         int ret;
6717         int type;
6718         u32 item_size = btrfs_item_size_nr(eb, slot);
6719         u64 refs = 0;
6720         u64 offset;
6721         u64 num_bytes;
6722         int metadata = 0;
6723
6724         btrfs_item_key_to_cpu(eb, &key, slot);
6725
6726         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6727                 metadata = 1;
6728                 num_bytes = root->nodesize;
6729         } else {
6730                 num_bytes = key.offset;
6731         }
6732
6733         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6734                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6735                       key.objectid, root->sectorsize);
6736                 return -EIO;
6737         }
6738         if (item_size < sizeof(*ei)) {
6739 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6740                 struct btrfs_extent_item_v0 *ei0;
6741                 BUG_ON(item_size != sizeof(*ei0));
6742                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6743                 refs = btrfs_extent_refs_v0(eb, ei0);
6744 #else
6745                 BUG();
6746 #endif
6747                 memset(&tmpl, 0, sizeof(tmpl));
6748                 tmpl.start = key.objectid;
6749                 tmpl.nr = num_bytes;
6750                 tmpl.extent_item_refs = refs;
6751                 tmpl.metadata = metadata;
6752                 tmpl.found_rec = 1;
6753                 tmpl.max_size = num_bytes;
6754
6755                 return add_extent_rec(extent_cache, &tmpl);
6756         }
6757
6758         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6759         refs = btrfs_extent_refs(eb, ei);
6760         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6761                 metadata = 1;
6762         else
6763                 metadata = 0;
6764         if (metadata && num_bytes != root->nodesize) {
6765                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6766                       num_bytes, root->nodesize);
6767                 return -EIO;
6768         }
6769         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6770                 error("ignore invalid data extent, length %llu is not aligned to %u",
6771                       num_bytes, root->sectorsize);
6772                 return -EIO;
6773         }
6774
6775         memset(&tmpl, 0, sizeof(tmpl));
6776         tmpl.start = key.objectid;
6777         tmpl.nr = num_bytes;
6778         tmpl.extent_item_refs = refs;
6779         tmpl.metadata = metadata;
6780         tmpl.found_rec = 1;
6781         tmpl.max_size = num_bytes;
6782         add_extent_rec(extent_cache, &tmpl);
6783
6784         ptr = (unsigned long)(ei + 1);
6785         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6786             key.type == BTRFS_EXTENT_ITEM_KEY)
6787                 ptr += sizeof(struct btrfs_tree_block_info);
6788
6789         end = (unsigned long)ei + item_size;
6790         while (ptr < end) {
6791                 iref = (struct btrfs_extent_inline_ref *)ptr;
6792                 type = btrfs_extent_inline_ref_type(eb, iref);
6793                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6794                 switch (type) {
6795                 case BTRFS_TREE_BLOCK_REF_KEY:
6796                         ret = add_tree_backref(extent_cache, key.objectid,
6797                                         0, offset, 0);
6798                         if (ret < 0)
6799                                 error("add_tree_backref failed: %s",
6800                                       strerror(-ret));
6801                         break;
6802                 case BTRFS_SHARED_BLOCK_REF_KEY:
6803                         ret = add_tree_backref(extent_cache, key.objectid,
6804                                         offset, 0, 0);
6805                         if (ret < 0)
6806                                 error("add_tree_backref failed: %s",
6807                                       strerror(-ret));
6808                         break;
6809                 case BTRFS_EXTENT_DATA_REF_KEY:
6810                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6811                         add_data_backref(extent_cache, key.objectid, 0,
6812                                         btrfs_extent_data_ref_root(eb, dref),
6813                                         btrfs_extent_data_ref_objectid(eb,
6814                                                                        dref),
6815                                         btrfs_extent_data_ref_offset(eb, dref),
6816                                         btrfs_extent_data_ref_count(eb, dref),
6817                                         0, num_bytes);
6818                         break;
6819                 case BTRFS_SHARED_DATA_REF_KEY:
6820                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6821                         add_data_backref(extent_cache, key.objectid, offset,
6822                                         0, 0, 0,
6823                                         btrfs_shared_data_ref_count(eb, sref),
6824                                         0, num_bytes);
6825                         break;
6826                 default:
6827                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6828                                 key.objectid, key.type, num_bytes);
6829                         goto out;
6830                 }
6831                 ptr += btrfs_extent_inline_ref_size(type);
6832         }
6833         WARN_ON(ptr > end);
6834 out:
6835         return 0;
6836 }
6837
6838 static int check_cache_range(struct btrfs_root *root,
6839                              struct btrfs_block_group_cache *cache,
6840                              u64 offset, u64 bytes)
6841 {
6842         struct btrfs_free_space *entry;
6843         u64 *logical;
6844         u64 bytenr;
6845         int stripe_len;
6846         int i, nr, ret;
6847
6848         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6849                 bytenr = btrfs_sb_offset(i);
6850                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6851                                        cache->key.objectid, bytenr, 0,
6852                                        &logical, &nr, &stripe_len);
6853                 if (ret)
6854                         return ret;
6855
6856                 while (nr--) {
6857                         if (logical[nr] + stripe_len <= offset)
6858                                 continue;
6859                         if (offset + bytes <= logical[nr])
6860                                 continue;
6861                         if (logical[nr] == offset) {
6862                                 if (stripe_len >= bytes) {
6863                                         free(logical);
6864                                         return 0;
6865                                 }
6866                                 bytes -= stripe_len;
6867                                 offset += stripe_len;
6868                         } else if (logical[nr] < offset) {
6869                                 if (logical[nr] + stripe_len >=
6870                                     offset + bytes) {
6871                                         free(logical);
6872                                         return 0;
6873                                 }
6874                                 bytes = (offset + bytes) -
6875                                         (logical[nr] + stripe_len);
6876                                 offset = logical[nr] + stripe_len;
6877                         } else {
6878                                 /*
6879                                  * Could be tricky, the super may land in the
6880                                  * middle of the area we're checking.  First
6881                                  * check the easiest case, it's at the end.
6882                                  */
6883                                 if (logical[nr] + stripe_len >=
6884                                     bytes + offset) {
6885                                         bytes = logical[nr] - offset;
6886                                         continue;
6887                                 }
6888
6889                                 /* Check the left side */
6890                                 ret = check_cache_range(root, cache,
6891                                                         offset,
6892                                                         logical[nr] - offset);
6893                                 if (ret) {
6894                                         free(logical);
6895                                         return ret;
6896                                 }
6897
6898                                 /* Now we continue with the right side */
6899                                 bytes = (offset + bytes) -
6900                                         (logical[nr] + stripe_len);
6901                                 offset = logical[nr] + stripe_len;
6902                         }
6903                 }
6904
6905                 free(logical);
6906         }
6907
6908         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6909         if (!entry) {
6910                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6911                         offset, offset+bytes);
6912                 return -EINVAL;
6913         }
6914
6915         if (entry->offset != offset) {
6916                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6917                         entry->offset);
6918                 return -EINVAL;
6919         }
6920
6921         if (entry->bytes != bytes) {
6922                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6923                         bytes, entry->bytes, offset);
6924                 return -EINVAL;
6925         }
6926
6927         unlink_free_space(cache->free_space_ctl, entry);
6928         free(entry);
6929         return 0;
6930 }
6931
6932 static int verify_space_cache(struct btrfs_root *root,
6933                               struct btrfs_block_group_cache *cache)
6934 {
6935         struct btrfs_path path;
6936         struct extent_buffer *leaf;
6937         struct btrfs_key key;
6938         u64 last;
6939         int ret = 0;
6940
6941         root = root->fs_info->extent_root;
6942
6943         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6944
6945         btrfs_init_path(&path);
6946         key.objectid = last;
6947         key.offset = 0;
6948         key.type = BTRFS_EXTENT_ITEM_KEY;
6949         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6950         if (ret < 0)
6951                 goto out;
6952         ret = 0;
6953         while (1) {
6954                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6955                         ret = btrfs_next_leaf(root, &path);
6956                         if (ret < 0)
6957                                 goto out;
6958                         if (ret > 0) {
6959                                 ret = 0;
6960                                 break;
6961                         }
6962                 }
6963                 leaf = path.nodes[0];
6964                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6965                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6966                         break;
6967                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6968                     key.type != BTRFS_METADATA_ITEM_KEY) {
6969                         path.slots[0]++;
6970                         continue;
6971                 }
6972
6973                 if (last == key.objectid) {
6974                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6975                                 last = key.objectid + key.offset;
6976                         else
6977                                 last = key.objectid + root->nodesize;
6978                         path.slots[0]++;
6979                         continue;
6980                 }
6981
6982                 ret = check_cache_range(root, cache, last,
6983                                         key.objectid - last);
6984                 if (ret)
6985                         break;
6986                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6987                         last = key.objectid + key.offset;
6988                 else
6989                         last = key.objectid + root->nodesize;
6990                 path.slots[0]++;
6991         }
6992
6993         if (last < cache->key.objectid + cache->key.offset)
6994                 ret = check_cache_range(root, cache, last,
6995                                         cache->key.objectid +
6996                                         cache->key.offset - last);
6997
6998 out:
6999         btrfs_release_path(&path);
7000
7001         if (!ret &&
7002             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7003                 fprintf(stderr, "There are still entries left in the space "
7004                         "cache\n");
7005                 ret = -EINVAL;
7006         }
7007
7008         return ret;
7009 }
7010
7011 static int check_space_cache(struct btrfs_root *root)
7012 {
7013         struct btrfs_block_group_cache *cache;
7014         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7015         int ret;
7016         int error = 0;
7017
7018         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7019             btrfs_super_generation(root->fs_info->super_copy) !=
7020             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7021                 printf("cache and super generation don't match, space cache "
7022                        "will be invalidated\n");
7023                 return 0;
7024         }
7025
7026         if (ctx.progress_enabled) {
7027                 ctx.tp = TASK_FREE_SPACE;
7028                 task_start(ctx.info);
7029         }
7030
7031         while (1) {
7032                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7033                 if (!cache)
7034                         break;
7035
7036                 start = cache->key.objectid + cache->key.offset;
7037                 if (!cache->free_space_ctl) {
7038                         if (btrfs_init_free_space_ctl(cache,
7039                                                       root->sectorsize)) {
7040                                 ret = -ENOMEM;
7041                                 break;
7042                         }
7043                 } else {
7044                         btrfs_remove_free_space_cache(cache);
7045                 }
7046
7047                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7048                         ret = exclude_super_stripes(root, cache);
7049                         if (ret) {
7050                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7051                                         strerror(-ret));
7052                                 error++;
7053                                 continue;
7054                         }
7055                         ret = load_free_space_tree(root->fs_info, cache);
7056                         free_excluded_extents(root, cache);
7057                         if (ret < 0) {
7058                                 fprintf(stderr, "could not load free space tree: %s\n",
7059                                         strerror(-ret));
7060                                 error++;
7061                                 continue;
7062                         }
7063                         error += ret;
7064                 } else {
7065                         ret = load_free_space_cache(root->fs_info, cache);
7066                         if (!ret)
7067                                 continue;
7068                 }
7069
7070                 ret = verify_space_cache(root, cache);
7071                 if (ret) {
7072                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7073                                 cache->key.objectid);
7074                         error++;
7075                 }
7076         }
7077
7078         task_stop(ctx.info);
7079
7080         return error ? -EINVAL : 0;
7081 }
7082
7083 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7084                         u64 num_bytes, unsigned long leaf_offset,
7085                         struct extent_buffer *eb) {
7086
7087         u64 offset = 0;
7088         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7089         char *data;
7090         unsigned long csum_offset;
7091         u32 csum;
7092         u32 csum_expected;
7093         u64 read_len;
7094         u64 data_checked = 0;
7095         u64 tmp;
7096         int ret = 0;
7097         int mirror;
7098         int num_copies;
7099
7100         if (num_bytes % root->sectorsize)
7101                 return -EINVAL;
7102
7103         data = malloc(num_bytes);
7104         if (!data)
7105                 return -ENOMEM;
7106
7107         while (offset < num_bytes) {
7108                 mirror = 0;
7109 again:
7110                 read_len = num_bytes - offset;
7111                 /* read as much space once a time */
7112                 ret = read_extent_data(root, data + offset,
7113                                 bytenr + offset, &read_len, mirror);
7114                 if (ret)
7115                         goto out;
7116                 data_checked = 0;
7117                 /* verify every 4k data's checksum */
7118                 while (data_checked < read_len) {
7119                         csum = ~(u32)0;
7120                         tmp = offset + data_checked;
7121
7122                         csum = btrfs_csum_data((char *)data + tmp,
7123                                                csum, root->sectorsize);
7124                         btrfs_csum_final(csum, (u8 *)&csum);
7125
7126                         csum_offset = leaf_offset +
7127                                  tmp / root->sectorsize * csum_size;
7128                         read_extent_buffer(eb, (char *)&csum_expected,
7129                                            csum_offset, csum_size);
7130                         /* try another mirror */
7131                         if (csum != csum_expected) {
7132                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7133                                                 mirror, bytenr + tmp,
7134                                                 csum, csum_expected);
7135                                 num_copies = btrfs_num_copies(
7136                                                 &root->fs_info->mapping_tree,
7137                                                 bytenr, num_bytes);
7138                                 if (mirror < num_copies - 1) {
7139                                         mirror += 1;
7140                                         goto again;
7141                                 }
7142                         }
7143                         data_checked += root->sectorsize;
7144                 }
7145                 offset += read_len;
7146         }
7147 out:
7148         free(data);
7149         return ret;
7150 }
7151
7152 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7153                                u64 num_bytes)
7154 {
7155         struct btrfs_path path;
7156         struct extent_buffer *leaf;
7157         struct btrfs_key key;
7158         int ret;
7159
7160         btrfs_init_path(&path);
7161         key.objectid = bytenr;
7162         key.type = BTRFS_EXTENT_ITEM_KEY;
7163         key.offset = (u64)-1;
7164
7165 again:
7166         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7167                                 0, 0);
7168         if (ret < 0) {
7169                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7170                 btrfs_release_path(&path);
7171                 return ret;
7172         } else if (ret) {
7173                 if (path.slots[0] > 0) {
7174                         path.slots[0]--;
7175                 } else {
7176                         ret = btrfs_prev_leaf(root, &path);
7177                         if (ret < 0) {
7178                                 goto out;
7179                         } else if (ret > 0) {
7180                                 ret = 0;
7181                                 goto out;
7182                         }
7183                 }
7184         }
7185
7186         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7187
7188         /*
7189          * Block group items come before extent items if they have the same
7190          * bytenr, so walk back one more just in case.  Dear future traveller,
7191          * first congrats on mastering time travel.  Now if it's not too much
7192          * trouble could you go back to 2006 and tell Chris to make the
7193          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7194          * EXTENT_ITEM_KEY please?
7195          */
7196         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7197                 if (path.slots[0] > 0) {
7198                         path.slots[0]--;
7199                 } else {
7200                         ret = btrfs_prev_leaf(root, &path);
7201                         if (ret < 0) {
7202                                 goto out;
7203                         } else if (ret > 0) {
7204                                 ret = 0;
7205                                 goto out;
7206                         }
7207                 }
7208                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7209         }
7210
7211         while (num_bytes) {
7212                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7213                         ret = btrfs_next_leaf(root, &path);
7214                         if (ret < 0) {
7215                                 fprintf(stderr, "Error going to next leaf "
7216                                         "%d\n", ret);
7217                                 btrfs_release_path(&path);
7218                                 return ret;
7219                         } else if (ret) {
7220                                 break;
7221                         }
7222                 }
7223                 leaf = path.nodes[0];
7224                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7225                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7226                         path.slots[0]++;
7227                         continue;
7228                 }
7229                 if (key.objectid + key.offset < bytenr) {
7230                         path.slots[0]++;
7231                         continue;
7232                 }
7233                 if (key.objectid > bytenr + num_bytes)
7234                         break;
7235
7236                 if (key.objectid == bytenr) {
7237                         if (key.offset >= num_bytes) {
7238                                 num_bytes = 0;
7239                                 break;
7240                         }
7241                         num_bytes -= key.offset;
7242                         bytenr += key.offset;
7243                 } else if (key.objectid < bytenr) {
7244                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7245                                 num_bytes = 0;
7246                                 break;
7247                         }
7248                         num_bytes = (bytenr + num_bytes) -
7249                                 (key.objectid + key.offset);
7250                         bytenr = key.objectid + key.offset;
7251                 } else {
7252                         if (key.objectid + key.offset < bytenr + num_bytes) {
7253                                 u64 new_start = key.objectid + key.offset;
7254                                 u64 new_bytes = bytenr + num_bytes - new_start;
7255
7256                                 /*
7257                                  * Weird case, the extent is in the middle of
7258                                  * our range, we'll have to search one side
7259                                  * and then the other.  Not sure if this happens
7260                                  * in real life, but no harm in coding it up
7261                                  * anyway just in case.
7262                                  */
7263                                 btrfs_release_path(&path);
7264                                 ret = check_extent_exists(root, new_start,
7265                                                           new_bytes);
7266                                 if (ret) {
7267                                         fprintf(stderr, "Right section didn't "
7268                                                 "have a record\n");
7269                                         break;
7270                                 }
7271                                 num_bytes = key.objectid - bytenr;
7272                                 goto again;
7273                         }
7274                         num_bytes = key.objectid - bytenr;
7275                 }
7276                 path.slots[0]++;
7277         }
7278         ret = 0;
7279
7280 out:
7281         if (num_bytes && !ret) {
7282                 fprintf(stderr, "There are no extents for csum range "
7283                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7284                 ret = 1;
7285         }
7286
7287         btrfs_release_path(&path);
7288         return ret;
7289 }
7290
7291 static int check_csums(struct btrfs_root *root)
7292 {
7293         struct btrfs_path path;
7294         struct extent_buffer *leaf;
7295         struct btrfs_key key;
7296         u64 offset = 0, num_bytes = 0;
7297         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7298         int errors = 0;
7299         int ret;
7300         u64 data_len;
7301         unsigned long leaf_offset;
7302
7303         root = root->fs_info->csum_root;
7304         if (!extent_buffer_uptodate(root->node)) {
7305                 fprintf(stderr, "No valid csum tree found\n");
7306                 return -ENOENT;
7307         }
7308
7309         btrfs_init_path(&path);
7310         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7311         key.type = BTRFS_EXTENT_CSUM_KEY;
7312         key.offset = 0;
7313         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7314         if (ret < 0) {
7315                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7316                 btrfs_release_path(&path);
7317                 return ret;
7318         }
7319
7320         if (ret > 0 && path.slots[0])
7321                 path.slots[0]--;
7322         ret = 0;
7323
7324         while (1) {
7325                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7326                         ret = btrfs_next_leaf(root, &path);
7327                         if (ret < 0) {
7328                                 fprintf(stderr, "Error going to next leaf "
7329                                         "%d\n", ret);
7330                                 break;
7331                         }
7332                         if (ret)
7333                                 break;
7334                 }
7335                 leaf = path.nodes[0];
7336
7337                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7338                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7339                         path.slots[0]++;
7340                         continue;
7341                 }
7342
7343                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7344                               csum_size) * root->sectorsize;
7345                 if (!check_data_csum)
7346                         goto skip_csum_check;
7347                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7348                 ret = check_extent_csums(root, key.offset, data_len,
7349                                          leaf_offset, leaf);
7350                 if (ret)
7351                         break;
7352 skip_csum_check:
7353                 if (!num_bytes) {
7354                         offset = key.offset;
7355                 } else if (key.offset != offset + num_bytes) {
7356                         ret = check_extent_exists(root, offset, num_bytes);
7357                         if (ret) {
7358                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7359                                         "there is no extent record\n",
7360                                         offset, offset+num_bytes);
7361                                 errors++;
7362                         }
7363                         offset = key.offset;
7364                         num_bytes = 0;
7365                 }
7366                 num_bytes += data_len;
7367                 path.slots[0]++;
7368         }
7369
7370         btrfs_release_path(&path);
7371         return errors;
7372 }
7373
7374 static int is_dropped_key(struct btrfs_key *key,
7375                           struct btrfs_key *drop_key) {
7376         if (key->objectid < drop_key->objectid)
7377                 return 1;
7378         else if (key->objectid == drop_key->objectid) {
7379                 if (key->type < drop_key->type)
7380                         return 1;
7381                 else if (key->type == drop_key->type) {
7382                         if (key->offset < drop_key->offset)
7383                                 return 1;
7384                 }
7385         }
7386         return 0;
7387 }
7388
7389 /*
7390  * Here are the rules for FULL_BACKREF.
7391  *
7392  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7393  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7394  *      FULL_BACKREF set.
7395  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7396  *    if it happened after the relocation occurred since we'll have dropped the
7397  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7398  *    have no real way to know for sure.
7399  *
7400  * We process the blocks one root at a time, and we start from the lowest root
7401  * objectid and go to the highest.  So we can just lookup the owner backref for
7402  * the record and if we don't find it then we know it doesn't exist and we have
7403  * a FULL BACKREF.
7404  *
7405  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7406  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7407  * be set or not and then we can check later once we've gathered all the refs.
7408  */
7409 static int calc_extent_flag(struct btrfs_root *root,
7410                            struct cache_tree *extent_cache,
7411                            struct extent_buffer *buf,
7412                            struct root_item_record *ri,
7413                            u64 *flags)
7414 {
7415         struct extent_record *rec;
7416         struct cache_extent *cache;
7417         struct tree_backref *tback;
7418         u64 owner = 0;
7419
7420         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7421         /* we have added this extent before */
7422         if (!cache)
7423                 return -ENOENT;
7424
7425         rec = container_of(cache, struct extent_record, cache);
7426
7427         /*
7428          * Except file/reloc tree, we can not have
7429          * FULL BACKREF MODE
7430          */
7431         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7432                 goto normal;
7433         /*
7434          * root node
7435          */
7436         if (buf->start == ri->bytenr)
7437                 goto normal;
7438
7439         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7440                 goto full_backref;
7441
7442         owner = btrfs_header_owner(buf);
7443         if (owner == ri->objectid)
7444                 goto normal;
7445
7446         tback = find_tree_backref(rec, 0, owner);
7447         if (!tback)
7448                 goto full_backref;
7449 normal:
7450         *flags = 0;
7451         if (rec->flag_block_full_backref != FLAG_UNSET &&
7452             rec->flag_block_full_backref != 0)
7453                 rec->bad_full_backref = 1;
7454         return 0;
7455 full_backref:
7456         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7457         if (rec->flag_block_full_backref != FLAG_UNSET &&
7458             rec->flag_block_full_backref != 1)
7459                 rec->bad_full_backref = 1;
7460         return 0;
7461 }
7462
7463 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7464 {
7465         fprintf(stderr, "Invalid key type(");
7466         print_key_type(stderr, 0, key_type);
7467         fprintf(stderr, ") found in root(");
7468         print_objectid(stderr, rootid, 0);
7469         fprintf(stderr, ")\n");
7470 }
7471
7472 /*
7473  * Check if the key is valid with its extent buffer.
7474  *
7475  * This is a early check in case invalid key exists in a extent buffer
7476  * This is not comprehensive yet, but should prevent wrong key/item passed
7477  * further
7478  */
7479 static int check_type_with_root(u64 rootid, u8 key_type)
7480 {
7481         switch (key_type) {
7482         /* Only valid in chunk tree */
7483         case BTRFS_DEV_ITEM_KEY:
7484         case BTRFS_CHUNK_ITEM_KEY:
7485                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7486                         goto err;
7487                 break;
7488         /* valid in csum and log tree */
7489         case BTRFS_CSUM_TREE_OBJECTID:
7490                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7491                       is_fstree(rootid)))
7492                         goto err;
7493                 break;
7494         case BTRFS_EXTENT_ITEM_KEY:
7495         case BTRFS_METADATA_ITEM_KEY:
7496         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7497                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7498                         goto err;
7499                 break;
7500         case BTRFS_ROOT_ITEM_KEY:
7501                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7502                         goto err;
7503                 break;
7504         case BTRFS_DEV_EXTENT_KEY:
7505                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7506                         goto err;
7507                 break;
7508         }
7509         return 0;
7510 err:
7511         report_mismatch_key_root(key_type, rootid);
7512         return -EINVAL;
7513 }
7514
7515 static int run_next_block(struct btrfs_root *root,
7516                           struct block_info *bits,
7517                           int bits_nr,
7518                           u64 *last,
7519                           struct cache_tree *pending,
7520                           struct cache_tree *seen,
7521                           struct cache_tree *reada,
7522                           struct cache_tree *nodes,
7523                           struct cache_tree *extent_cache,
7524                           struct cache_tree *chunk_cache,
7525                           struct rb_root *dev_cache,
7526                           struct block_group_tree *block_group_cache,
7527                           struct device_extent_tree *dev_extent_cache,
7528                           struct root_item_record *ri)
7529 {
7530         struct extent_buffer *buf;
7531         struct extent_record *rec = NULL;
7532         u64 bytenr;
7533         u32 size;
7534         u64 parent;
7535         u64 owner;
7536         u64 flags;
7537         u64 ptr;
7538         u64 gen = 0;
7539         int ret = 0;
7540         int i;
7541         int nritems;
7542         struct btrfs_key key;
7543         struct cache_extent *cache;
7544         int reada_bits;
7545
7546         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7547                                     bits_nr, &reada_bits);
7548         if (nritems == 0)
7549                 return 1;
7550
7551         if (!reada_bits) {
7552                 for(i = 0; i < nritems; i++) {
7553                         ret = add_cache_extent(reada, bits[i].start,
7554                                                bits[i].size);
7555                         if (ret == -EEXIST)
7556                                 continue;
7557
7558                         /* fixme, get the parent transid */
7559                         readahead_tree_block(root, bits[i].start,
7560                                              bits[i].size, 0);
7561                 }
7562         }
7563         *last = bits[0].start;
7564         bytenr = bits[0].start;
7565         size = bits[0].size;
7566
7567         cache = lookup_cache_extent(pending, bytenr, size);
7568         if (cache) {
7569                 remove_cache_extent(pending, cache);
7570                 free(cache);
7571         }
7572         cache = lookup_cache_extent(reada, bytenr, size);
7573         if (cache) {
7574                 remove_cache_extent(reada, cache);
7575                 free(cache);
7576         }
7577         cache = lookup_cache_extent(nodes, bytenr, size);
7578         if (cache) {
7579                 remove_cache_extent(nodes, cache);
7580                 free(cache);
7581         }
7582         cache = lookup_cache_extent(extent_cache, bytenr, size);
7583         if (cache) {
7584                 rec = container_of(cache, struct extent_record, cache);
7585                 gen = rec->parent_generation;
7586         }
7587
7588         /* fixme, get the real parent transid */
7589         buf = read_tree_block(root, bytenr, size, gen);
7590         if (!extent_buffer_uptodate(buf)) {
7591                 record_bad_block_io(root->fs_info,
7592                                     extent_cache, bytenr, size);
7593                 goto out;
7594         }
7595
7596         nritems = btrfs_header_nritems(buf);
7597
7598         flags = 0;
7599         if (!init_extent_tree) {
7600                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7601                                        btrfs_header_level(buf), 1, NULL,
7602                                        &flags);
7603                 if (ret < 0) {
7604                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7605                         if (ret < 0) {
7606                                 fprintf(stderr, "Couldn't calc extent flags\n");
7607                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7608                         }
7609                 }
7610         } else {
7611                 flags = 0;
7612                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7613                 if (ret < 0) {
7614                         fprintf(stderr, "Couldn't calc extent flags\n");
7615                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7616                 }
7617         }
7618
7619         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7620                 if (ri != NULL &&
7621                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7622                     ri->objectid == btrfs_header_owner(buf)) {
7623                         /*
7624                          * Ok we got to this block from it's original owner and
7625                          * we have FULL_BACKREF set.  Relocation can leave
7626                          * converted blocks over so this is altogether possible,
7627                          * however it's not possible if the generation > the
7628                          * last snapshot, so check for this case.
7629                          */
7630                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7631                             btrfs_header_generation(buf) > ri->last_snapshot) {
7632                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7633                                 rec->bad_full_backref = 1;
7634                         }
7635                 }
7636         } else {
7637                 if (ri != NULL &&
7638                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7639                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7640                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7641                         rec->bad_full_backref = 1;
7642                 }
7643         }
7644
7645         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7646                 rec->flag_block_full_backref = 1;
7647                 parent = bytenr;
7648                 owner = 0;
7649         } else {
7650                 rec->flag_block_full_backref = 0;
7651                 parent = 0;
7652                 owner = btrfs_header_owner(buf);
7653         }
7654
7655         ret = check_block(root, extent_cache, buf, flags);
7656         if (ret)
7657                 goto out;
7658
7659         if (btrfs_is_leaf(buf)) {
7660                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7661                 for (i = 0; i < nritems; i++) {
7662                         struct btrfs_file_extent_item *fi;
7663                         btrfs_item_key_to_cpu(buf, &key, i);
7664                         /*
7665                          * Check key type against the leaf owner.
7666                          * Could filter quite a lot of early error if
7667                          * owner is correct
7668                          */
7669                         if (check_type_with_root(btrfs_header_owner(buf),
7670                                                  key.type)) {
7671                                 fprintf(stderr, "ignoring invalid key\n");
7672                                 continue;
7673                         }
7674                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7675                                 process_extent_item(root, extent_cache, buf,
7676                                                     i);
7677                                 continue;
7678                         }
7679                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7680                                 process_extent_item(root, extent_cache, buf,
7681                                                     i);
7682                                 continue;
7683                         }
7684                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7685                                 total_csum_bytes +=
7686                                         btrfs_item_size_nr(buf, i);
7687                                 continue;
7688                         }
7689                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7690                                 process_chunk_item(chunk_cache, &key, buf, i);
7691                                 continue;
7692                         }
7693                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7694                                 process_device_item(dev_cache, &key, buf, i);
7695                                 continue;
7696                         }
7697                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7698                                 process_block_group_item(block_group_cache,
7699                                         &key, buf, i);
7700                                 continue;
7701                         }
7702                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7703                                 process_device_extent_item(dev_extent_cache,
7704                                         &key, buf, i);
7705                                 continue;
7706
7707                         }
7708                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7709 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7710                                 process_extent_ref_v0(extent_cache, buf, i);
7711 #else
7712                                 BUG();
7713 #endif
7714                                 continue;
7715                         }
7716
7717                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7718                                 ret = add_tree_backref(extent_cache,
7719                                                 key.objectid, 0, key.offset, 0);
7720                                 if (ret < 0)
7721                                         error("add_tree_backref failed: %s",
7722                                               strerror(-ret));
7723                                 continue;
7724                         }
7725                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7726                                 ret = add_tree_backref(extent_cache,
7727                                                 key.objectid, key.offset, 0, 0);
7728                                 if (ret < 0)
7729                                         error("add_tree_backref failed: %s",
7730                                               strerror(-ret));
7731                                 continue;
7732                         }
7733                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7734                                 struct btrfs_extent_data_ref *ref;
7735                                 ref = btrfs_item_ptr(buf, i,
7736                                                 struct btrfs_extent_data_ref);
7737                                 add_data_backref(extent_cache,
7738                                         key.objectid, 0,
7739                                         btrfs_extent_data_ref_root(buf, ref),
7740                                         btrfs_extent_data_ref_objectid(buf,
7741                                                                        ref),
7742                                         btrfs_extent_data_ref_offset(buf, ref),
7743                                         btrfs_extent_data_ref_count(buf, ref),
7744                                         0, root->sectorsize);
7745                                 continue;
7746                         }
7747                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7748                                 struct btrfs_shared_data_ref *ref;
7749                                 ref = btrfs_item_ptr(buf, i,
7750                                                 struct btrfs_shared_data_ref);
7751                                 add_data_backref(extent_cache,
7752                                         key.objectid, key.offset, 0, 0, 0,
7753                                         btrfs_shared_data_ref_count(buf, ref),
7754                                         0, root->sectorsize);
7755                                 continue;
7756                         }
7757                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7758                                 struct bad_item *bad;
7759
7760                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7761                                         continue;
7762                                 if (!owner)
7763                                         continue;
7764                                 bad = malloc(sizeof(struct bad_item));
7765                                 if (!bad)
7766                                         continue;
7767                                 INIT_LIST_HEAD(&bad->list);
7768                                 memcpy(&bad->key, &key,
7769                                        sizeof(struct btrfs_key));
7770                                 bad->root_id = owner;
7771                                 list_add_tail(&bad->list, &delete_items);
7772                                 continue;
7773                         }
7774                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7775                                 continue;
7776                         fi = btrfs_item_ptr(buf, i,
7777                                             struct btrfs_file_extent_item);
7778                         if (btrfs_file_extent_type(buf, fi) ==
7779                             BTRFS_FILE_EXTENT_INLINE)
7780                                 continue;
7781                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7782                                 continue;
7783
7784                         data_bytes_allocated +=
7785                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7786                         if (data_bytes_allocated < root->sectorsize) {
7787                                 abort();
7788                         }
7789                         data_bytes_referenced +=
7790                                 btrfs_file_extent_num_bytes(buf, fi);
7791                         add_data_backref(extent_cache,
7792                                 btrfs_file_extent_disk_bytenr(buf, fi),
7793                                 parent, owner, key.objectid, key.offset -
7794                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7795                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7796                 }
7797         } else {
7798                 int level;
7799                 struct btrfs_key first_key;
7800
7801                 first_key.objectid = 0;
7802
7803                 if (nritems > 0)
7804                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7805                 level = btrfs_header_level(buf);
7806                 for (i = 0; i < nritems; i++) {
7807                         struct extent_record tmpl;
7808
7809                         ptr = btrfs_node_blockptr(buf, i);
7810                         size = root->nodesize;
7811                         btrfs_node_key_to_cpu(buf, &key, i);
7812                         if (ri != NULL) {
7813                                 if ((level == ri->drop_level)
7814                                     && is_dropped_key(&key, &ri->drop_key)) {
7815                                         continue;
7816                                 }
7817                         }
7818
7819                         memset(&tmpl, 0, sizeof(tmpl));
7820                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7821                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7822                         tmpl.start = ptr;
7823                         tmpl.nr = size;
7824                         tmpl.refs = 1;
7825                         tmpl.metadata = 1;
7826                         tmpl.max_size = size;
7827                         ret = add_extent_rec(extent_cache, &tmpl);
7828                         if (ret < 0)
7829                                 goto out;
7830
7831                         ret = add_tree_backref(extent_cache, ptr, parent,
7832                                         owner, 1);
7833                         if (ret < 0) {
7834                                 error("add_tree_backref failed: %s",
7835                                       strerror(-ret));
7836                                 continue;
7837                         }
7838
7839                         if (level > 1) {
7840                                 add_pending(nodes, seen, ptr, size);
7841                         } else {
7842                                 add_pending(pending, seen, ptr, size);
7843                         }
7844                 }
7845                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7846                                       nritems) * sizeof(struct btrfs_key_ptr);
7847         }
7848         total_btree_bytes += buf->len;
7849         if (fs_root_objectid(btrfs_header_owner(buf)))
7850                 total_fs_tree_bytes += buf->len;
7851         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7852                 total_extent_tree_bytes += buf->len;
7853         if (!found_old_backref &&
7854             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7855             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7856             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7857                 found_old_backref = 1;
7858 out:
7859         free_extent_buffer(buf);
7860         return ret;
7861 }
7862
7863 static int add_root_to_pending(struct extent_buffer *buf,
7864                                struct cache_tree *extent_cache,
7865                                struct cache_tree *pending,
7866                                struct cache_tree *seen,
7867                                struct cache_tree *nodes,
7868                                u64 objectid)
7869 {
7870         struct extent_record tmpl;
7871         int ret;
7872
7873         if (btrfs_header_level(buf) > 0)
7874                 add_pending(nodes, seen, buf->start, buf->len);
7875         else
7876                 add_pending(pending, seen, buf->start, buf->len);
7877
7878         memset(&tmpl, 0, sizeof(tmpl));
7879         tmpl.start = buf->start;
7880         tmpl.nr = buf->len;
7881         tmpl.is_root = 1;
7882         tmpl.refs = 1;
7883         tmpl.metadata = 1;
7884         tmpl.max_size = buf->len;
7885         add_extent_rec(extent_cache, &tmpl);
7886
7887         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7888             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7889                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7890                                 0, 1);
7891         else
7892                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7893                                 1);
7894         return ret;
7895 }
7896
7897 /* as we fix the tree, we might be deleting blocks that
7898  * we're tracking for repair.  This hook makes sure we
7899  * remove any backrefs for blocks as we are fixing them.
7900  */
7901 static int free_extent_hook(struct btrfs_trans_handle *trans,
7902                             struct btrfs_root *root,
7903                             u64 bytenr, u64 num_bytes, u64 parent,
7904                             u64 root_objectid, u64 owner, u64 offset,
7905                             int refs_to_drop)
7906 {
7907         struct extent_record *rec;
7908         struct cache_extent *cache;
7909         int is_data;
7910         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7911
7912         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7913         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7914         if (!cache)
7915                 return 0;
7916
7917         rec = container_of(cache, struct extent_record, cache);
7918         if (is_data) {
7919                 struct data_backref *back;
7920                 back = find_data_backref(rec, parent, root_objectid, owner,
7921                                          offset, 1, bytenr, num_bytes);
7922                 if (!back)
7923                         goto out;
7924                 if (back->node.found_ref) {
7925                         back->found_ref -= refs_to_drop;
7926                         if (rec->refs)
7927                                 rec->refs -= refs_to_drop;
7928                 }
7929                 if (back->node.found_extent_tree) {
7930                         back->num_refs -= refs_to_drop;
7931                         if (rec->extent_item_refs)
7932                                 rec->extent_item_refs -= refs_to_drop;
7933                 }
7934                 if (back->found_ref == 0)
7935                         back->node.found_ref = 0;
7936                 if (back->num_refs == 0)
7937                         back->node.found_extent_tree = 0;
7938
7939                 if (!back->node.found_extent_tree && back->node.found_ref) {
7940                         list_del(&back->node.list);
7941                         free(back);
7942                 }
7943         } else {
7944                 struct tree_backref *back;
7945                 back = find_tree_backref(rec, parent, root_objectid);
7946                 if (!back)
7947                         goto out;
7948                 if (back->node.found_ref) {
7949                         if (rec->refs)
7950                                 rec->refs--;
7951                         back->node.found_ref = 0;
7952                 }
7953                 if (back->node.found_extent_tree) {
7954                         if (rec->extent_item_refs)
7955                                 rec->extent_item_refs--;
7956                         back->node.found_extent_tree = 0;
7957                 }
7958                 if (!back->node.found_extent_tree && back->node.found_ref) {
7959                         list_del(&back->node.list);
7960                         free(back);
7961                 }
7962         }
7963         maybe_free_extent_rec(extent_cache, rec);
7964 out:
7965         return 0;
7966 }
7967
7968 static int delete_extent_records(struct btrfs_trans_handle *trans,
7969                                  struct btrfs_root *root,
7970                                  struct btrfs_path *path,
7971                                  u64 bytenr)
7972 {
7973         struct btrfs_key key;
7974         struct btrfs_key found_key;
7975         struct extent_buffer *leaf;
7976         int ret;
7977         int slot;
7978
7979
7980         key.objectid = bytenr;
7981         key.type = (u8)-1;
7982         key.offset = (u64)-1;
7983
7984         while(1) {
7985                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7986                                         &key, path, 0, 1);
7987                 if (ret < 0)
7988                         break;
7989
7990                 if (ret > 0) {
7991                         ret = 0;
7992                         if (path->slots[0] == 0)
7993                                 break;
7994                         path->slots[0]--;
7995                 }
7996                 ret = 0;
7997
7998                 leaf = path->nodes[0];
7999                 slot = path->slots[0];
8000
8001                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8002                 if (found_key.objectid != bytenr)
8003                         break;
8004
8005                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8006                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8007                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8008                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8009                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8010                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8011                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8012                         btrfs_release_path(path);
8013                         if (found_key.type == 0) {
8014                                 if (found_key.offset == 0)
8015                                         break;
8016                                 key.offset = found_key.offset - 1;
8017                                 key.type = found_key.type;
8018                         }
8019                         key.type = found_key.type - 1;
8020                         key.offset = (u64)-1;
8021                         continue;
8022                 }
8023
8024                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8025                         found_key.objectid, found_key.type, found_key.offset);
8026
8027                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8028                 if (ret)
8029                         break;
8030                 btrfs_release_path(path);
8031
8032                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8033                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8034                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8035                                 found_key.offset : root->nodesize;
8036
8037                         ret = btrfs_update_block_group(trans, root, bytenr,
8038                                                        bytes, 0, 0);
8039                         if (ret)
8040                                 break;
8041                 }
8042         }
8043
8044         btrfs_release_path(path);
8045         return ret;
8046 }
8047
8048 /*
8049  * for a single backref, this will allocate a new extent
8050  * and add the backref to it.
8051  */
8052 static int record_extent(struct btrfs_trans_handle *trans,
8053                          struct btrfs_fs_info *info,
8054                          struct btrfs_path *path,
8055                          struct extent_record *rec,
8056                          struct extent_backref *back,
8057                          int allocated, u64 flags)
8058 {
8059         int ret = 0;
8060         struct btrfs_root *extent_root = info->extent_root;
8061         struct extent_buffer *leaf;
8062         struct btrfs_key ins_key;
8063         struct btrfs_extent_item *ei;
8064         struct data_backref *dback;
8065         struct btrfs_tree_block_info *bi;
8066
8067         if (!back->is_data)
8068                 rec->max_size = max_t(u64, rec->max_size,
8069                                     info->extent_root->nodesize);
8070
8071         if (!allocated) {
8072                 u32 item_size = sizeof(*ei);
8073
8074                 if (!back->is_data)
8075                         item_size += sizeof(*bi);
8076
8077                 ins_key.objectid = rec->start;
8078                 ins_key.offset = rec->max_size;
8079                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8080
8081                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8082                                         &ins_key, item_size);
8083                 if (ret)
8084                         goto fail;
8085
8086                 leaf = path->nodes[0];
8087                 ei = btrfs_item_ptr(leaf, path->slots[0],
8088                                     struct btrfs_extent_item);
8089
8090                 btrfs_set_extent_refs(leaf, ei, 0);
8091                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8092
8093                 if (back->is_data) {
8094                         btrfs_set_extent_flags(leaf, ei,
8095                                                BTRFS_EXTENT_FLAG_DATA);
8096                 } else {
8097                         struct btrfs_disk_key copy_key;;
8098
8099                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8100                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8101                                              sizeof(*bi));
8102
8103                         btrfs_set_disk_key_objectid(&copy_key,
8104                                                     rec->info_objectid);
8105                         btrfs_set_disk_key_type(&copy_key, 0);
8106                         btrfs_set_disk_key_offset(&copy_key, 0);
8107
8108                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8109                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8110
8111                         btrfs_set_extent_flags(leaf, ei,
8112                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8113                 }
8114
8115                 btrfs_mark_buffer_dirty(leaf);
8116                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8117                                                rec->max_size, 1, 0);
8118                 if (ret)
8119                         goto fail;
8120                 btrfs_release_path(path);
8121         }
8122
8123         if (back->is_data) {
8124                 u64 parent;
8125                 int i;
8126
8127                 dback = to_data_backref(back);
8128                 if (back->full_backref)
8129                         parent = dback->parent;
8130                 else
8131                         parent = 0;
8132
8133                 for (i = 0; i < dback->found_ref; i++) {
8134                         /* if parent != 0, we're doing a full backref
8135                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8136                          * just makes the backref allocator create a data
8137                          * backref
8138                          */
8139                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8140                                                    rec->start, rec->max_size,
8141                                                    parent,
8142                                                    dback->root,
8143                                                    parent ?
8144                                                    BTRFS_FIRST_FREE_OBJECTID :
8145                                                    dback->owner,
8146                                                    dback->offset);
8147                         if (ret)
8148                                 break;
8149                 }
8150                 fprintf(stderr, "adding new data backref"
8151                                 " on %llu %s %llu owner %llu"
8152                                 " offset %llu found %d\n",
8153                                 (unsigned long long)rec->start,
8154                                 back->full_backref ?
8155                                 "parent" : "root",
8156                                 back->full_backref ?
8157                                 (unsigned long long)parent :
8158                                 (unsigned long long)dback->root,
8159                                 (unsigned long long)dback->owner,
8160                                 (unsigned long long)dback->offset,
8161                                 dback->found_ref);
8162         } else {
8163                 u64 parent;
8164                 struct tree_backref *tback;
8165
8166                 tback = to_tree_backref(back);
8167                 if (back->full_backref)
8168                         parent = tback->parent;
8169                 else
8170                         parent = 0;
8171
8172                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8173                                            rec->start, rec->max_size,
8174                                            parent, tback->root, 0, 0);
8175                 fprintf(stderr, "adding new tree backref on "
8176                         "start %llu len %llu parent %llu root %llu\n",
8177                         rec->start, rec->max_size, parent, tback->root);
8178         }
8179 fail:
8180         btrfs_release_path(path);
8181         return ret;
8182 }
8183
8184 static struct extent_entry *find_entry(struct list_head *entries,
8185                                        u64 bytenr, u64 bytes)
8186 {
8187         struct extent_entry *entry = NULL;
8188
8189         list_for_each_entry(entry, entries, list) {
8190                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8191                         return entry;
8192         }
8193
8194         return NULL;
8195 }
8196
8197 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8198 {
8199         struct extent_entry *entry, *best = NULL, *prev = NULL;
8200
8201         list_for_each_entry(entry, entries, list) {
8202                 /*
8203                  * If there are as many broken entries as entries then we know
8204                  * not to trust this particular entry.
8205                  */
8206                 if (entry->broken == entry->count)
8207                         continue;
8208
8209                 /*
8210                  * Special case, when there are only two entries and 'best' is
8211                  * the first one
8212                  */
8213                 if (!prev) {
8214                         best = entry;
8215                         prev = entry;
8216                         continue;
8217                 }
8218
8219                 /*
8220                  * If our current entry == best then we can't be sure our best
8221                  * is really the best, so we need to keep searching.
8222                  */
8223                 if (best && best->count == entry->count) {
8224                         prev = entry;
8225                         best = NULL;
8226                         continue;
8227                 }
8228
8229                 /* Prev == entry, not good enough, have to keep searching */
8230                 if (!prev->broken && prev->count == entry->count)
8231                         continue;
8232
8233                 if (!best)
8234                         best = (prev->count > entry->count) ? prev : entry;
8235                 else if (best->count < entry->count)
8236                         best = entry;
8237                 prev = entry;
8238         }
8239
8240         return best;
8241 }
8242
8243 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8244                       struct data_backref *dback, struct extent_entry *entry)
8245 {
8246         struct btrfs_trans_handle *trans;
8247         struct btrfs_root *root;
8248         struct btrfs_file_extent_item *fi;
8249         struct extent_buffer *leaf;
8250         struct btrfs_key key;
8251         u64 bytenr, bytes;
8252         int ret, err;
8253
8254         key.objectid = dback->root;
8255         key.type = BTRFS_ROOT_ITEM_KEY;
8256         key.offset = (u64)-1;
8257         root = btrfs_read_fs_root(info, &key);
8258         if (IS_ERR(root)) {
8259                 fprintf(stderr, "Couldn't find root for our ref\n");
8260                 return -EINVAL;
8261         }
8262
8263         /*
8264          * The backref points to the original offset of the extent if it was
8265          * split, so we need to search down to the offset we have and then walk
8266          * forward until we find the backref we're looking for.
8267          */
8268         key.objectid = dback->owner;
8269         key.type = BTRFS_EXTENT_DATA_KEY;
8270         key.offset = dback->offset;
8271         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8272         if (ret < 0) {
8273                 fprintf(stderr, "Error looking up ref %d\n", ret);
8274                 return ret;
8275         }
8276
8277         while (1) {
8278                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8279                         ret = btrfs_next_leaf(root, path);
8280                         if (ret) {
8281                                 fprintf(stderr, "Couldn't find our ref, next\n");
8282                                 return -EINVAL;
8283                         }
8284                 }
8285                 leaf = path->nodes[0];
8286                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8287                 if (key.objectid != dback->owner ||
8288                     key.type != BTRFS_EXTENT_DATA_KEY) {
8289                         fprintf(stderr, "Couldn't find our ref, search\n");
8290                         return -EINVAL;
8291                 }
8292                 fi = btrfs_item_ptr(leaf, path->slots[0],
8293                                     struct btrfs_file_extent_item);
8294                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8295                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8296
8297                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8298                         break;
8299                 path->slots[0]++;
8300         }
8301
8302         btrfs_release_path(path);
8303
8304         trans = btrfs_start_transaction(root, 1);
8305         if (IS_ERR(trans))
8306                 return PTR_ERR(trans);
8307
8308         /*
8309          * Ok we have the key of the file extent we want to fix, now we can cow
8310          * down to the thing and fix it.
8311          */
8312         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8313         if (ret < 0) {
8314                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8315                         key.objectid, key.type, key.offset, ret);
8316                 goto out;
8317         }
8318         if (ret > 0) {
8319                 fprintf(stderr, "Well that's odd, we just found this key "
8320                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8321                         key.offset);
8322                 ret = -EINVAL;
8323                 goto out;
8324         }
8325         leaf = path->nodes[0];
8326         fi = btrfs_item_ptr(leaf, path->slots[0],
8327                             struct btrfs_file_extent_item);
8328
8329         if (btrfs_file_extent_compression(leaf, fi) &&
8330             dback->disk_bytenr != entry->bytenr) {
8331                 fprintf(stderr, "Ref doesn't match the record start and is "
8332                         "compressed, please take a btrfs-image of this file "
8333                         "system and send it to a btrfs developer so they can "
8334                         "complete this functionality for bytenr %Lu\n",
8335                         dback->disk_bytenr);
8336                 ret = -EINVAL;
8337                 goto out;
8338         }
8339
8340         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8341                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8342         } else if (dback->disk_bytenr > entry->bytenr) {
8343                 u64 off_diff, offset;
8344
8345                 off_diff = dback->disk_bytenr - entry->bytenr;
8346                 offset = btrfs_file_extent_offset(leaf, fi);
8347                 if (dback->disk_bytenr + offset +
8348                     btrfs_file_extent_num_bytes(leaf, fi) >
8349                     entry->bytenr + entry->bytes) {
8350                         fprintf(stderr, "Ref is past the entry end, please "
8351                                 "take a btrfs-image of this file system and "
8352                                 "send it to a btrfs developer, ref %Lu\n",
8353                                 dback->disk_bytenr);
8354                         ret = -EINVAL;
8355                         goto out;
8356                 }
8357                 offset += off_diff;
8358                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8359                 btrfs_set_file_extent_offset(leaf, fi, offset);
8360         } else if (dback->disk_bytenr < entry->bytenr) {
8361                 u64 offset;
8362
8363                 offset = btrfs_file_extent_offset(leaf, fi);
8364                 if (dback->disk_bytenr + offset < entry->bytenr) {
8365                         fprintf(stderr, "Ref is before the entry start, please"
8366                                 " take a btrfs-image of this file system and "
8367                                 "send it to a btrfs developer, ref %Lu\n",
8368                                 dback->disk_bytenr);
8369                         ret = -EINVAL;
8370                         goto out;
8371                 }
8372
8373                 offset += dback->disk_bytenr;
8374                 offset -= entry->bytenr;
8375                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8376                 btrfs_set_file_extent_offset(leaf, fi, offset);
8377         }
8378
8379         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8380
8381         /*
8382          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8383          * only do this if we aren't using compression, otherwise it's a
8384          * trickier case.
8385          */
8386         if (!btrfs_file_extent_compression(leaf, fi))
8387                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8388         else
8389                 printf("ram bytes may be wrong?\n");
8390         btrfs_mark_buffer_dirty(leaf);
8391 out:
8392         err = btrfs_commit_transaction(trans, root);
8393         btrfs_release_path(path);
8394         return ret ? ret : err;
8395 }
8396
8397 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8398                            struct extent_record *rec)
8399 {
8400         struct extent_backref *back;
8401         struct data_backref *dback;
8402         struct extent_entry *entry, *best = NULL;
8403         LIST_HEAD(entries);
8404         int nr_entries = 0;
8405         int broken_entries = 0;
8406         int ret = 0;
8407         short mismatch = 0;
8408
8409         /*
8410          * Metadata is easy and the backrefs should always agree on bytenr and
8411          * size, if not we've got bigger issues.
8412          */
8413         if (rec->metadata)
8414                 return 0;
8415
8416         list_for_each_entry(back, &rec->backrefs, list) {
8417                 if (back->full_backref || !back->is_data)
8418                         continue;
8419
8420                 dback = to_data_backref(back);
8421
8422                 /*
8423                  * We only pay attention to backrefs that we found a real
8424                  * backref for.
8425                  */
8426                 if (dback->found_ref == 0)
8427                         continue;
8428
8429                 /*
8430                  * For now we only catch when the bytes don't match, not the
8431                  * bytenr.  We can easily do this at the same time, but I want
8432                  * to have a fs image to test on before we just add repair
8433                  * functionality willy-nilly so we know we won't screw up the
8434                  * repair.
8435                  */
8436
8437                 entry = find_entry(&entries, dback->disk_bytenr,
8438                                    dback->bytes);
8439                 if (!entry) {
8440                         entry = malloc(sizeof(struct extent_entry));
8441                         if (!entry) {
8442                                 ret = -ENOMEM;
8443                                 goto out;
8444                         }
8445                         memset(entry, 0, sizeof(*entry));
8446                         entry->bytenr = dback->disk_bytenr;
8447                         entry->bytes = dback->bytes;
8448                         list_add_tail(&entry->list, &entries);
8449                         nr_entries++;
8450                 }
8451
8452                 /*
8453                  * If we only have on entry we may think the entries agree when
8454                  * in reality they don't so we have to do some extra checking.
8455                  */
8456                 if (dback->disk_bytenr != rec->start ||
8457                     dback->bytes != rec->nr || back->broken)
8458                         mismatch = 1;
8459
8460                 if (back->broken) {
8461                         entry->broken++;
8462                         broken_entries++;
8463                 }
8464
8465                 entry->count++;
8466         }
8467
8468         /* Yay all the backrefs agree, carry on good sir */
8469         if (nr_entries <= 1 && !mismatch)
8470                 goto out;
8471
8472         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8473                 "%Lu\n", rec->start);
8474
8475         /*
8476          * First we want to see if the backrefs can agree amongst themselves who
8477          * is right, so figure out which one of the entries has the highest
8478          * count.
8479          */
8480         best = find_most_right_entry(&entries);
8481
8482         /*
8483          * Ok so we may have an even split between what the backrefs think, so
8484          * this is where we use the extent ref to see what it thinks.
8485          */
8486         if (!best) {
8487                 entry = find_entry(&entries, rec->start, rec->nr);
8488                 if (!entry && (!broken_entries || !rec->found_rec)) {
8489                         fprintf(stderr, "Backrefs don't agree with each other "
8490                                 "and extent record doesn't agree with anybody,"
8491                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8492                                 rec->start, rec->nr);
8493                         ret = -EINVAL;
8494                         goto out;
8495                 } else if (!entry) {
8496                         /*
8497                          * Ok our backrefs were broken, we'll assume this is the
8498                          * correct value and add an entry for this range.
8499                          */
8500                         entry = malloc(sizeof(struct extent_entry));
8501                         if (!entry) {
8502                                 ret = -ENOMEM;
8503                                 goto out;
8504                         }
8505                         memset(entry, 0, sizeof(*entry));
8506                         entry->bytenr = rec->start;
8507                         entry->bytes = rec->nr;
8508                         list_add_tail(&entry->list, &entries);
8509                         nr_entries++;
8510                 }
8511                 entry->count++;
8512                 best = find_most_right_entry(&entries);
8513                 if (!best) {
8514                         fprintf(stderr, "Backrefs and extent record evenly "
8515                                 "split on who is right, this is going to "
8516                                 "require user input to fix bytenr %Lu bytes "
8517                                 "%Lu\n", rec->start, rec->nr);
8518                         ret = -EINVAL;
8519                         goto out;
8520                 }
8521         }
8522
8523         /*
8524          * I don't think this can happen currently as we'll abort() if we catch
8525          * this case higher up, but in case somebody removes that we still can't
8526          * deal with it properly here yet, so just bail out of that's the case.
8527          */
8528         if (best->bytenr != rec->start) {
8529                 fprintf(stderr, "Extent start and backref starts don't match, "
8530                         "please use btrfs-image on this file system and send "
8531                         "it to a btrfs developer so they can make fsck fix "
8532                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8533                         rec->start, rec->nr);
8534                 ret = -EINVAL;
8535                 goto out;
8536         }
8537
8538         /*
8539          * Ok great we all agreed on an extent record, let's go find the real
8540          * references and fix up the ones that don't match.
8541          */
8542         list_for_each_entry(back, &rec->backrefs, list) {
8543                 if (back->full_backref || !back->is_data)
8544                         continue;
8545
8546                 dback = to_data_backref(back);
8547
8548                 /*
8549                  * Still ignoring backrefs that don't have a real ref attached
8550                  * to them.
8551                  */
8552                 if (dback->found_ref == 0)
8553                         continue;
8554
8555                 if (dback->bytes == best->bytes &&
8556                     dback->disk_bytenr == best->bytenr)
8557                         continue;
8558
8559                 ret = repair_ref(info, path, dback, best);
8560                 if (ret)
8561                         goto out;
8562         }
8563
8564         /*
8565          * Ok we messed with the actual refs, which means we need to drop our
8566          * entire cache and go back and rescan.  I know this is a huge pain and
8567          * adds a lot of extra work, but it's the only way to be safe.  Once all
8568          * the backrefs agree we may not need to do anything to the extent
8569          * record itself.
8570          */
8571         ret = -EAGAIN;
8572 out:
8573         while (!list_empty(&entries)) {
8574                 entry = list_entry(entries.next, struct extent_entry, list);
8575                 list_del_init(&entry->list);
8576                 free(entry);
8577         }
8578         return ret;
8579 }
8580
8581 static int process_duplicates(struct btrfs_root *root,
8582                               struct cache_tree *extent_cache,
8583                               struct extent_record *rec)
8584 {
8585         struct extent_record *good, *tmp;
8586         struct cache_extent *cache;
8587         int ret;
8588
8589         /*
8590          * If we found a extent record for this extent then return, or if we
8591          * have more than one duplicate we are likely going to need to delete
8592          * something.
8593          */
8594         if (rec->found_rec || rec->num_duplicates > 1)
8595                 return 0;
8596
8597         /* Shouldn't happen but just in case */
8598         BUG_ON(!rec->num_duplicates);
8599
8600         /*
8601          * So this happens if we end up with a backref that doesn't match the
8602          * actual extent entry.  So either the backref is bad or the extent
8603          * entry is bad.  Either way we want to have the extent_record actually
8604          * reflect what we found in the extent_tree, so we need to take the
8605          * duplicate out and use that as the extent_record since the only way we
8606          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8607          */
8608         remove_cache_extent(extent_cache, &rec->cache);
8609
8610         good = to_extent_record(rec->dups.next);
8611         list_del_init(&good->list);
8612         INIT_LIST_HEAD(&good->backrefs);
8613         INIT_LIST_HEAD(&good->dups);
8614         good->cache.start = good->start;
8615         good->cache.size = good->nr;
8616         good->content_checked = 0;
8617         good->owner_ref_checked = 0;
8618         good->num_duplicates = 0;
8619         good->refs = rec->refs;
8620         list_splice_init(&rec->backrefs, &good->backrefs);
8621         while (1) {
8622                 cache = lookup_cache_extent(extent_cache, good->start,
8623                                             good->nr);
8624                 if (!cache)
8625                         break;
8626                 tmp = container_of(cache, struct extent_record, cache);
8627
8628                 /*
8629                  * If we find another overlapping extent and it's found_rec is
8630                  * set then it's a duplicate and we need to try and delete
8631                  * something.
8632                  */
8633                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8634                         if (list_empty(&good->list))
8635                                 list_add_tail(&good->list,
8636                                               &duplicate_extents);
8637                         good->num_duplicates += tmp->num_duplicates + 1;
8638                         list_splice_init(&tmp->dups, &good->dups);
8639                         list_del_init(&tmp->list);
8640                         list_add_tail(&tmp->list, &good->dups);
8641                         remove_cache_extent(extent_cache, &tmp->cache);
8642                         continue;
8643                 }
8644
8645                 /*
8646                  * Ok we have another non extent item backed extent rec, so lets
8647                  * just add it to this extent and carry on like we did above.
8648                  */
8649                 good->refs += tmp->refs;
8650                 list_splice_init(&tmp->backrefs, &good->backrefs);
8651                 remove_cache_extent(extent_cache, &tmp->cache);
8652                 free(tmp);
8653         }
8654         ret = insert_cache_extent(extent_cache, &good->cache);
8655         BUG_ON(ret);
8656         free(rec);
8657         return good->num_duplicates ? 0 : 1;
8658 }
8659
8660 static int delete_duplicate_records(struct btrfs_root *root,
8661                                     struct extent_record *rec)
8662 {
8663         struct btrfs_trans_handle *trans;
8664         LIST_HEAD(delete_list);
8665         struct btrfs_path path;
8666         struct extent_record *tmp, *good, *n;
8667         int nr_del = 0;
8668         int ret = 0, err;
8669         struct btrfs_key key;
8670
8671         btrfs_init_path(&path);
8672
8673         good = rec;
8674         /* Find the record that covers all of the duplicates. */
8675         list_for_each_entry(tmp, &rec->dups, list) {
8676                 if (good->start < tmp->start)
8677                         continue;
8678                 if (good->nr > tmp->nr)
8679                         continue;
8680
8681                 if (tmp->start + tmp->nr < good->start + good->nr) {
8682                         fprintf(stderr, "Ok we have overlapping extents that "
8683                                 "aren't completely covered by each other, this "
8684                                 "is going to require more careful thought.  "
8685                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8686                                 tmp->start, tmp->nr, good->start, good->nr);
8687                         abort();
8688                 }
8689                 good = tmp;
8690         }
8691
8692         if (good != rec)
8693                 list_add_tail(&rec->list, &delete_list);
8694
8695         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8696                 if (tmp == good)
8697                         continue;
8698                 list_move_tail(&tmp->list, &delete_list);
8699         }
8700
8701         root = root->fs_info->extent_root;
8702         trans = btrfs_start_transaction(root, 1);
8703         if (IS_ERR(trans)) {
8704                 ret = PTR_ERR(trans);
8705                 goto out;
8706         }
8707
8708         list_for_each_entry(tmp, &delete_list, list) {
8709                 if (tmp->found_rec == 0)
8710                         continue;
8711                 key.objectid = tmp->start;
8712                 key.type = BTRFS_EXTENT_ITEM_KEY;
8713                 key.offset = tmp->nr;
8714
8715                 /* Shouldn't happen but just in case */
8716                 if (tmp->metadata) {
8717                         fprintf(stderr, "Well this shouldn't happen, extent "
8718                                 "record overlaps but is metadata? "
8719                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8720                         abort();
8721                 }
8722
8723                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8724                 if (ret) {
8725                         if (ret > 0)
8726                                 ret = -EINVAL;
8727                         break;
8728                 }
8729                 ret = btrfs_del_item(trans, root, &path);
8730                 if (ret)
8731                         break;
8732                 btrfs_release_path(&path);
8733                 nr_del++;
8734         }
8735         err = btrfs_commit_transaction(trans, root);
8736         if (err && !ret)
8737                 ret = err;
8738 out:
8739         while (!list_empty(&delete_list)) {
8740                 tmp = to_extent_record(delete_list.next);
8741                 list_del_init(&tmp->list);
8742                 if (tmp == rec)
8743                         continue;
8744                 free(tmp);
8745         }
8746
8747         while (!list_empty(&rec->dups)) {
8748                 tmp = to_extent_record(rec->dups.next);
8749                 list_del_init(&tmp->list);
8750                 free(tmp);
8751         }
8752
8753         btrfs_release_path(&path);
8754
8755         if (!ret && !nr_del)
8756                 rec->num_duplicates = 0;
8757
8758         return ret ? ret : nr_del;
8759 }
8760
8761 static int find_possible_backrefs(struct btrfs_fs_info *info,
8762                                   struct btrfs_path *path,
8763                                   struct cache_tree *extent_cache,
8764                                   struct extent_record *rec)
8765 {
8766         struct btrfs_root *root;
8767         struct extent_backref *back;
8768         struct data_backref *dback;
8769         struct cache_extent *cache;
8770         struct btrfs_file_extent_item *fi;
8771         struct btrfs_key key;
8772         u64 bytenr, bytes;
8773         int ret;
8774
8775         list_for_each_entry(back, &rec->backrefs, list) {
8776                 /* Don't care about full backrefs (poor unloved backrefs) */
8777                 if (back->full_backref || !back->is_data)
8778                         continue;
8779
8780                 dback = to_data_backref(back);
8781
8782                 /* We found this one, we don't need to do a lookup */
8783                 if (dback->found_ref)
8784                         continue;
8785
8786                 key.objectid = dback->root;
8787                 key.type = BTRFS_ROOT_ITEM_KEY;
8788                 key.offset = (u64)-1;
8789
8790                 root = btrfs_read_fs_root(info, &key);
8791
8792                 /* No root, definitely a bad ref, skip */
8793                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8794                         continue;
8795                 /* Other err, exit */
8796                 if (IS_ERR(root))
8797                         return PTR_ERR(root);
8798
8799                 key.objectid = dback->owner;
8800                 key.type = BTRFS_EXTENT_DATA_KEY;
8801                 key.offset = dback->offset;
8802                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8803                 if (ret) {
8804                         btrfs_release_path(path);
8805                         if (ret < 0)
8806                                 return ret;
8807                         /* Didn't find it, we can carry on */
8808                         ret = 0;
8809                         continue;
8810                 }
8811
8812                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8813                                     struct btrfs_file_extent_item);
8814                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8815                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8816                 btrfs_release_path(path);
8817                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8818                 if (cache) {
8819                         struct extent_record *tmp;
8820                         tmp = container_of(cache, struct extent_record, cache);
8821
8822                         /*
8823                          * If we found an extent record for the bytenr for this
8824                          * particular backref then we can't add it to our
8825                          * current extent record.  We only want to add backrefs
8826                          * that don't have a corresponding extent item in the
8827                          * extent tree since they likely belong to this record
8828                          * and we need to fix it if it doesn't match bytenrs.
8829                          */
8830                         if  (tmp->found_rec)
8831                                 continue;
8832                 }
8833
8834                 dback->found_ref += 1;
8835                 dback->disk_bytenr = bytenr;
8836                 dback->bytes = bytes;
8837
8838                 /*
8839                  * Set this so the verify backref code knows not to trust the
8840                  * values in this backref.
8841                  */
8842                 back->broken = 1;
8843         }
8844
8845         return 0;
8846 }
8847
8848 /*
8849  * Record orphan data ref into corresponding root.
8850  *
8851  * Return 0 if the extent item contains data ref and recorded.
8852  * Return 1 if the extent item contains no useful data ref
8853  *   On that case, it may contains only shared_dataref or metadata backref
8854  *   or the file extent exists(this should be handled by the extent bytenr
8855  *   recovery routine)
8856  * Return <0 if something goes wrong.
8857  */
8858 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8859                                       struct extent_record *rec)
8860 {
8861         struct btrfs_key key;
8862         struct btrfs_root *dest_root;
8863         struct extent_backref *back;
8864         struct data_backref *dback;
8865         struct orphan_data_extent *orphan;
8866         struct btrfs_path path;
8867         int recorded_data_ref = 0;
8868         int ret = 0;
8869
8870         if (rec->metadata)
8871                 return 1;
8872         btrfs_init_path(&path);
8873         list_for_each_entry(back, &rec->backrefs, list) {
8874                 if (back->full_backref || !back->is_data ||
8875                     !back->found_extent_tree)
8876                         continue;
8877                 dback = to_data_backref(back);
8878                 if (dback->found_ref)
8879                         continue;
8880                 key.objectid = dback->root;
8881                 key.type = BTRFS_ROOT_ITEM_KEY;
8882                 key.offset = (u64)-1;
8883
8884                 dest_root = btrfs_read_fs_root(fs_info, &key);
8885
8886                 /* For non-exist root we just skip it */
8887                 if (IS_ERR(dest_root) || !dest_root)
8888                         continue;
8889
8890                 key.objectid = dback->owner;
8891                 key.type = BTRFS_EXTENT_DATA_KEY;
8892                 key.offset = dback->offset;
8893
8894                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8895                 btrfs_release_path(&path);
8896                 /*
8897                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8898                  * we need to record it for inode/file extent rebuild.
8899                  * For ret > 0, we record it only for file extent rebuild.
8900                  * For ret == 0, the file extent exists but only bytenr
8901                  * mismatch, let the original bytenr fix routine to handle,
8902                  * don't record it.
8903                  */
8904                 if (ret == 0)
8905                         continue;
8906                 ret = 0;
8907                 orphan = malloc(sizeof(*orphan));
8908                 if (!orphan) {
8909                         ret = -ENOMEM;
8910                         goto out;
8911                 }
8912                 INIT_LIST_HEAD(&orphan->list);
8913                 orphan->root = dback->root;
8914                 orphan->objectid = dback->owner;
8915                 orphan->offset = dback->offset;
8916                 orphan->disk_bytenr = rec->cache.start;
8917                 orphan->disk_len = rec->cache.size;
8918                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8919                 recorded_data_ref = 1;
8920         }
8921 out:
8922         btrfs_release_path(&path);
8923         if (!ret)
8924                 return !recorded_data_ref;
8925         else
8926                 return ret;
8927 }
8928
8929 /*
8930  * when an incorrect extent item is found, this will delete
8931  * all of the existing entries for it and recreate them
8932  * based on what the tree scan found.
8933  */
8934 static int fixup_extent_refs(struct btrfs_fs_info *info,
8935                              struct cache_tree *extent_cache,
8936                              struct extent_record *rec)
8937 {
8938         struct btrfs_trans_handle *trans = NULL;
8939         int ret;
8940         struct btrfs_path path;
8941         struct list_head *cur = rec->backrefs.next;
8942         struct cache_extent *cache;
8943         struct extent_backref *back;
8944         int allocated = 0;
8945         u64 flags = 0;
8946
8947         if (rec->flag_block_full_backref)
8948                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8949
8950         btrfs_init_path(&path);
8951         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8952                 /*
8953                  * Sometimes the backrefs themselves are so broken they don't
8954                  * get attached to any meaningful rec, so first go back and
8955                  * check any of our backrefs that we couldn't find and throw
8956                  * them into the list if we find the backref so that
8957                  * verify_backrefs can figure out what to do.
8958                  */
8959                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8960                 if (ret < 0)
8961                         goto out;
8962         }
8963
8964         /* step one, make sure all of the backrefs agree */
8965         ret = verify_backrefs(info, &path, rec);
8966         if (ret < 0)
8967                 goto out;
8968
8969         trans = btrfs_start_transaction(info->extent_root, 1);
8970         if (IS_ERR(trans)) {
8971                 ret = PTR_ERR(trans);
8972                 goto out;
8973         }
8974
8975         /* step two, delete all the existing records */
8976         ret = delete_extent_records(trans, info->extent_root, &path,
8977                                     rec->start);
8978
8979         if (ret < 0)
8980                 goto out;
8981
8982         /* was this block corrupt?  If so, don't add references to it */
8983         cache = lookup_cache_extent(info->corrupt_blocks,
8984                                     rec->start, rec->max_size);
8985         if (cache) {
8986                 ret = 0;
8987                 goto out;
8988         }
8989
8990         /* step three, recreate all the refs we did find */
8991         while(cur != &rec->backrefs) {
8992                 back = to_extent_backref(cur);
8993                 cur = cur->next;
8994
8995                 /*
8996                  * if we didn't find any references, don't create a
8997                  * new extent record
8998                  */
8999                 if (!back->found_ref)
9000                         continue;
9001
9002                 rec->bad_full_backref = 0;
9003                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9004                 allocated = 1;
9005
9006                 if (ret)
9007                         goto out;
9008         }
9009 out:
9010         if (trans) {
9011                 int err = btrfs_commit_transaction(trans, info->extent_root);
9012                 if (!ret)
9013                         ret = err;
9014         }
9015
9016         if (!ret)
9017                 fprintf(stderr, "Repaired extent references for %llu\n",
9018                                 (unsigned long long)rec->start);
9019
9020         btrfs_release_path(&path);
9021         return ret;
9022 }
9023
9024 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9025                               struct extent_record *rec)
9026 {
9027         struct btrfs_trans_handle *trans;
9028         struct btrfs_root *root = fs_info->extent_root;
9029         struct btrfs_path path;
9030         struct btrfs_extent_item *ei;
9031         struct btrfs_key key;
9032         u64 flags;
9033         int ret = 0;
9034
9035         key.objectid = rec->start;
9036         if (rec->metadata) {
9037                 key.type = BTRFS_METADATA_ITEM_KEY;
9038                 key.offset = rec->info_level;
9039         } else {
9040                 key.type = BTRFS_EXTENT_ITEM_KEY;
9041                 key.offset = rec->max_size;
9042         }
9043
9044         trans = btrfs_start_transaction(root, 0);
9045         if (IS_ERR(trans))
9046                 return PTR_ERR(trans);
9047
9048         btrfs_init_path(&path);
9049         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9050         if (ret < 0) {
9051                 btrfs_release_path(&path);
9052                 btrfs_commit_transaction(trans, root);
9053                 return ret;
9054         } else if (ret) {
9055                 fprintf(stderr, "Didn't find extent for %llu\n",
9056                         (unsigned long long)rec->start);
9057                 btrfs_release_path(&path);
9058                 btrfs_commit_transaction(trans, root);
9059                 return -ENOENT;
9060         }
9061
9062         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9063                             struct btrfs_extent_item);
9064         flags = btrfs_extent_flags(path.nodes[0], ei);
9065         if (rec->flag_block_full_backref) {
9066                 fprintf(stderr, "setting full backref on %llu\n",
9067                         (unsigned long long)key.objectid);
9068                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9069         } else {
9070                 fprintf(stderr, "clearing full backref on %llu\n",
9071                         (unsigned long long)key.objectid);
9072                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9073         }
9074         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9075         btrfs_mark_buffer_dirty(path.nodes[0]);
9076         btrfs_release_path(&path);
9077         ret = btrfs_commit_transaction(trans, root);
9078         if (!ret)
9079                 fprintf(stderr, "Repaired extent flags for %llu\n",
9080                                 (unsigned long long)rec->start);
9081
9082         return ret;
9083 }
9084
9085 /* right now we only prune from the extent allocation tree */
9086 static int prune_one_block(struct btrfs_trans_handle *trans,
9087                            struct btrfs_fs_info *info,
9088                            struct btrfs_corrupt_block *corrupt)
9089 {
9090         int ret;
9091         struct btrfs_path path;
9092         struct extent_buffer *eb;
9093         u64 found;
9094         int slot;
9095         int nritems;
9096         int level = corrupt->level + 1;
9097
9098         btrfs_init_path(&path);
9099 again:
9100         /* we want to stop at the parent to our busted block */
9101         path.lowest_level = level;
9102
9103         ret = btrfs_search_slot(trans, info->extent_root,
9104                                 &corrupt->key, &path, -1, 1);
9105
9106         if (ret < 0)
9107                 goto out;
9108
9109         eb = path.nodes[level];
9110         if (!eb) {
9111                 ret = -ENOENT;
9112                 goto out;
9113         }
9114
9115         /*
9116          * hopefully the search gave us the block we want to prune,
9117          * lets try that first
9118          */
9119         slot = path.slots[level];
9120         found =  btrfs_node_blockptr(eb, slot);
9121         if (found == corrupt->cache.start)
9122                 goto del_ptr;
9123
9124         nritems = btrfs_header_nritems(eb);
9125
9126         /* the search failed, lets scan this node and hope we find it */
9127         for (slot = 0; slot < nritems; slot++) {
9128                 found =  btrfs_node_blockptr(eb, slot);
9129                 if (found == corrupt->cache.start)
9130                         goto del_ptr;
9131         }
9132         /*
9133          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9134          * to this block
9135          */
9136         if (eb == info->extent_root->node) {
9137                 ret = -ENOENT;
9138                 goto out;
9139         } else {
9140                 level++;
9141                 btrfs_release_path(&path);
9142                 goto again;
9143         }
9144
9145 del_ptr:
9146         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9147         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9148
9149 out:
9150         btrfs_release_path(&path);
9151         return ret;
9152 }
9153
9154 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9155 {
9156         struct btrfs_trans_handle *trans = NULL;
9157         struct cache_extent *cache;
9158         struct btrfs_corrupt_block *corrupt;
9159
9160         while (1) {
9161                 cache = search_cache_extent(info->corrupt_blocks, 0);
9162                 if (!cache)
9163                         break;
9164                 if (!trans) {
9165                         trans = btrfs_start_transaction(info->extent_root, 1);
9166                         if (IS_ERR(trans))
9167                                 return PTR_ERR(trans);
9168                 }
9169                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9170                 prune_one_block(trans, info, corrupt);
9171                 remove_cache_extent(info->corrupt_blocks, cache);
9172         }
9173         if (trans)
9174                 return btrfs_commit_transaction(trans, info->extent_root);
9175         return 0;
9176 }
9177
9178 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9179 {
9180         struct btrfs_block_group_cache *cache;
9181         u64 start, end;
9182         int ret;
9183
9184         while (1) {
9185                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9186                                             &start, &end, EXTENT_DIRTY);
9187                 if (ret)
9188                         break;
9189                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9190         }
9191
9192         start = 0;
9193         while (1) {
9194                 cache = btrfs_lookup_first_block_group(fs_info, start);
9195                 if (!cache)
9196                         break;
9197                 if (cache->cached)
9198                         cache->cached = 0;
9199                 start = cache->key.objectid + cache->key.offset;
9200         }
9201 }
9202
9203 static int check_extent_refs(struct btrfs_root *root,
9204                              struct cache_tree *extent_cache)
9205 {
9206         struct extent_record *rec;
9207         struct cache_extent *cache;
9208         int ret = 0;
9209         int had_dups = 0;
9210
9211         if (repair) {
9212                 /*
9213                  * if we're doing a repair, we have to make sure
9214                  * we don't allocate from the problem extents.
9215                  * In the worst case, this will be all the
9216                  * extents in the FS
9217                  */
9218                 cache = search_cache_extent(extent_cache, 0);
9219                 while(cache) {
9220                         rec = container_of(cache, struct extent_record, cache);
9221                         set_extent_dirty(root->fs_info->excluded_extents,
9222                                          rec->start,
9223                                          rec->start + rec->max_size - 1);
9224                         cache = next_cache_extent(cache);
9225                 }
9226
9227                 /* pin down all the corrupted blocks too */
9228                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9229                 while(cache) {
9230                         set_extent_dirty(root->fs_info->excluded_extents,
9231                                          cache->start,
9232                                          cache->start + cache->size - 1);
9233                         cache = next_cache_extent(cache);
9234                 }
9235                 prune_corrupt_blocks(root->fs_info);
9236                 reset_cached_block_groups(root->fs_info);
9237         }
9238
9239         reset_cached_block_groups(root->fs_info);
9240
9241         /*
9242          * We need to delete any duplicate entries we find first otherwise we
9243          * could mess up the extent tree when we have backrefs that actually
9244          * belong to a different extent item and not the weird duplicate one.
9245          */
9246         while (repair && !list_empty(&duplicate_extents)) {
9247                 rec = to_extent_record(duplicate_extents.next);
9248                 list_del_init(&rec->list);
9249
9250                 /* Sometimes we can find a backref before we find an actual
9251                  * extent, so we need to process it a little bit to see if there
9252                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9253                  * if this is a backref screwup.  If we need to delete stuff
9254                  * process_duplicates() will return 0, otherwise it will return
9255                  * 1 and we
9256                  */
9257                 if (process_duplicates(root, extent_cache, rec))
9258                         continue;
9259                 ret = delete_duplicate_records(root, rec);
9260                 if (ret < 0)
9261                         return ret;
9262                 /*
9263                  * delete_duplicate_records will return the number of entries
9264                  * deleted, so if it's greater than 0 then we know we actually
9265                  * did something and we need to remove.
9266                  */
9267                 if (ret)
9268                         had_dups = 1;
9269         }
9270
9271         if (had_dups)
9272                 return -EAGAIN;
9273
9274         while(1) {
9275                 int cur_err = 0;
9276                 int fix = 0;
9277
9278                 cache = search_cache_extent(extent_cache, 0);
9279                 if (!cache)
9280                         break;
9281                 rec = container_of(cache, struct extent_record, cache);
9282                 if (rec->num_duplicates) {
9283                         fprintf(stderr, "extent item %llu has multiple extent "
9284                                 "items\n", (unsigned long long)rec->start);
9285                         cur_err = 1;
9286                 }
9287
9288                 if (rec->refs != rec->extent_item_refs) {
9289                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9290                                 (unsigned long long)rec->start,
9291                                 (unsigned long long)rec->nr);
9292                         fprintf(stderr, "extent item %llu, found %llu\n",
9293                                 (unsigned long long)rec->extent_item_refs,
9294                                 (unsigned long long)rec->refs);
9295                         ret = record_orphan_data_extents(root->fs_info, rec);
9296                         if (ret < 0)
9297                                 goto repair_abort;
9298                         fix = ret;
9299                         cur_err = 1;
9300                 }
9301                 if (all_backpointers_checked(rec, 1)) {
9302                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9303                                 (unsigned long long)rec->start,
9304                                 (unsigned long long)rec->nr);
9305                         fix = 1;
9306                         cur_err = 1;
9307                 }
9308                 if (!rec->owner_ref_checked) {
9309                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9310                                 (unsigned long long)rec->start,
9311                                 (unsigned long long)rec->nr);
9312                         fix = 1;
9313                         cur_err = 1;
9314                 }
9315
9316                 if (repair && fix) {
9317                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9318                         if (ret)
9319                                 goto repair_abort;
9320                 }
9321
9322
9323                 if (rec->bad_full_backref) {
9324                         fprintf(stderr, "bad full backref, on [%llu]\n",
9325                                 (unsigned long long)rec->start);
9326                         if (repair) {
9327                                 ret = fixup_extent_flags(root->fs_info, rec);
9328                                 if (ret)
9329                                         goto repair_abort;
9330                                 fix = 1;
9331                         }
9332                         cur_err = 1;
9333                 }
9334                 /*
9335                  * Although it's not a extent ref's problem, we reuse this
9336                  * routine for error reporting.
9337                  * No repair function yet.
9338                  */
9339                 if (rec->crossing_stripes) {
9340                         fprintf(stderr,
9341                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9342                                 rec->start, rec->start + rec->max_size);
9343                         cur_err = 1;
9344                 }
9345
9346                 if (rec->wrong_chunk_type) {
9347                         fprintf(stderr,
9348                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9349                                 rec->start, rec->start + rec->max_size);
9350                         cur_err = 1;
9351                 }
9352
9353                 remove_cache_extent(extent_cache, cache);
9354                 free_all_extent_backrefs(rec);
9355                 if (!init_extent_tree && repair && (!cur_err || fix))
9356                         clear_extent_dirty(root->fs_info->excluded_extents,
9357                                            rec->start,
9358                                            rec->start + rec->max_size - 1);
9359                 free(rec);
9360         }
9361 repair_abort:
9362         if (repair) {
9363                 if (ret && ret != -EAGAIN) {
9364                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9365                         exit(1);
9366                 } else if (!ret) {
9367                         struct btrfs_trans_handle *trans;
9368
9369                         root = root->fs_info->extent_root;
9370                         trans = btrfs_start_transaction(root, 1);
9371                         if (IS_ERR(trans)) {
9372                                 ret = PTR_ERR(trans);
9373                                 goto repair_abort;
9374                         }
9375
9376                         btrfs_fix_block_accounting(trans, root);
9377                         ret = btrfs_commit_transaction(trans, root);
9378                         if (ret)
9379                                 goto repair_abort;
9380                 }
9381                 return ret;
9382         }
9383         return 0;
9384 }
9385
9386 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9387 {
9388         u64 stripe_size;
9389
9390         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9391                 stripe_size = length;
9392                 stripe_size /= num_stripes;
9393         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9394                 stripe_size = length * 2;
9395                 stripe_size /= num_stripes;
9396         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9397                 stripe_size = length;
9398                 stripe_size /= (num_stripes - 1);
9399         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9400                 stripe_size = length;
9401                 stripe_size /= (num_stripes - 2);
9402         } else {
9403                 stripe_size = length;
9404         }
9405         return stripe_size;
9406 }
9407
9408 /*
9409  * Check the chunk with its block group/dev list ref:
9410  * Return 0 if all refs seems valid.
9411  * Return 1 if part of refs seems valid, need later check for rebuild ref
9412  * like missing block group and needs to search extent tree to rebuild them.
9413  * Return -1 if essential refs are missing and unable to rebuild.
9414  */
9415 static int check_chunk_refs(struct chunk_record *chunk_rec,
9416                             struct block_group_tree *block_group_cache,
9417                             struct device_extent_tree *dev_extent_cache,
9418                             int silent)
9419 {
9420         struct cache_extent *block_group_item;
9421         struct block_group_record *block_group_rec;
9422         struct cache_extent *dev_extent_item;
9423         struct device_extent_record *dev_extent_rec;
9424         u64 devid;
9425         u64 offset;
9426         u64 length;
9427         int metadump_v2 = 0;
9428         int i;
9429         int ret = 0;
9430
9431         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9432                                                chunk_rec->offset,
9433                                                chunk_rec->length);
9434         if (block_group_item) {
9435                 block_group_rec = container_of(block_group_item,
9436                                                struct block_group_record,
9437                                                cache);
9438                 if (chunk_rec->length != block_group_rec->offset ||
9439                     chunk_rec->offset != block_group_rec->objectid ||
9440                     (!metadump_v2 &&
9441                      chunk_rec->type_flags != block_group_rec->flags)) {
9442                         if (!silent)
9443                                 fprintf(stderr,
9444                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9445                                         chunk_rec->objectid,
9446                                         chunk_rec->type,
9447                                         chunk_rec->offset,
9448                                         chunk_rec->length,
9449                                         chunk_rec->offset,
9450                                         chunk_rec->type_flags,
9451                                         block_group_rec->objectid,
9452                                         block_group_rec->type,
9453                                         block_group_rec->offset,
9454                                         block_group_rec->offset,
9455                                         block_group_rec->objectid,
9456                                         block_group_rec->flags);
9457                         ret = -1;
9458                 } else {
9459                         list_del_init(&block_group_rec->list);
9460                         chunk_rec->bg_rec = block_group_rec;
9461                 }
9462         } else {
9463                 if (!silent)
9464                         fprintf(stderr,
9465                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9466                                 chunk_rec->objectid,
9467                                 chunk_rec->type,
9468                                 chunk_rec->offset,
9469                                 chunk_rec->length,
9470                                 chunk_rec->offset,
9471                                 chunk_rec->type_flags);
9472                 ret = 1;
9473         }
9474
9475         if (metadump_v2)
9476                 return ret;
9477
9478         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9479                                     chunk_rec->num_stripes);
9480         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9481                 devid = chunk_rec->stripes[i].devid;
9482                 offset = chunk_rec->stripes[i].offset;
9483                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9484                                                        devid, offset, length);
9485                 if (dev_extent_item) {
9486                         dev_extent_rec = container_of(dev_extent_item,
9487                                                 struct device_extent_record,
9488                                                 cache);
9489                         if (dev_extent_rec->objectid != devid ||
9490                             dev_extent_rec->offset != offset ||
9491                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9492                             dev_extent_rec->length != length) {
9493                                 if (!silent)
9494                                         fprintf(stderr,
9495                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9496                                                 chunk_rec->objectid,
9497                                                 chunk_rec->type,
9498                                                 chunk_rec->offset,
9499                                                 chunk_rec->stripes[i].devid,
9500                                                 chunk_rec->stripes[i].offset,
9501                                                 dev_extent_rec->objectid,
9502                                                 dev_extent_rec->offset,
9503                                                 dev_extent_rec->length);
9504                                 ret = -1;
9505                         } else {
9506                                 list_move(&dev_extent_rec->chunk_list,
9507                                           &chunk_rec->dextents);
9508                         }
9509                 } else {
9510                         if (!silent)
9511                                 fprintf(stderr,
9512                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9513                                         chunk_rec->objectid,
9514                                         chunk_rec->type,
9515                                         chunk_rec->offset,
9516                                         chunk_rec->stripes[i].devid,
9517                                         chunk_rec->stripes[i].offset);
9518                         ret = -1;
9519                 }
9520         }
9521         return ret;
9522 }
9523
9524 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9525 int check_chunks(struct cache_tree *chunk_cache,
9526                  struct block_group_tree *block_group_cache,
9527                  struct device_extent_tree *dev_extent_cache,
9528                  struct list_head *good, struct list_head *bad,
9529                  struct list_head *rebuild, int silent)
9530 {
9531         struct cache_extent *chunk_item;
9532         struct chunk_record *chunk_rec;
9533         struct block_group_record *bg_rec;
9534         struct device_extent_record *dext_rec;
9535         int err;
9536         int ret = 0;
9537
9538         chunk_item = first_cache_extent(chunk_cache);
9539         while (chunk_item) {
9540                 chunk_rec = container_of(chunk_item, struct chunk_record,
9541                                          cache);
9542                 err = check_chunk_refs(chunk_rec, block_group_cache,
9543                                        dev_extent_cache, silent);
9544                 if (err < 0)
9545                         ret = err;
9546                 if (err == 0 && good)
9547                         list_add_tail(&chunk_rec->list, good);
9548                 if (err > 0 && rebuild)
9549                         list_add_tail(&chunk_rec->list, rebuild);
9550                 if (err < 0 && bad)
9551                         list_add_tail(&chunk_rec->list, bad);
9552                 chunk_item = next_cache_extent(chunk_item);
9553         }
9554
9555         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9556                 if (!silent)
9557                         fprintf(stderr,
9558                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9559                                 bg_rec->objectid,
9560                                 bg_rec->offset,
9561                                 bg_rec->flags);
9562                 if (!ret)
9563                         ret = 1;
9564         }
9565
9566         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9567                             chunk_list) {
9568                 if (!silent)
9569                         fprintf(stderr,
9570                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9571                                 dext_rec->objectid,
9572                                 dext_rec->offset,
9573                                 dext_rec->length);
9574                 if (!ret)
9575                         ret = 1;
9576         }
9577         return ret;
9578 }
9579
9580
9581 static int check_device_used(struct device_record *dev_rec,
9582                              struct device_extent_tree *dext_cache)
9583 {
9584         struct cache_extent *cache;
9585         struct device_extent_record *dev_extent_rec;
9586         u64 total_byte = 0;
9587
9588         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9589         while (cache) {
9590                 dev_extent_rec = container_of(cache,
9591                                               struct device_extent_record,
9592                                               cache);
9593                 if (dev_extent_rec->objectid != dev_rec->devid)
9594                         break;
9595
9596                 list_del_init(&dev_extent_rec->device_list);
9597                 total_byte += dev_extent_rec->length;
9598                 cache = next_cache_extent(cache);
9599         }
9600
9601         if (total_byte != dev_rec->byte_used) {
9602                 fprintf(stderr,
9603                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9604                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9605                         dev_rec->type, dev_rec->offset);
9606                 return -1;
9607         } else {
9608                 return 0;
9609         }
9610 }
9611
9612 /* check btrfs_dev_item -> btrfs_dev_extent */
9613 static int check_devices(struct rb_root *dev_cache,
9614                          struct device_extent_tree *dev_extent_cache)
9615 {
9616         struct rb_node *dev_node;
9617         struct device_record *dev_rec;
9618         struct device_extent_record *dext_rec;
9619         int err;
9620         int ret = 0;
9621
9622         dev_node = rb_first(dev_cache);
9623         while (dev_node) {
9624                 dev_rec = container_of(dev_node, struct device_record, node);
9625                 err = check_device_used(dev_rec, dev_extent_cache);
9626                 if (err)
9627                         ret = err;
9628
9629                 dev_node = rb_next(dev_node);
9630         }
9631         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9632                             device_list) {
9633                 fprintf(stderr,
9634                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9635                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9636                 if (!ret)
9637                         ret = 1;
9638         }
9639         return ret;
9640 }
9641
9642 static int add_root_item_to_list(struct list_head *head,
9643                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9644                                   u8 level, u8 drop_level,
9645                                   int level_size, struct btrfs_key *drop_key)
9646 {
9647
9648         struct root_item_record *ri_rec;
9649         ri_rec = malloc(sizeof(*ri_rec));
9650         if (!ri_rec)
9651                 return -ENOMEM;
9652         ri_rec->bytenr = bytenr;
9653         ri_rec->objectid = objectid;
9654         ri_rec->level = level;
9655         ri_rec->level_size = level_size;
9656         ri_rec->drop_level = drop_level;
9657         ri_rec->last_snapshot = last_snapshot;
9658         if (drop_key)
9659                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9660         list_add_tail(&ri_rec->list, head);
9661
9662         return 0;
9663 }
9664
9665 static void free_root_item_list(struct list_head *list)
9666 {
9667         struct root_item_record *ri_rec;
9668
9669         while (!list_empty(list)) {
9670                 ri_rec = list_first_entry(list, struct root_item_record,
9671                                           list);
9672                 list_del_init(&ri_rec->list);
9673                 free(ri_rec);
9674         }
9675 }
9676
9677 static int deal_root_from_list(struct list_head *list,
9678                                struct btrfs_root *root,
9679                                struct block_info *bits,
9680                                int bits_nr,
9681                                struct cache_tree *pending,
9682                                struct cache_tree *seen,
9683                                struct cache_tree *reada,
9684                                struct cache_tree *nodes,
9685                                struct cache_tree *extent_cache,
9686                                struct cache_tree *chunk_cache,
9687                                struct rb_root *dev_cache,
9688                                struct block_group_tree *block_group_cache,
9689                                struct device_extent_tree *dev_extent_cache)
9690 {
9691         int ret = 0;
9692         u64 last;
9693
9694         while (!list_empty(list)) {
9695                 struct root_item_record *rec;
9696                 struct extent_buffer *buf;
9697                 rec = list_entry(list->next,
9698                                  struct root_item_record, list);
9699                 last = 0;
9700                 buf = read_tree_block(root->fs_info->tree_root,
9701                                       rec->bytenr, rec->level_size, 0);
9702                 if (!extent_buffer_uptodate(buf)) {
9703                         free_extent_buffer(buf);
9704                         ret = -EIO;
9705                         break;
9706                 }
9707                 ret = add_root_to_pending(buf, extent_cache, pending,
9708                                     seen, nodes, rec->objectid);
9709                 if (ret < 0)
9710                         break;
9711                 /*
9712                  * To rebuild extent tree, we need deal with snapshot
9713                  * one by one, otherwise we deal with node firstly which
9714                  * can maximize readahead.
9715                  */
9716                 while (1) {
9717                         ret = run_next_block(root, bits, bits_nr, &last,
9718                                              pending, seen, reada, nodes,
9719                                              extent_cache, chunk_cache,
9720                                              dev_cache, block_group_cache,
9721                                              dev_extent_cache, rec);
9722                         if (ret != 0)
9723                                 break;
9724                 }
9725                 free_extent_buffer(buf);
9726                 list_del(&rec->list);
9727                 free(rec);
9728                 if (ret < 0)
9729                         break;
9730         }
9731         while (ret >= 0) {
9732                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9733                                      reada, nodes, extent_cache, chunk_cache,
9734                                      dev_cache, block_group_cache,
9735                                      dev_extent_cache, NULL);
9736                 if (ret != 0) {
9737                         if (ret > 0)
9738                                 ret = 0;
9739                         break;
9740                 }
9741         }
9742         return ret;
9743 }
9744
9745 static int check_chunks_and_extents(struct btrfs_root *root)
9746 {
9747         struct rb_root dev_cache;
9748         struct cache_tree chunk_cache;
9749         struct block_group_tree block_group_cache;
9750         struct device_extent_tree dev_extent_cache;
9751         struct cache_tree extent_cache;
9752         struct cache_tree seen;
9753         struct cache_tree pending;
9754         struct cache_tree reada;
9755         struct cache_tree nodes;
9756         struct extent_io_tree excluded_extents;
9757         struct cache_tree corrupt_blocks;
9758         struct btrfs_path path;
9759         struct btrfs_key key;
9760         struct btrfs_key found_key;
9761         int ret, err = 0;
9762         struct block_info *bits;
9763         int bits_nr;
9764         struct extent_buffer *leaf;
9765         int slot;
9766         struct btrfs_root_item ri;
9767         struct list_head dropping_trees;
9768         struct list_head normal_trees;
9769         struct btrfs_root *root1;
9770         u64 objectid;
9771         u32 level_size;
9772         u8 level;
9773
9774         dev_cache = RB_ROOT;
9775         cache_tree_init(&chunk_cache);
9776         block_group_tree_init(&block_group_cache);
9777         device_extent_tree_init(&dev_extent_cache);
9778
9779         cache_tree_init(&extent_cache);
9780         cache_tree_init(&seen);
9781         cache_tree_init(&pending);
9782         cache_tree_init(&nodes);
9783         cache_tree_init(&reada);
9784         cache_tree_init(&corrupt_blocks);
9785         extent_io_tree_init(&excluded_extents);
9786         INIT_LIST_HEAD(&dropping_trees);
9787         INIT_LIST_HEAD(&normal_trees);
9788
9789         if (repair) {
9790                 root->fs_info->excluded_extents = &excluded_extents;
9791                 root->fs_info->fsck_extent_cache = &extent_cache;
9792                 root->fs_info->free_extent_hook = free_extent_hook;
9793                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9794         }
9795
9796         bits_nr = 1024;
9797         bits = malloc(bits_nr * sizeof(struct block_info));
9798         if (!bits) {
9799                 perror("malloc");
9800                 exit(1);
9801         }
9802
9803         if (ctx.progress_enabled) {
9804                 ctx.tp = TASK_EXTENTS;
9805                 task_start(ctx.info);
9806         }
9807
9808 again:
9809         root1 = root->fs_info->tree_root;
9810         level = btrfs_header_level(root1->node);
9811         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9812                                     root1->node->start, 0, level, 0,
9813                                     root1->nodesize, NULL);
9814         if (ret < 0)
9815                 goto out;
9816         root1 = root->fs_info->chunk_root;
9817         level = btrfs_header_level(root1->node);
9818         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9819                                     root1->node->start, 0, level, 0,
9820                                     root1->nodesize, NULL);
9821         if (ret < 0)
9822                 goto out;
9823         btrfs_init_path(&path);
9824         key.offset = 0;
9825         key.objectid = 0;
9826         key.type = BTRFS_ROOT_ITEM_KEY;
9827         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9828                                         &key, &path, 0, 0);
9829         if (ret < 0)
9830                 goto out;
9831         while(1) {
9832                 leaf = path.nodes[0];
9833                 slot = path.slots[0];
9834                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9835                         ret = btrfs_next_leaf(root, &path);
9836                         if (ret != 0)
9837                                 break;
9838                         leaf = path.nodes[0];
9839                         slot = path.slots[0];
9840                 }
9841                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9842                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9843                         unsigned long offset;
9844                         u64 last_snapshot;
9845
9846                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9847                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9848                         last_snapshot = btrfs_root_last_snapshot(&ri);
9849                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9850                                 level = btrfs_root_level(&ri);
9851                                 level_size = root->nodesize;
9852                                 ret = add_root_item_to_list(&normal_trees,
9853                                                 found_key.objectid,
9854                                                 btrfs_root_bytenr(&ri),
9855                                                 last_snapshot, level,
9856                                                 0, level_size, NULL);
9857                                 if (ret < 0)
9858                                         goto out;
9859                         } else {
9860                                 level = btrfs_root_level(&ri);
9861                                 level_size = root->nodesize;
9862                                 objectid = found_key.objectid;
9863                                 btrfs_disk_key_to_cpu(&found_key,
9864                                                       &ri.drop_progress);
9865                                 ret = add_root_item_to_list(&dropping_trees,
9866                                                 objectid,
9867                                                 btrfs_root_bytenr(&ri),
9868                                                 last_snapshot, level,
9869                                                 ri.drop_level,
9870                                                 level_size, &found_key);
9871                                 if (ret < 0)
9872                                         goto out;
9873                         }
9874                 }
9875                 path.slots[0]++;
9876         }
9877         btrfs_release_path(&path);
9878
9879         /*
9880          * check_block can return -EAGAIN if it fixes something, please keep
9881          * this in mind when dealing with return values from these functions, if
9882          * we get -EAGAIN we want to fall through and restart the loop.
9883          */
9884         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9885                                   &seen, &reada, &nodes, &extent_cache,
9886                                   &chunk_cache, &dev_cache, &block_group_cache,
9887                                   &dev_extent_cache);
9888         if (ret < 0) {
9889                 if (ret == -EAGAIN)
9890                         goto loop;
9891                 goto out;
9892         }
9893         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9894                                   &pending, &seen, &reada, &nodes,
9895                                   &extent_cache, &chunk_cache, &dev_cache,
9896                                   &block_group_cache, &dev_extent_cache);
9897         if (ret < 0) {
9898                 if (ret == -EAGAIN)
9899                         goto loop;
9900                 goto out;
9901         }
9902
9903         ret = check_chunks(&chunk_cache, &block_group_cache,
9904                            &dev_extent_cache, NULL, NULL, NULL, 0);
9905         if (ret) {
9906                 if (ret == -EAGAIN)
9907                         goto loop;
9908                 err = ret;
9909         }
9910
9911         ret = check_extent_refs(root, &extent_cache);
9912         if (ret < 0) {
9913                 if (ret == -EAGAIN)
9914                         goto loop;
9915                 goto out;
9916         }
9917
9918         ret = check_devices(&dev_cache, &dev_extent_cache);
9919         if (ret && err)
9920                 ret = err;
9921
9922 out:
9923         task_stop(ctx.info);
9924         if (repair) {
9925                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9926                 extent_io_tree_cleanup(&excluded_extents);
9927                 root->fs_info->fsck_extent_cache = NULL;
9928                 root->fs_info->free_extent_hook = NULL;
9929                 root->fs_info->corrupt_blocks = NULL;
9930                 root->fs_info->excluded_extents = NULL;
9931         }
9932         free(bits);
9933         free_chunk_cache_tree(&chunk_cache);
9934         free_device_cache_tree(&dev_cache);
9935         free_block_group_tree(&block_group_cache);
9936         free_device_extent_tree(&dev_extent_cache);
9937         free_extent_cache_tree(&seen);
9938         free_extent_cache_tree(&pending);
9939         free_extent_cache_tree(&reada);
9940         free_extent_cache_tree(&nodes);
9941         return ret;
9942 loop:
9943         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9944         free_extent_cache_tree(&seen);
9945         free_extent_cache_tree(&pending);
9946         free_extent_cache_tree(&reada);
9947         free_extent_cache_tree(&nodes);
9948         free_chunk_cache_tree(&chunk_cache);
9949         free_block_group_tree(&block_group_cache);
9950         free_device_cache_tree(&dev_cache);
9951         free_device_extent_tree(&dev_extent_cache);
9952         free_extent_record_cache(root->fs_info, &extent_cache);
9953         free_root_item_list(&normal_trees);
9954         free_root_item_list(&dropping_trees);
9955         extent_io_tree_cleanup(&excluded_extents);
9956         goto again;
9957 }
9958
9959 /*
9960  * Check backrefs of a tree block given by @bytenr or @eb.
9961  *
9962  * @root:       the root containing the @bytenr or @eb
9963  * @eb:         tree block extent buffer, can be NULL
9964  * @bytenr:     bytenr of the tree block to search
9965  * @level:      tree level of the tree block
9966  * @owner:      owner of the tree block
9967  *
9968  * Return >0 for any error found and output error message
9969  * Return 0 for no error found
9970  */
9971 static int check_tree_block_ref(struct btrfs_root *root,
9972                                 struct extent_buffer *eb, u64 bytenr,
9973                                 int level, u64 owner)
9974 {
9975         struct btrfs_key key;
9976         struct btrfs_root *extent_root = root->fs_info->extent_root;
9977         struct btrfs_path path;
9978         struct btrfs_extent_item *ei;
9979         struct btrfs_extent_inline_ref *iref;
9980         struct extent_buffer *leaf;
9981         unsigned long end;
9982         unsigned long ptr;
9983         int slot;
9984         int skinny_level;
9985         int type;
9986         u32 nodesize = root->nodesize;
9987         u32 item_size;
9988         u64 offset;
9989         int tree_reloc_root = 0;
9990         int found_ref = 0;
9991         int err = 0;
9992         int ret;
9993
9994         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9995             btrfs_header_bytenr(root->node) == bytenr)
9996                 tree_reloc_root = 1;
9997
9998         btrfs_init_path(&path);
9999         key.objectid = bytenr;
10000         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10001                 key.type = BTRFS_METADATA_ITEM_KEY;
10002         else
10003                 key.type = BTRFS_EXTENT_ITEM_KEY;
10004         key.offset = (u64)-1;
10005
10006         /* Search for the backref in extent tree */
10007         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10008         if (ret < 0) {
10009                 err |= BACKREF_MISSING;
10010                 goto out;
10011         }
10012         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10013         if (ret) {
10014                 err |= BACKREF_MISSING;
10015                 goto out;
10016         }
10017
10018         leaf = path.nodes[0];
10019         slot = path.slots[0];
10020         btrfs_item_key_to_cpu(leaf, &key, slot);
10021
10022         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10023
10024         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10025                 skinny_level = (int)key.offset;
10026                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10027         } else {
10028                 struct btrfs_tree_block_info *info;
10029
10030                 info = (struct btrfs_tree_block_info *)(ei + 1);
10031                 skinny_level = btrfs_tree_block_level(leaf, info);
10032                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10033         }
10034
10035         if (eb) {
10036                 u64 header_gen;
10037                 u64 extent_gen;
10038
10039                 if (!(btrfs_extent_flags(leaf, ei) &
10040                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10041                         error(
10042                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10043                                 key.objectid, nodesize,
10044                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10045                         err = BACKREF_MISMATCH;
10046                 }
10047                 header_gen = btrfs_header_generation(eb);
10048                 extent_gen = btrfs_extent_generation(leaf, ei);
10049                 if (header_gen != extent_gen) {
10050                         error(
10051         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10052                                 key.objectid, nodesize, header_gen,
10053                                 extent_gen);
10054                         err = BACKREF_MISMATCH;
10055                 }
10056                 if (level != skinny_level) {
10057                         error(
10058                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10059                                 key.objectid, nodesize, level, skinny_level);
10060                         err = BACKREF_MISMATCH;
10061                 }
10062                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10063                         error(
10064                         "extent[%llu %u] is referred by other roots than %llu",
10065                                 key.objectid, nodesize, root->objectid);
10066                         err = BACKREF_MISMATCH;
10067                 }
10068         }
10069
10070         /*
10071          * Iterate the extent/metadata item to find the exact backref
10072          */
10073         item_size = btrfs_item_size_nr(leaf, slot);
10074         ptr = (unsigned long)iref;
10075         end = (unsigned long)ei + item_size;
10076         while (ptr < end) {
10077                 iref = (struct btrfs_extent_inline_ref *)ptr;
10078                 type = btrfs_extent_inline_ref_type(leaf, iref);
10079                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10080
10081                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10082                         (offset == root->objectid || offset == owner)) {
10083                         found_ref = 1;
10084                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10085                         /*
10086                          * Backref of tree reloc root points to itself, no need
10087                          * to check backref any more.
10088                          */
10089                         if (tree_reloc_root)
10090                                 found_ref = 1;
10091                         else
10092                         /* Check if the backref points to valid referencer */
10093                                 found_ref = !check_tree_block_ref(root, NULL,
10094                                                 offset, level + 1, owner);
10095                 }
10096
10097                 if (found_ref)
10098                         break;
10099                 ptr += btrfs_extent_inline_ref_size(type);
10100         }
10101
10102         /*
10103          * Inlined extent item doesn't have what we need, check
10104          * TREE_BLOCK_REF_KEY
10105          */
10106         if (!found_ref) {
10107                 btrfs_release_path(&path);
10108                 key.objectid = bytenr;
10109                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10110                 key.offset = root->objectid;
10111
10112                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10113                 if (!ret)
10114                         found_ref = 1;
10115         }
10116         if (!found_ref)
10117                 err |= BACKREF_MISSING;
10118 out:
10119         btrfs_release_path(&path);
10120         if (eb && (err & BACKREF_MISSING))
10121                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10122                         bytenr, nodesize, owner, level);
10123         return err;
10124 }
10125
10126 /*
10127  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10128  *
10129  * Return >0 any error found and output error message
10130  * Return 0 for no error found
10131  */
10132 static int check_extent_data_item(struct btrfs_root *root,
10133                                   struct extent_buffer *eb, int slot)
10134 {
10135         struct btrfs_file_extent_item *fi;
10136         struct btrfs_path path;
10137         struct btrfs_root *extent_root = root->fs_info->extent_root;
10138         struct btrfs_key fi_key;
10139         struct btrfs_key dbref_key;
10140         struct extent_buffer *leaf;
10141         struct btrfs_extent_item *ei;
10142         struct btrfs_extent_inline_ref *iref;
10143         struct btrfs_extent_data_ref *dref;
10144         u64 owner;
10145         u64 disk_bytenr;
10146         u64 disk_num_bytes;
10147         u64 extent_num_bytes;
10148         u64 extent_flags;
10149         u32 item_size;
10150         unsigned long end;
10151         unsigned long ptr;
10152         int type;
10153         u64 ref_root;
10154         int found_dbackref = 0;
10155         int err = 0;
10156         int ret;
10157
10158         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10159         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10160
10161         /* Nothing to check for hole and inline data extents */
10162         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10163             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10164                 return 0;
10165
10166         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10167         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10168         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10169
10170         /* Check unaligned disk_num_bytes and num_bytes */
10171         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10172                 error(
10173 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10174                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10175                         root->sectorsize);
10176                 err |= BYTES_UNALIGNED;
10177         } else {
10178                 data_bytes_allocated += disk_num_bytes;
10179         }
10180         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10181                 error(
10182 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10183                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10184                         root->sectorsize);
10185                 err |= BYTES_UNALIGNED;
10186         } else {
10187                 data_bytes_referenced += extent_num_bytes;
10188         }
10189         owner = btrfs_header_owner(eb);
10190
10191         /* Check the extent item of the file extent in extent tree */
10192         btrfs_init_path(&path);
10193         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10194         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10195         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10196
10197         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10198         if (ret) {
10199                 err |= BACKREF_MISSING;
10200                 goto error;
10201         }
10202
10203         leaf = path.nodes[0];
10204         slot = path.slots[0];
10205         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10206
10207         extent_flags = btrfs_extent_flags(leaf, ei);
10208
10209         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10210                 error(
10211                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10212                     disk_bytenr, disk_num_bytes,
10213                     BTRFS_EXTENT_FLAG_DATA);
10214                 err |= BACKREF_MISMATCH;
10215         }
10216
10217         /* Check data backref inside that extent item */
10218         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10219         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10220         ptr = (unsigned long)iref;
10221         end = (unsigned long)ei + item_size;
10222         while (ptr < end) {
10223                 iref = (struct btrfs_extent_inline_ref *)ptr;
10224                 type = btrfs_extent_inline_ref_type(leaf, iref);
10225                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10226
10227                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10228                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10229                         if (ref_root == owner || ref_root == root->objectid)
10230                                 found_dbackref = 1;
10231                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10232                         found_dbackref = !check_tree_block_ref(root, NULL,
10233                                 btrfs_extent_inline_ref_offset(leaf, iref),
10234                                 0, owner);
10235                 }
10236
10237                 if (found_dbackref)
10238                         break;
10239                 ptr += btrfs_extent_inline_ref_size(type);
10240         }
10241
10242         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10243         if (!found_dbackref) {
10244                 btrfs_release_path(&path);
10245
10246                 btrfs_init_path(&path);
10247                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10248                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10249                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10250                                 fi_key.objectid, fi_key.offset);
10251
10252                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10253                                         &dbref_key, &path, 0, 0);
10254                 if (!ret)
10255                         found_dbackref = 1;
10256         }
10257
10258         if (!found_dbackref)
10259                 err |= BACKREF_MISSING;
10260 error:
10261         btrfs_release_path(&path);
10262         if (err & BACKREF_MISSING) {
10263                 error("data extent[%llu %llu] backref lost",
10264                       disk_bytenr, disk_num_bytes);
10265         }
10266         return err;
10267 }
10268
10269 /*
10270  * Get real tree block level for the case like shared block
10271  * Return >= 0 as tree level
10272  * Return <0 for error
10273  */
10274 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10275 {
10276         struct extent_buffer *eb;
10277         struct btrfs_path path;
10278         struct btrfs_key key;
10279         struct btrfs_extent_item *ei;
10280         u64 flags;
10281         u64 transid;
10282         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10283         u8 backref_level;
10284         u8 header_level;
10285         int ret;
10286
10287         /* Search extent tree for extent generation and level */
10288         key.objectid = bytenr;
10289         key.type = BTRFS_METADATA_ITEM_KEY;
10290         key.offset = (u64)-1;
10291
10292         btrfs_init_path(&path);
10293         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10294         if (ret < 0)
10295                 goto release_out;
10296         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10297         if (ret < 0)
10298                 goto release_out;
10299         if (ret > 0) {
10300                 ret = -ENOENT;
10301                 goto release_out;
10302         }
10303
10304         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10305         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10306                             struct btrfs_extent_item);
10307         flags = btrfs_extent_flags(path.nodes[0], ei);
10308         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10309                 ret = -ENOENT;
10310                 goto release_out;
10311         }
10312
10313         /* Get transid for later read_tree_block() check */
10314         transid = btrfs_extent_generation(path.nodes[0], ei);
10315
10316         /* Get backref level as one source */
10317         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10318                 backref_level = key.offset;
10319         } else {
10320                 struct btrfs_tree_block_info *info;
10321
10322                 info = (struct btrfs_tree_block_info *)(ei + 1);
10323                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10324         }
10325         btrfs_release_path(&path);
10326
10327         /* Get level from tree block as an alternative source */
10328         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10329         if (!extent_buffer_uptodate(eb)) {
10330                 free_extent_buffer(eb);
10331                 return -EIO;
10332         }
10333         header_level = btrfs_header_level(eb);
10334         free_extent_buffer(eb);
10335
10336         if (header_level != backref_level)
10337                 return -EIO;
10338         return header_level;
10339
10340 release_out:
10341         btrfs_release_path(&path);
10342         return ret;
10343 }
10344
10345 /*
10346  * Check if a tree block backref is valid (points to a valid tree block)
10347  * if level == -1, level will be resolved
10348  * Return >0 for any error found and print error message
10349  */
10350 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10351                                     u64 bytenr, int level)
10352 {
10353         struct btrfs_root *root;
10354         struct btrfs_key key;
10355         struct btrfs_path path;
10356         struct extent_buffer *eb;
10357         struct extent_buffer *node;
10358         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10359         int err = 0;
10360         int ret;
10361
10362         /* Query level for level == -1 special case */
10363         if (level == -1)
10364                 level = query_tree_block_level(fs_info, bytenr);
10365         if (level < 0) {
10366                 err |= REFERENCER_MISSING;
10367                 goto out;
10368         }
10369
10370         key.objectid = root_id;
10371         key.type = BTRFS_ROOT_ITEM_KEY;
10372         key.offset = (u64)-1;
10373
10374         root = btrfs_read_fs_root(fs_info, &key);
10375         if (IS_ERR(root)) {
10376                 err |= REFERENCER_MISSING;
10377                 goto out;
10378         }
10379
10380         /* Read out the tree block to get item/node key */
10381         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10382         if (!extent_buffer_uptodate(eb)) {
10383                 err |= REFERENCER_MISSING;
10384                 free_extent_buffer(eb);
10385                 goto out;
10386         }
10387
10388         /* Empty tree, no need to check key */
10389         if (!btrfs_header_nritems(eb) && !level) {
10390                 free_extent_buffer(eb);
10391                 goto out;
10392         }
10393
10394         if (level)
10395                 btrfs_node_key_to_cpu(eb, &key, 0);
10396         else
10397                 btrfs_item_key_to_cpu(eb, &key, 0);
10398
10399         free_extent_buffer(eb);
10400
10401         btrfs_init_path(&path);
10402         path.lowest_level = level;
10403         /* Search with the first key, to ensure we can reach it */
10404         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10405         if (ret < 0) {
10406                 err |= REFERENCER_MISSING;
10407                 goto release_out;
10408         }
10409
10410         node = path.nodes[level];
10411         if (btrfs_header_bytenr(node) != bytenr) {
10412                 error(
10413         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10414                         bytenr, nodesize, bytenr,
10415                         btrfs_header_bytenr(node));
10416                 err |= REFERENCER_MISMATCH;
10417         }
10418         if (btrfs_header_level(node) != level) {
10419                 error(
10420         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10421                         bytenr, nodesize, level,
10422                         btrfs_header_level(node));
10423                 err |= REFERENCER_MISMATCH;
10424         }
10425
10426 release_out:
10427         btrfs_release_path(&path);
10428 out:
10429         if (err & REFERENCER_MISSING) {
10430                 if (level < 0)
10431                         error("extent [%llu %d] lost referencer (owner: %llu)",
10432                                 bytenr, nodesize, root_id);
10433                 else
10434                         error(
10435                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10436                                 bytenr, nodesize, root_id, level);
10437         }
10438
10439         return err;
10440 }
10441
10442 /*
10443  * Check if tree block @eb is tree reloc root.
10444  * Return 0 if it's not or any problem happens
10445  * Return 1 if it's a tree reloc root
10446  */
10447 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10448                                  struct extent_buffer *eb)
10449 {
10450         struct btrfs_root *tree_reloc_root;
10451         struct btrfs_key key;
10452         u64 bytenr = btrfs_header_bytenr(eb);
10453         u64 owner = btrfs_header_owner(eb);
10454         int ret = 0;
10455
10456         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10457         key.offset = owner;
10458         key.type = BTRFS_ROOT_ITEM_KEY;
10459
10460         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10461         if (IS_ERR(tree_reloc_root))
10462                 return 0;
10463
10464         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10465                 ret = 1;
10466         btrfs_free_fs_root(tree_reloc_root);
10467         return ret;
10468 }
10469
10470 /*
10471  * Check referencer for shared block backref
10472  * If level == -1, this function will resolve the level.
10473  */
10474 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10475                                      u64 parent, u64 bytenr, int level)
10476 {
10477         struct extent_buffer *eb;
10478         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10479         u32 nr;
10480         int found_parent = 0;
10481         int i;
10482
10483         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10484         if (!extent_buffer_uptodate(eb))
10485                 goto out;
10486
10487         if (level == -1)
10488                 level = query_tree_block_level(fs_info, bytenr);
10489         if (level < 0)
10490                 goto out;
10491
10492         /* It's possible it's a tree reloc root */
10493         if (parent == bytenr) {
10494                 if (is_tree_reloc_root(fs_info, eb))
10495                         found_parent = 1;
10496                 goto out;
10497         }
10498
10499         if (level + 1 != btrfs_header_level(eb))
10500                 goto out;
10501
10502         nr = btrfs_header_nritems(eb);
10503         for (i = 0; i < nr; i++) {
10504                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10505                         found_parent = 1;
10506                         break;
10507                 }
10508         }
10509 out:
10510         free_extent_buffer(eb);
10511         if (!found_parent) {
10512                 error(
10513         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10514                         bytenr, nodesize, parent, level);
10515                 return REFERENCER_MISSING;
10516         }
10517         return 0;
10518 }
10519
10520 /*
10521  * Check referencer for normal (inlined) data ref
10522  * If len == 0, it will be resolved by searching in extent tree
10523  */
10524 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10525                                      u64 root_id, u64 objectid, u64 offset,
10526                                      u64 bytenr, u64 len, u32 count)
10527 {
10528         struct btrfs_root *root;
10529         struct btrfs_root *extent_root = fs_info->extent_root;
10530         struct btrfs_key key;
10531         struct btrfs_path path;
10532         struct extent_buffer *leaf;
10533         struct btrfs_file_extent_item *fi;
10534         u32 found_count = 0;
10535         int slot;
10536         int ret = 0;
10537
10538         if (!len) {
10539                 key.objectid = bytenr;
10540                 key.type = BTRFS_EXTENT_ITEM_KEY;
10541                 key.offset = (u64)-1;
10542
10543                 btrfs_init_path(&path);
10544                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10545                 if (ret < 0)
10546                         goto out;
10547                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10548                 if (ret)
10549                         goto out;
10550                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10551                 if (key.objectid != bytenr ||
10552                     key.type != BTRFS_EXTENT_ITEM_KEY)
10553                         goto out;
10554                 len = key.offset;
10555                 btrfs_release_path(&path);
10556         }
10557         key.objectid = root_id;
10558         key.type = BTRFS_ROOT_ITEM_KEY;
10559         key.offset = (u64)-1;
10560         btrfs_init_path(&path);
10561
10562         root = btrfs_read_fs_root(fs_info, &key);
10563         if (IS_ERR(root))
10564                 goto out;
10565
10566         key.objectid = objectid;
10567         key.type = BTRFS_EXTENT_DATA_KEY;
10568         /*
10569          * It can be nasty as data backref offset is
10570          * file offset - file extent offset, which is smaller or
10571          * equal to original backref offset.  The only special case is
10572          * overflow.  So we need to special check and do further search.
10573          */
10574         key.offset = offset & (1ULL << 63) ? 0 : offset;
10575
10576         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10577         if (ret < 0)
10578                 goto out;
10579
10580         /*
10581          * Search afterwards to get correct one
10582          * NOTE: As we must do a comprehensive check on the data backref to
10583          * make sure the dref count also matches, we must iterate all file
10584          * extents for that inode.
10585          */
10586         while (1) {
10587                 leaf = path.nodes[0];
10588                 slot = path.slots[0];
10589
10590                 btrfs_item_key_to_cpu(leaf, &key, slot);
10591                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10592                         break;
10593                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10594                 /*
10595                  * Except normal disk bytenr and disk num bytes, we still
10596                  * need to do extra check on dbackref offset as
10597                  * dbackref offset = file_offset - file_extent_offset
10598                  */
10599                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10600                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10601                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10602                     offset)
10603                         found_count++;
10604
10605                 ret = btrfs_next_item(root, &path);
10606                 if (ret)
10607                         break;
10608         }
10609 out:
10610         btrfs_release_path(&path);
10611         if (found_count != count) {
10612                 error(
10613 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10614                         bytenr, len, root_id, objectid, offset, count, found_count);
10615                 return REFERENCER_MISSING;
10616         }
10617         return 0;
10618 }
10619
10620 /*
10621  * Check if the referencer of a shared data backref exists
10622  */
10623 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10624                                      u64 parent, u64 bytenr)
10625 {
10626         struct extent_buffer *eb;
10627         struct btrfs_key key;
10628         struct btrfs_file_extent_item *fi;
10629         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10630         u32 nr;
10631         int found_parent = 0;
10632         int i;
10633
10634         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10635         if (!extent_buffer_uptodate(eb))
10636                 goto out;
10637
10638         nr = btrfs_header_nritems(eb);
10639         for (i = 0; i < nr; i++) {
10640                 btrfs_item_key_to_cpu(eb, &key, i);
10641                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10642                         continue;
10643
10644                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10645                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10646                         continue;
10647
10648                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10649                         found_parent = 1;
10650                         break;
10651                 }
10652         }
10653
10654 out:
10655         free_extent_buffer(eb);
10656         if (!found_parent) {
10657                 error("shared extent %llu referencer lost (parent: %llu)",
10658                         bytenr, parent);
10659                 return REFERENCER_MISSING;
10660         }
10661         return 0;
10662 }
10663
10664 /*
10665  * This function will check a given extent item, including its backref and
10666  * itself (like crossing stripe boundary and type)
10667  *
10668  * Since we don't use extent_record anymore, introduce new error bit
10669  */
10670 static int check_extent_item(struct btrfs_fs_info *fs_info,
10671                              struct extent_buffer *eb, int slot)
10672 {
10673         struct btrfs_extent_item *ei;
10674         struct btrfs_extent_inline_ref *iref;
10675         struct btrfs_extent_data_ref *dref;
10676         unsigned long end;
10677         unsigned long ptr;
10678         int type;
10679         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10680         u32 item_size = btrfs_item_size_nr(eb, slot);
10681         u64 flags;
10682         u64 offset;
10683         int metadata = 0;
10684         int level;
10685         struct btrfs_key key;
10686         int ret;
10687         int err = 0;
10688
10689         btrfs_item_key_to_cpu(eb, &key, slot);
10690         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10691                 bytes_used += key.offset;
10692         else
10693                 bytes_used += nodesize;
10694
10695         if (item_size < sizeof(*ei)) {
10696                 /*
10697                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10698                  * old thing when on disk format is still un-determined.
10699                  * No need to care about it anymore
10700                  */
10701                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10702                 return -ENOTTY;
10703         }
10704
10705         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10706         flags = btrfs_extent_flags(eb, ei);
10707
10708         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10709                 metadata = 1;
10710         if (metadata && check_crossing_stripes(global_info, key.objectid,
10711                                                eb->len)) {
10712                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10713                       key.objectid, key.objectid + nodesize);
10714                 err |= CROSSING_STRIPE_BOUNDARY;
10715         }
10716
10717         ptr = (unsigned long)(ei + 1);
10718
10719         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10720                 /* Old EXTENT_ITEM metadata */
10721                 struct btrfs_tree_block_info *info;
10722
10723                 info = (struct btrfs_tree_block_info *)ptr;
10724                 level = btrfs_tree_block_level(eb, info);
10725                 ptr += sizeof(struct btrfs_tree_block_info);
10726         } else {
10727                 /* New METADATA_ITEM */
10728                 level = key.offset;
10729         }
10730         end = (unsigned long)ei + item_size;
10731
10732         if (ptr >= end) {
10733                 err |= ITEM_SIZE_MISMATCH;
10734                 goto out;
10735         }
10736
10737         /* Now check every backref in this extent item */
10738 next:
10739         iref = (struct btrfs_extent_inline_ref *)ptr;
10740         type = btrfs_extent_inline_ref_type(eb, iref);
10741         offset = btrfs_extent_inline_ref_offset(eb, iref);
10742         switch (type) {
10743         case BTRFS_TREE_BLOCK_REF_KEY:
10744                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10745                                                level);
10746                 err |= ret;
10747                 break;
10748         case BTRFS_SHARED_BLOCK_REF_KEY:
10749                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10750                                                  level);
10751                 err |= ret;
10752                 break;
10753         case BTRFS_EXTENT_DATA_REF_KEY:
10754                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10755                 ret = check_extent_data_backref(fs_info,
10756                                 btrfs_extent_data_ref_root(eb, dref),
10757                                 btrfs_extent_data_ref_objectid(eb, dref),
10758                                 btrfs_extent_data_ref_offset(eb, dref),
10759                                 key.objectid, key.offset,
10760                                 btrfs_extent_data_ref_count(eb, dref));
10761                 err |= ret;
10762                 break;
10763         case BTRFS_SHARED_DATA_REF_KEY:
10764                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10765                 err |= ret;
10766                 break;
10767         default:
10768                 error("extent[%llu %d %llu] has unknown ref type: %d",
10769                         key.objectid, key.type, key.offset, type);
10770                 err |= UNKNOWN_TYPE;
10771                 goto out;
10772         }
10773
10774         ptr += btrfs_extent_inline_ref_size(type);
10775         if (ptr < end)
10776                 goto next;
10777
10778 out:
10779         return err;
10780 }
10781
10782 /*
10783  * Check if a dev extent item is referred correctly by its chunk
10784  */
10785 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10786                                  struct extent_buffer *eb, int slot)
10787 {
10788         struct btrfs_root *chunk_root = fs_info->chunk_root;
10789         struct btrfs_dev_extent *ptr;
10790         struct btrfs_path path;
10791         struct btrfs_key chunk_key;
10792         struct btrfs_key devext_key;
10793         struct btrfs_chunk *chunk;
10794         struct extent_buffer *l;
10795         int num_stripes;
10796         u64 length;
10797         int i;
10798         int found_chunk = 0;
10799         int ret;
10800
10801         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10802         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10803         length = btrfs_dev_extent_length(eb, ptr);
10804
10805         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10806         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10807         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10808
10809         btrfs_init_path(&path);
10810         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10811         if (ret)
10812                 goto out;
10813
10814         l = path.nodes[0];
10815         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10816         if (btrfs_chunk_length(l, chunk) != length)
10817                 goto out;
10818
10819         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10820         for (i = 0; i < num_stripes; i++) {
10821                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10822                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10823
10824                 if (devid == devext_key.objectid &&
10825                     offset == devext_key.offset) {
10826                         found_chunk = 1;
10827                         break;
10828                 }
10829         }
10830 out:
10831         btrfs_release_path(&path);
10832         if (!found_chunk) {
10833                 error(
10834                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10835                         devext_key.objectid, devext_key.offset, length);
10836                 return REFERENCER_MISSING;
10837         }
10838         return 0;
10839 }
10840
10841 /*
10842  * Check if the used space is correct with the dev item
10843  */
10844 static int check_dev_item(struct btrfs_fs_info *fs_info,
10845                           struct extent_buffer *eb, int slot)
10846 {
10847         struct btrfs_root *dev_root = fs_info->dev_root;
10848         struct btrfs_dev_item *dev_item;
10849         struct btrfs_path path;
10850         struct btrfs_key key;
10851         struct btrfs_dev_extent *ptr;
10852         u64 dev_id;
10853         u64 used;
10854         u64 total = 0;
10855         int ret;
10856
10857         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10858         dev_id = btrfs_device_id(eb, dev_item);
10859         used = btrfs_device_bytes_used(eb, dev_item);
10860
10861         key.objectid = dev_id;
10862         key.type = BTRFS_DEV_EXTENT_KEY;
10863         key.offset = 0;
10864
10865         btrfs_init_path(&path);
10866         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10867         if (ret < 0) {
10868                 btrfs_item_key_to_cpu(eb, &key, slot);
10869                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10870                         key.objectid, key.type, key.offset);
10871                 btrfs_release_path(&path);
10872                 return REFERENCER_MISSING;
10873         }
10874
10875         /* Iterate dev_extents to calculate the used space of a device */
10876         while (1) {
10877                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10878
10879                 if (key.objectid > dev_id)
10880                         break;
10881                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10882                         goto next;
10883
10884                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10885                                      struct btrfs_dev_extent);
10886                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10887 next:
10888                 ret = btrfs_next_item(dev_root, &path);
10889                 if (ret)
10890                         break;
10891         }
10892         btrfs_release_path(&path);
10893
10894         if (used != total) {
10895                 btrfs_item_key_to_cpu(eb, &key, slot);
10896                 error(
10897 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10898                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10899                         BTRFS_DEV_EXTENT_KEY, dev_id);
10900                 return ACCOUNTING_MISMATCH;
10901         }
10902         return 0;
10903 }
10904
10905 /*
10906  * Check a block group item with its referener (chunk) and its used space
10907  * with extent/metadata item
10908  */
10909 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10910                                   struct extent_buffer *eb, int slot)
10911 {
10912         struct btrfs_root *extent_root = fs_info->extent_root;
10913         struct btrfs_root *chunk_root = fs_info->chunk_root;
10914         struct btrfs_block_group_item *bi;
10915         struct btrfs_block_group_item bg_item;
10916         struct btrfs_path path;
10917         struct btrfs_key bg_key;
10918         struct btrfs_key chunk_key;
10919         struct btrfs_key extent_key;
10920         struct btrfs_chunk *chunk;
10921         struct extent_buffer *leaf;
10922         struct btrfs_extent_item *ei;
10923         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10924         u64 flags;
10925         u64 bg_flags;
10926         u64 used;
10927         u64 total = 0;
10928         int ret;
10929         int err = 0;
10930
10931         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10932         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10933         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10934         used = btrfs_block_group_used(&bg_item);
10935         bg_flags = btrfs_block_group_flags(&bg_item);
10936
10937         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10938         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10939         chunk_key.offset = bg_key.objectid;
10940
10941         btrfs_init_path(&path);
10942         /* Search for the referencer chunk */
10943         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10944         if (ret) {
10945                 error(
10946                 "block group[%llu %llu] did not find the related chunk item",
10947                         bg_key.objectid, bg_key.offset);
10948                 err |= REFERENCER_MISSING;
10949         } else {
10950                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10951                                         struct btrfs_chunk);
10952                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10953                                                 bg_key.offset) {
10954                         error(
10955         "block group[%llu %llu] related chunk item length does not match",
10956                                 bg_key.objectid, bg_key.offset);
10957                         err |= REFERENCER_MISMATCH;
10958                 }
10959         }
10960         btrfs_release_path(&path);
10961
10962         /* Search from the block group bytenr */
10963         extent_key.objectid = bg_key.objectid;
10964         extent_key.type = 0;
10965         extent_key.offset = 0;
10966
10967         btrfs_init_path(&path);
10968         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10969         if (ret < 0)
10970                 goto out;
10971
10972         /* Iterate extent tree to account used space */
10973         while (1) {
10974                 leaf = path.nodes[0];
10975                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10976                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10977                         break;
10978
10979                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10980                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10981                         goto next;
10982                 if (extent_key.objectid < bg_key.objectid)
10983                         goto next;
10984
10985                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10986                         total += nodesize;
10987                 else
10988                         total += extent_key.offset;
10989
10990                 ei = btrfs_item_ptr(leaf, path.slots[0],
10991                                     struct btrfs_extent_item);
10992                 flags = btrfs_extent_flags(leaf, ei);
10993                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10994                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10995                                 error(
10996                         "bad extent[%llu, %llu) type mismatch with chunk",
10997                                         extent_key.objectid,
10998                                         extent_key.objectid + extent_key.offset);
10999                                 err |= CHUNK_TYPE_MISMATCH;
11000                         }
11001                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11002                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11003                                     BTRFS_BLOCK_GROUP_METADATA))) {
11004                                 error(
11005                         "bad extent[%llu, %llu) type mismatch with chunk",
11006                                         extent_key.objectid,
11007                                         extent_key.objectid + nodesize);
11008                                 err |= CHUNK_TYPE_MISMATCH;
11009                         }
11010                 }
11011 next:
11012                 ret = btrfs_next_item(extent_root, &path);
11013                 if (ret)
11014                         break;
11015         }
11016
11017 out:
11018         btrfs_release_path(&path);
11019
11020         if (total != used) {
11021                 error(
11022                 "block group[%llu %llu] used %llu but extent items used %llu",
11023                         bg_key.objectid, bg_key.offset, used, total);
11024                 err |= ACCOUNTING_MISMATCH;
11025         }
11026         return err;
11027 }
11028
11029 /*
11030  * Check a chunk item.
11031  * Including checking all referred dev_extents and block group
11032  */
11033 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11034                             struct extent_buffer *eb, int slot)
11035 {
11036         struct btrfs_root *extent_root = fs_info->extent_root;
11037         struct btrfs_root *dev_root = fs_info->dev_root;
11038         struct btrfs_path path;
11039         struct btrfs_key chunk_key;
11040         struct btrfs_key bg_key;
11041         struct btrfs_key devext_key;
11042         struct btrfs_chunk *chunk;
11043         struct extent_buffer *leaf;
11044         struct btrfs_block_group_item *bi;
11045         struct btrfs_block_group_item bg_item;
11046         struct btrfs_dev_extent *ptr;
11047         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11048         u64 length;
11049         u64 chunk_end;
11050         u64 type;
11051         u64 profile;
11052         int num_stripes;
11053         u64 offset;
11054         u64 objectid;
11055         int i;
11056         int ret;
11057         int err = 0;
11058
11059         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11060         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11061         length = btrfs_chunk_length(eb, chunk);
11062         chunk_end = chunk_key.offset + length;
11063         if (!IS_ALIGNED(length, sectorsize)) {
11064                 error("chunk[%llu %llu) not aligned to %u",
11065                         chunk_key.offset, chunk_end, sectorsize);
11066                 err |= BYTES_UNALIGNED;
11067                 goto out;
11068         }
11069
11070         type = btrfs_chunk_type(eb, chunk);
11071         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11072         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11073                 error("chunk[%llu %llu) has no chunk type",
11074                         chunk_key.offset, chunk_end);
11075                 err |= UNKNOWN_TYPE;
11076         }
11077         if (profile && (profile & (profile - 1))) {
11078                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11079                         chunk_key.offset, chunk_end, profile);
11080                 err |= UNKNOWN_TYPE;
11081         }
11082
11083         bg_key.objectid = chunk_key.offset;
11084         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11085         bg_key.offset = length;
11086
11087         btrfs_init_path(&path);
11088         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11089         if (ret) {
11090                 error(
11091                 "chunk[%llu %llu) did not find the related block group item",
11092                         chunk_key.offset, chunk_end);
11093                 err |= REFERENCER_MISSING;
11094         } else{
11095                 leaf = path.nodes[0];
11096                 bi = btrfs_item_ptr(leaf, path.slots[0],
11097                                     struct btrfs_block_group_item);
11098                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11099                                    sizeof(bg_item));
11100                 if (btrfs_block_group_flags(&bg_item) != type) {
11101                         error(
11102 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11103                                 chunk_key.offset, chunk_end, type,
11104                                 btrfs_block_group_flags(&bg_item));
11105                         err |= REFERENCER_MISSING;
11106                 }
11107         }
11108
11109         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11110         for (i = 0; i < num_stripes; i++) {
11111                 btrfs_release_path(&path);
11112                 btrfs_init_path(&path);
11113                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11114                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11115                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11116
11117                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11118                                         0, 0);
11119                 if (ret)
11120                         goto not_match_dev;
11121
11122                 leaf = path.nodes[0];
11123                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11124                                      struct btrfs_dev_extent);
11125                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11126                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11127                 if (objectid != chunk_key.objectid ||
11128                     offset != chunk_key.offset ||
11129                     btrfs_dev_extent_length(leaf, ptr) != length)
11130                         goto not_match_dev;
11131                 continue;
11132 not_match_dev:
11133                 err |= BACKREF_MISSING;
11134                 error(
11135                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11136                         chunk_key.objectid, chunk_end, i);
11137                 continue;
11138         }
11139         btrfs_release_path(&path);
11140 out:
11141         return err;
11142 }
11143
11144 /*
11145  * Main entry function to check known items and update related accounting info
11146  */
11147 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11148 {
11149         struct btrfs_fs_info *fs_info = root->fs_info;
11150         struct btrfs_key key;
11151         int slot = 0;
11152         int type;
11153         struct btrfs_extent_data_ref *dref;
11154         int ret;
11155         int err = 0;
11156
11157 next:
11158         btrfs_item_key_to_cpu(eb, &key, slot);
11159         type = key.type;
11160
11161         switch (type) {
11162         case BTRFS_EXTENT_DATA_KEY:
11163                 ret = check_extent_data_item(root, eb, slot);
11164                 err |= ret;
11165                 break;
11166         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11167                 ret = check_block_group_item(fs_info, eb, slot);
11168                 err |= ret;
11169                 break;
11170         case BTRFS_DEV_ITEM_KEY:
11171                 ret = check_dev_item(fs_info, eb, slot);
11172                 err |= ret;
11173                 break;
11174         case BTRFS_CHUNK_ITEM_KEY:
11175                 ret = check_chunk_item(fs_info, eb, slot);
11176                 err |= ret;
11177                 break;
11178         case BTRFS_DEV_EXTENT_KEY:
11179                 ret = check_dev_extent_item(fs_info, eb, slot);
11180                 err |= ret;
11181                 break;
11182         case BTRFS_EXTENT_ITEM_KEY:
11183         case BTRFS_METADATA_ITEM_KEY:
11184                 ret = check_extent_item(fs_info, eb, slot);
11185                 err |= ret;
11186                 break;
11187         case BTRFS_EXTENT_CSUM_KEY:
11188                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11189                 break;
11190         case BTRFS_TREE_BLOCK_REF_KEY:
11191                 ret = check_tree_block_backref(fs_info, key.offset,
11192                                                key.objectid, -1);
11193                 err |= ret;
11194                 break;
11195         case BTRFS_EXTENT_DATA_REF_KEY:
11196                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11197                 ret = check_extent_data_backref(fs_info,
11198                                 btrfs_extent_data_ref_root(eb, dref),
11199                                 btrfs_extent_data_ref_objectid(eb, dref),
11200                                 btrfs_extent_data_ref_offset(eb, dref),
11201                                 key.objectid, 0,
11202                                 btrfs_extent_data_ref_count(eb, dref));
11203                 err |= ret;
11204                 break;
11205         case BTRFS_SHARED_BLOCK_REF_KEY:
11206                 ret = check_shared_block_backref(fs_info, key.offset,
11207                                                  key.objectid, -1);
11208                 err |= ret;
11209                 break;
11210         case BTRFS_SHARED_DATA_REF_KEY:
11211                 ret = check_shared_data_backref(fs_info, key.offset,
11212                                                 key.objectid);
11213                 err |= ret;
11214                 break;
11215         default:
11216                 break;
11217         }
11218
11219         if (++slot < btrfs_header_nritems(eb))
11220                 goto next;
11221
11222         return err;
11223 }
11224
11225 /*
11226  * Helper function for later fs/subvol tree check.  To determine if a tree
11227  * block should be checked.
11228  * This function will ensure only the direct referencer with lowest rootid to
11229  * check a fs/subvolume tree block.
11230  *
11231  * Backref check at extent tree would detect errors like missing subvolume
11232  * tree, so we can do aggressive check to reduce duplicated checks.
11233  */
11234 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11235 {
11236         struct btrfs_root *extent_root = root->fs_info->extent_root;
11237         struct btrfs_key key;
11238         struct btrfs_path path;
11239         struct extent_buffer *leaf;
11240         int slot;
11241         struct btrfs_extent_item *ei;
11242         unsigned long ptr;
11243         unsigned long end;
11244         int type;
11245         u32 item_size;
11246         u64 offset;
11247         struct btrfs_extent_inline_ref *iref;
11248         int ret;
11249
11250         btrfs_init_path(&path);
11251         key.objectid = btrfs_header_bytenr(eb);
11252         key.type = BTRFS_METADATA_ITEM_KEY;
11253         key.offset = (u64)-1;
11254
11255         /*
11256          * Any failure in backref resolving means we can't determine
11257          * whom the tree block belongs to.
11258          * So in that case, we need to check that tree block
11259          */
11260         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11261         if (ret < 0)
11262                 goto need_check;
11263
11264         ret = btrfs_previous_extent_item(extent_root, &path,
11265                                          btrfs_header_bytenr(eb));
11266         if (ret)
11267                 goto need_check;
11268
11269         leaf = path.nodes[0];
11270         slot = path.slots[0];
11271         btrfs_item_key_to_cpu(leaf, &key, slot);
11272         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11273
11274         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11275                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11276         } else {
11277                 struct btrfs_tree_block_info *info;
11278
11279                 info = (struct btrfs_tree_block_info *)(ei + 1);
11280                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11281         }
11282
11283         item_size = btrfs_item_size_nr(leaf, slot);
11284         ptr = (unsigned long)iref;
11285         end = (unsigned long)ei + item_size;
11286         while (ptr < end) {
11287                 iref = (struct btrfs_extent_inline_ref *)ptr;
11288                 type = btrfs_extent_inline_ref_type(leaf, iref);
11289                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11290
11291                 /*
11292                  * We only check the tree block if current root is
11293                  * the lowest referencer of it.
11294                  */
11295                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11296                     offset < root->objectid) {
11297                         btrfs_release_path(&path);
11298                         return 0;
11299                 }
11300
11301                 ptr += btrfs_extent_inline_ref_size(type);
11302         }
11303         /*
11304          * Normally we should also check keyed tree block ref, but that may be
11305          * very time consuming.  Inlined ref should already make us skip a lot
11306          * of refs now.  So skip search keyed tree block ref.
11307          */
11308
11309 need_check:
11310         btrfs_release_path(&path);
11311         return 1;
11312 }
11313
11314 /*
11315  * Traversal function for tree block. We will do:
11316  * 1) Skip shared fs/subvolume tree blocks
11317  * 2) Update related bytes accounting
11318  * 3) Pre-order traversal
11319  */
11320 static int traverse_tree_block(struct btrfs_root *root,
11321                                 struct extent_buffer *node)
11322 {
11323         struct extent_buffer *eb;
11324         struct btrfs_key key;
11325         struct btrfs_key drop_key;
11326         int level;
11327         u64 nr;
11328         int i;
11329         int err = 0;
11330         int ret;
11331
11332         /*
11333          * Skip shared fs/subvolume tree block, in that case they will
11334          * be checked by referencer with lowest rootid
11335          */
11336         if (is_fstree(root->objectid) && !should_check(root, node))
11337                 return 0;
11338
11339         /* Update bytes accounting */
11340         total_btree_bytes += node->len;
11341         if (fs_root_objectid(btrfs_header_owner(node)))
11342                 total_fs_tree_bytes += node->len;
11343         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11344                 total_extent_tree_bytes += node->len;
11345         if (!found_old_backref &&
11346             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11347             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11348             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11349                 found_old_backref = 1;
11350
11351         /* pre-order tranversal, check itself first */
11352         level = btrfs_header_level(node);
11353         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11354                                    btrfs_header_level(node),
11355                                    btrfs_header_owner(node));
11356         err |= ret;
11357         if (err)
11358                 error(
11359         "check %s failed root %llu bytenr %llu level %d, force continue check",
11360                         level ? "node":"leaf", root->objectid,
11361                         btrfs_header_bytenr(node), btrfs_header_level(node));
11362
11363         if (!level) {
11364                 btree_space_waste += btrfs_leaf_free_space(root, node);
11365                 ret = check_leaf_items(root, node);
11366                 err |= ret;
11367                 return err;
11368         }
11369
11370         nr = btrfs_header_nritems(node);
11371         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11372         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11373                 sizeof(struct btrfs_key_ptr);
11374
11375         /* Then check all its children */
11376         for (i = 0; i < nr; i++) {
11377                 u64 blocknr = btrfs_node_blockptr(node, i);
11378
11379                 btrfs_node_key_to_cpu(node, &key, i);
11380                 if (level == root->root_item.drop_level &&
11381                     is_dropped_key(&key, &drop_key))
11382                         continue;
11383
11384                 /*
11385                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11386                  * to call the function itself.
11387                  */
11388                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11389                 if (extent_buffer_uptodate(eb)) {
11390                         ret = traverse_tree_block(root, eb);
11391                         err |= ret;
11392                 }
11393                 free_extent_buffer(eb);
11394         }
11395
11396         return err;
11397 }
11398
11399 /*
11400  * Low memory usage version check_chunks_and_extents.
11401  */
11402 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11403 {
11404         struct btrfs_path path;
11405         struct btrfs_key key;
11406         struct btrfs_root *root1;
11407         struct btrfs_root *cur_root;
11408         int err = 0;
11409         int ret;
11410
11411         root1 = root->fs_info->chunk_root;
11412         ret = traverse_tree_block(root1, root1->node);
11413         err |= ret;
11414
11415         root1 = root->fs_info->tree_root;
11416         ret = traverse_tree_block(root1, root1->node);
11417         err |= ret;
11418
11419         btrfs_init_path(&path);
11420         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11421         key.offset = 0;
11422         key.type = BTRFS_ROOT_ITEM_KEY;
11423
11424         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11425         if (ret) {
11426                 error("cannot find extent treet in tree_root");
11427                 goto out;
11428         }
11429
11430         while (1) {
11431                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11432                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11433                         goto next;
11434                 key.offset = (u64)-1;
11435
11436                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11437                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11438                                         &key);
11439                 else
11440                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11441                 if (IS_ERR(cur_root) || !cur_root) {
11442                         error("failed to read tree: %lld", key.objectid);
11443                         goto next;
11444                 }
11445
11446                 ret = traverse_tree_block(cur_root, cur_root->node);
11447                 err |= ret;
11448
11449                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11450                         btrfs_free_fs_root(cur_root);
11451 next:
11452                 ret = btrfs_next_item(root1, &path);
11453                 if (ret)
11454                         goto out;
11455         }
11456
11457 out:
11458         btrfs_release_path(&path);
11459         return err;
11460 }
11461
11462 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11463                            struct btrfs_root *root, int overwrite)
11464 {
11465         struct extent_buffer *c;
11466         struct extent_buffer *old = root->node;
11467         int level;
11468         int ret;
11469         struct btrfs_disk_key disk_key = {0,0,0};
11470
11471         level = 0;
11472
11473         if (overwrite) {
11474                 c = old;
11475                 extent_buffer_get(c);
11476                 goto init;
11477         }
11478         c = btrfs_alloc_free_block(trans, root,
11479                                    root->nodesize,
11480                                    root->root_key.objectid,
11481                                    &disk_key, level, 0, 0);
11482         if (IS_ERR(c)) {
11483                 c = old;
11484                 extent_buffer_get(c);
11485                 overwrite = 1;
11486         }
11487 init:
11488         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11489         btrfs_set_header_level(c, level);
11490         btrfs_set_header_bytenr(c, c->start);
11491         btrfs_set_header_generation(c, trans->transid);
11492         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11493         btrfs_set_header_owner(c, root->root_key.objectid);
11494
11495         write_extent_buffer(c, root->fs_info->fsid,
11496                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11497
11498         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11499                             btrfs_header_chunk_tree_uuid(c),
11500                             BTRFS_UUID_SIZE);
11501
11502         btrfs_mark_buffer_dirty(c);
11503         /*
11504          * this case can happen in the following case:
11505          *
11506          * 1.overwrite previous root.
11507          *
11508          * 2.reinit reloc data root, this is because we skip pin
11509          * down reloc data tree before which means we can allocate
11510          * same block bytenr here.
11511          */
11512         if (old->start == c->start) {
11513                 btrfs_set_root_generation(&root->root_item,
11514                                           trans->transid);
11515                 root->root_item.level = btrfs_header_level(root->node);
11516                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11517                                         &root->root_key, &root->root_item);
11518                 if (ret) {
11519                         free_extent_buffer(c);
11520                         return ret;
11521                 }
11522         }
11523         free_extent_buffer(old);
11524         root->node = c;
11525         add_root_to_dirty_list(root);
11526         return 0;
11527 }
11528
11529 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11530                                 struct extent_buffer *eb, int tree_root)
11531 {
11532         struct extent_buffer *tmp;
11533         struct btrfs_root_item *ri;
11534         struct btrfs_key key;
11535         u64 bytenr;
11536         u32 nodesize;
11537         int level = btrfs_header_level(eb);
11538         int nritems;
11539         int ret;
11540         int i;
11541
11542         /*
11543          * If we have pinned this block before, don't pin it again.
11544          * This can not only avoid forever loop with broken filesystem
11545          * but also give us some speedups.
11546          */
11547         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11548                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11549                 return 0;
11550
11551         btrfs_pin_extent(fs_info, eb->start, eb->len);
11552
11553         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11554         nritems = btrfs_header_nritems(eb);
11555         for (i = 0; i < nritems; i++) {
11556                 if (level == 0) {
11557                         btrfs_item_key_to_cpu(eb, &key, i);
11558                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11559                                 continue;
11560                         /* Skip the extent root and reloc roots */
11561                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11562                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11563                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11564                                 continue;
11565                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11566                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11567
11568                         /*
11569                          * If at any point we start needing the real root we
11570                          * will have to build a stump root for the root we are
11571                          * in, but for now this doesn't actually use the root so
11572                          * just pass in extent_root.
11573                          */
11574                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11575                                               nodesize, 0);
11576                         if (!extent_buffer_uptodate(tmp)) {
11577                                 fprintf(stderr, "Error reading root block\n");
11578                                 return -EIO;
11579                         }
11580                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11581                         free_extent_buffer(tmp);
11582                         if (ret)
11583                                 return ret;
11584                 } else {
11585                         bytenr = btrfs_node_blockptr(eb, i);
11586
11587                         /* If we aren't the tree root don't read the block */
11588                         if (level == 1 && !tree_root) {
11589                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11590                                 continue;
11591                         }
11592
11593                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11594                                               nodesize, 0);
11595                         if (!extent_buffer_uptodate(tmp)) {
11596                                 fprintf(stderr, "Error reading tree block\n");
11597                                 return -EIO;
11598                         }
11599                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11600                         free_extent_buffer(tmp);
11601                         if (ret)
11602                                 return ret;
11603                 }
11604         }
11605
11606         return 0;
11607 }
11608
11609 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11610 {
11611         int ret;
11612
11613         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11614         if (ret)
11615                 return ret;
11616
11617         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11618 }
11619
11620 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11621 {
11622         struct btrfs_block_group_cache *cache;
11623         struct btrfs_path path;
11624         struct extent_buffer *leaf;
11625         struct btrfs_chunk *chunk;
11626         struct btrfs_key key;
11627         int ret;
11628         u64 start;
11629
11630         btrfs_init_path(&path);
11631         key.objectid = 0;
11632         key.type = BTRFS_CHUNK_ITEM_KEY;
11633         key.offset = 0;
11634         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11635         if (ret < 0) {
11636                 btrfs_release_path(&path);
11637                 return ret;
11638         }
11639
11640         /*
11641          * We do this in case the block groups were screwed up and had alloc
11642          * bits that aren't actually set on the chunks.  This happens with
11643          * restored images every time and could happen in real life I guess.
11644          */
11645         fs_info->avail_data_alloc_bits = 0;
11646         fs_info->avail_metadata_alloc_bits = 0;
11647         fs_info->avail_system_alloc_bits = 0;
11648
11649         /* First we need to create the in-memory block groups */
11650         while (1) {
11651                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11652                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11653                         if (ret < 0) {
11654                                 btrfs_release_path(&path);
11655                                 return ret;
11656                         }
11657                         if (ret) {
11658                                 ret = 0;
11659                                 break;
11660                         }
11661                 }
11662                 leaf = path.nodes[0];
11663                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11664                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11665                         path.slots[0]++;
11666                         continue;
11667                 }
11668
11669                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11670                 btrfs_add_block_group(fs_info, 0,
11671                                       btrfs_chunk_type(leaf, chunk),
11672                                       key.objectid, key.offset,
11673                                       btrfs_chunk_length(leaf, chunk));
11674                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11675                                  key.offset + btrfs_chunk_length(leaf, chunk));
11676                 path.slots[0]++;
11677         }
11678         start = 0;
11679         while (1) {
11680                 cache = btrfs_lookup_first_block_group(fs_info, start);
11681                 if (!cache)
11682                         break;
11683                 cache->cached = 1;
11684                 start = cache->key.objectid + cache->key.offset;
11685         }
11686
11687         btrfs_release_path(&path);
11688         return 0;
11689 }
11690
/*
 * Drop a pending balance and re-initialize the data reloc tree.
 *
 * If the tree root holds a balance item, it is deleted together with every
 * item whose objectid is BTRFS_TREE_RELOC_OBJECTID.  Whether or not a
 * balance item existed, the data reloc tree is then re-initialized to an
 * empty tree and gets a new root directory.
 *
 * Returns 0 on success, otherwise the error of the step that failed.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	/* del_slot is only read while del_nr is non-zero */
	int del_slot, del_nr = 0;
	int ret;
	int found = 0;

	btrfs_init_path(&path);
	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
	if (ret) {
		/* ret > 0: no balance item, only the data reloc tree is reset */
		if (ret > 0)
			ret = 0;
		if (!ret)
			goto reinit_data_reloc;
		else
			goto out;
	}

	ret = btrfs_del_item(trans, root, &path);
	if (ret)
		goto out;
	btrfs_release_path(&path);

	/* Now delete all items with objectid BTRFS_TREE_RELOC_OBJECTID */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
	if (ret < 0)
		goto out;
	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			/*
			 * End of the current leaf.  If this search did not
			 * land on any item at all there is nothing left.
			 */
			if (!found)
				break;

			/* Delete the run of items collected in this leaf */
			if (del_nr) {
				ret = btrfs_del_items(trans, root, &path,
						      del_slot, del_nr);
				del_nr = 0;
				if (ret)
					goto out;
			}
			/* Search again just past the last key we looked at */
			key.offset++;
			btrfs_release_path(&path);

			found = 0;
			ret = btrfs_search_slot(trans, root, &key, &path,
						-1, 1);
			if (ret < 0)
				goto out;
			continue;
		}
		found = 1;
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		/* Past the reloc objectid: stop scanning */
		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
			break;
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			path.slots[0]++;
			continue;
		}
		/* Start a new run of slots to delete, or extend the current one */
		if (!del_nr) {
			del_slot = path.slots[0];
			del_nr = 1;
		} else {
			del_nr++;
		}
		path.slots[0]++;
	}

	/* Delete whatever run was still pending when the loop ended */
	if (del_nr) {
		ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
		if (ret)
			goto out;
	}
	btrfs_release_path(&path);

reinit_data_reloc:
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
		goto out;
	}
	record_root_in_trans(trans, root);
	/* overwrite == 0: try to give the tree a newly allocated root block */
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	if (ret)
		goto out;
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
	btrfs_release_path(&path);
	return ret;
}
11793
11794 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11795                               struct btrfs_fs_info *fs_info)
11796 {
11797         u64 start = 0;
11798         int ret;
11799
11800         /*
11801          * The only reason we don't do this is because right now we're just
11802          * walking the trees we find and pinning down their bytes, we don't look
11803          * at any of the leaves.  In order to do mixed groups we'd have to check
11804          * the leaves of any fs roots and pin down the bytes for any file
11805          * extents we find.  Not hard but why do it if we don't have to?
11806          */
11807         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11808                 fprintf(stderr, "We don't support re-initing the extent tree "
11809                         "for mixed block groups yet, please notify a btrfs "
11810                         "developer you want to do this so they can add this "
11811                         "functionality.\n");
11812                 return -EINVAL;
11813         }
11814
11815         /*
11816          * first we need to walk all of the trees except the extent tree and pin
11817          * down the bytes that are in use so we don't overwrite any existing
11818          * metadata.
11819          */
11820         ret = pin_metadata_blocks(fs_info);
11821         if (ret) {
11822                 fprintf(stderr, "error pinning down used bytes\n");
11823                 return ret;
11824         }
11825
11826         /*
11827          * Need to drop all the block groups since we're going to recreate all
11828          * of them again.
11829          */
11830         btrfs_free_block_groups(fs_info);
11831         ret = reset_block_groups(fs_info);
11832         if (ret) {
11833                 fprintf(stderr, "error resetting the block groups\n");
11834                 return ret;
11835         }
11836
11837         /* Ok we can allocate now, reinit the extent root */
11838         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11839         if (ret) {
11840                 fprintf(stderr, "extent root initialization failed\n");
11841                 /*
11842                  * When the transaction code is updated we should end the
11843                  * transaction, but for now progs only knows about commit so
11844                  * just return an error.
11845                  */
11846                 return ret;
11847         }
11848
11849         /*
11850          * Now we have all the in-memory block groups setup so we can make
11851          * allocations properly, and the metadata we care about is safe since we
11852          * pinned all of it above.
11853          */
11854         while (1) {
11855                 struct btrfs_block_group_cache *cache;
11856
11857                 cache = btrfs_lookup_first_block_group(fs_info, start);
11858                 if (!cache)
11859                         break;
11860                 start = cache->key.objectid + cache->key.offset;
11861                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11862                                         &cache->key, &cache->item,
11863                                         sizeof(cache->item));
11864                 if (ret) {
11865                         fprintf(stderr, "Error adding block group\n");
11866                         return ret;
11867                 }
11868                 btrfs_extent_post_op(trans, fs_info->extent_root);
11869         }
11870
11871         ret = reset_balance(trans, fs_info);
11872         if (ret)
11873                 fprintf(stderr, "error resetting the pending balance\n");
11874
11875         return ret;
11876 }
11877
11878 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11879 {
11880         struct btrfs_path path;
11881         struct btrfs_trans_handle *trans;
11882         struct btrfs_key key;
11883         int ret;
11884
11885         printf("Recowing metadata block %llu\n", eb->start);
11886         key.objectid = btrfs_header_owner(eb);
11887         key.type = BTRFS_ROOT_ITEM_KEY;
11888         key.offset = (u64)-1;
11889
11890         root = btrfs_read_fs_root(root->fs_info, &key);
11891         if (IS_ERR(root)) {
11892                 fprintf(stderr, "Couldn't find owner root %llu\n",
11893                         key.objectid);
11894                 return PTR_ERR(root);
11895         }
11896
11897         trans = btrfs_start_transaction(root, 1);
11898         if (IS_ERR(trans))
11899                 return PTR_ERR(trans);
11900
11901         btrfs_init_path(&path);
11902         path.lowest_level = btrfs_header_level(eb);
11903         if (path.lowest_level)
11904                 btrfs_node_key_to_cpu(eb, &key, 0);
11905         else
11906                 btrfs_item_key_to_cpu(eb, &key, 0);
11907
11908         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11909         btrfs_commit_transaction(trans, root);
11910         btrfs_release_path(&path);
11911         return ret;
11912 }
11913
11914 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11915 {
11916         struct btrfs_path path;
11917         struct btrfs_trans_handle *trans;
11918         struct btrfs_key key;
11919         int ret;
11920
11921         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11922                bad->key.type, bad->key.offset);
11923         key.objectid = bad->root_id;
11924         key.type = BTRFS_ROOT_ITEM_KEY;
11925         key.offset = (u64)-1;
11926
11927         root = btrfs_read_fs_root(root->fs_info, &key);
11928         if (IS_ERR(root)) {
11929                 fprintf(stderr, "Couldn't find owner root %llu\n",
11930                         key.objectid);
11931                 return PTR_ERR(root);
11932         }
11933
11934         trans = btrfs_start_transaction(root, 1);
11935         if (IS_ERR(trans))
11936                 return PTR_ERR(trans);
11937
11938         btrfs_init_path(&path);
11939         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11940         if (ret) {
11941                 if (ret > 0)
11942                         ret = 0;
11943                 goto out;
11944         }
11945         ret = btrfs_del_item(trans, root, &path);
11946 out:
11947         btrfs_commit_transaction(trans, root);
11948         btrfs_release_path(&path);
11949         return ret;
11950 }
11951
11952 static int zero_log_tree(struct btrfs_root *root)
11953 {
11954         struct btrfs_trans_handle *trans;
11955         int ret;
11956
11957         trans = btrfs_start_transaction(root, 1);
11958         if (IS_ERR(trans)) {
11959                 ret = PTR_ERR(trans);
11960                 return ret;
11961         }
11962         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11963         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11964         ret = btrfs_commit_transaction(trans, root);
11965         return ret;
11966 }
11967
11968 static int populate_csum(struct btrfs_trans_handle *trans,
11969                          struct btrfs_root *csum_root, char *buf, u64 start,
11970                          u64 len)
11971 {
11972         u64 offset = 0;
11973         u64 sectorsize;
11974         int ret = 0;
11975
11976         while (offset < len) {
11977                 sectorsize = csum_root->sectorsize;
11978                 ret = read_extent_data(csum_root, buf, start + offset,
11979                                        &sectorsize, 0);
11980                 if (ret)
11981                         break;
11982                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11983                                             start + offset, buf, sectorsize);
11984                 if (ret)
11985                         break;
11986                 offset += sectorsize;
11987         }
11988         return ret;
11989 }
11990
11991 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11992                                       struct btrfs_root *csum_root,
11993                                       struct btrfs_root *cur_root)
11994 {
11995         struct btrfs_path path;
11996         struct btrfs_key key;
11997         struct extent_buffer *node;
11998         struct btrfs_file_extent_item *fi;
11999         char *buf = NULL;
12000         u64 start = 0;
12001         u64 len = 0;
12002         int slot = 0;
12003         int ret = 0;
12004
12005         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12006         if (!buf)
12007                 return -ENOMEM;
12008
12009         btrfs_init_path(&path);
12010         key.objectid = 0;
12011         key.offset = 0;
12012         key.type = 0;
12013         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12014         if (ret < 0)
12015                 goto out;
12016         /* Iterate all regular file extents and fill its csum */
12017         while (1) {
12018                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12019
12020                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12021                         goto next;
12022                 node = path.nodes[0];
12023                 slot = path.slots[0];
12024                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12025                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12026                         goto next;
12027                 start = btrfs_file_extent_disk_bytenr(node, fi);
12028                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12029
12030                 ret = populate_csum(trans, csum_root, buf, start, len);
12031                 if (ret == -EEXIST)
12032                         ret = 0;
12033                 if (ret < 0)
12034                         goto out;
12035 next:
12036                 /*
12037                  * TODO: if next leaf is corrupted, jump to nearest next valid
12038                  * leaf.
12039                  */
12040                 ret = btrfs_next_item(cur_root, &path);
12041                 if (ret < 0)
12042                         goto out;
12043                 if (ret > 0) {
12044                         ret = 0;
12045                         goto out;
12046                 }
12047         }
12048
12049 out:
12050         btrfs_release_path(&path);
12051         free(buf);
12052         return ret;
12053 }
12054
12055 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12056                                   struct btrfs_root *csum_root)
12057 {
12058         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12059         struct btrfs_path path;
12060         struct btrfs_root *tree_root = fs_info->tree_root;
12061         struct btrfs_root *cur_root;
12062         struct extent_buffer *node;
12063         struct btrfs_key key;
12064         int slot = 0;
12065         int ret = 0;
12066
12067         btrfs_init_path(&path);
12068         key.objectid = BTRFS_FS_TREE_OBJECTID;
12069         key.offset = 0;
12070         key.type = BTRFS_ROOT_ITEM_KEY;
12071         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12072         if (ret < 0)
12073                 goto out;
12074         if (ret > 0) {
12075                 ret = -ENOENT;
12076                 goto out;
12077         }
12078
12079         while (1) {
12080                 node = path.nodes[0];
12081                 slot = path.slots[0];
12082                 btrfs_item_key_to_cpu(node, &key, slot);
12083                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12084                         goto out;
12085                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12086                         goto next;
12087                 if (!is_fstree(key.objectid))
12088                         goto next;
12089                 key.offset = (u64)-1;
12090
12091                 cur_root = btrfs_read_fs_root(fs_info, &key);
12092                 if (IS_ERR(cur_root) || !cur_root) {
12093                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12094                                 key.objectid);
12095                         goto out;
12096                 }
12097                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12098                                 cur_root);
12099                 if (ret < 0)
12100                         goto out;
12101 next:
12102                 ret = btrfs_next_item(tree_root, &path);
12103                 if (ret > 0) {
12104                         ret = 0;
12105                         goto out;
12106                 }
12107                 if (ret < 0)
12108                         goto out;
12109         }
12110
12111 out:
12112         btrfs_release_path(&path);
12113         return ret;
12114 }
12115
12116 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12117                                       struct btrfs_root *csum_root)
12118 {
12119         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12120         struct btrfs_path path;
12121         struct btrfs_extent_item *ei;
12122         struct extent_buffer *leaf;
12123         char *buf;
12124         struct btrfs_key key;
12125         int ret;
12126
12127         btrfs_init_path(&path);
12128         key.objectid = 0;
12129         key.type = BTRFS_EXTENT_ITEM_KEY;
12130         key.offset = 0;
12131         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12132         if (ret < 0) {
12133                 btrfs_release_path(&path);
12134                 return ret;
12135         }
12136
12137         buf = malloc(csum_root->sectorsize);
12138         if (!buf) {
12139                 btrfs_release_path(&path);
12140                 return -ENOMEM;
12141         }
12142
12143         while (1) {
12144                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12145                         ret = btrfs_next_leaf(extent_root, &path);
12146                         if (ret < 0)
12147                                 break;
12148                         if (ret) {
12149                                 ret = 0;
12150                                 break;
12151                         }
12152                 }
12153                 leaf = path.nodes[0];
12154
12155                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12156                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12157                         path.slots[0]++;
12158                         continue;
12159                 }
12160
12161                 ei = btrfs_item_ptr(leaf, path.slots[0],
12162                                     struct btrfs_extent_item);
12163                 if (!(btrfs_extent_flags(leaf, ei) &
12164                       BTRFS_EXTENT_FLAG_DATA)) {
12165                         path.slots[0]++;
12166                         continue;
12167                 }
12168
12169                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12170                                     key.offset);
12171                 if (ret)
12172                         break;
12173                 path.slots[0]++;
12174         }
12175
12176         btrfs_release_path(&path);
12177         free(buf);
12178         return ret;
12179 }
12180
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Two sources can drive the rebuild.  Normally the extent tree is scanned,
 * but an extent tree init wipes out all extent info, so in that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are walked instead.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12197
12198 static void free_roots_info_cache(void)
12199 {
12200         if (!roots_info_cache)
12201                 return;
12202
12203         while (!cache_tree_empty(roots_info_cache)) {
12204                 struct cache_extent *entry;
12205                 struct root_item_info *rii;
12206
12207                 entry = first_cache_extent(roots_info_cache);
12208                 if (!entry)
12209                         break;
12210                 remove_cache_extent(roots_info_cache, entry);
12211                 rii = container_of(entry, struct root_item_info, cache_extent);
12212                 free(rii);
12213         }
12214
12215         free(roots_info_cache);
12216         roots_info_cache = NULL;
12217 }
12218
12219 static int build_roots_info_cache(struct btrfs_fs_info *info)
12220 {
12221         int ret = 0;
12222         struct btrfs_key key;
12223         struct extent_buffer *leaf;
12224         struct btrfs_path path;
12225
12226         if (!roots_info_cache) {
12227                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12228                 if (!roots_info_cache)
12229                         return -ENOMEM;
12230                 cache_tree_init(roots_info_cache);
12231         }
12232
12233         btrfs_init_path(&path);
12234         key.objectid = 0;
12235         key.type = BTRFS_EXTENT_ITEM_KEY;
12236         key.offset = 0;
12237         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12238         if (ret < 0)
12239                 goto out;
12240         leaf = path.nodes[0];
12241
12242         while (1) {
12243                 struct btrfs_key found_key;
12244                 struct btrfs_extent_item *ei;
12245                 struct btrfs_extent_inline_ref *iref;
12246                 int slot = path.slots[0];
12247                 int type;
12248                 u64 flags;
12249                 u64 root_id;
12250                 u8 level;
12251                 struct cache_extent *entry;
12252                 struct root_item_info *rii;
12253
12254                 if (slot >= btrfs_header_nritems(leaf)) {
12255                         ret = btrfs_next_leaf(info->extent_root, &path);
12256                         if (ret < 0) {
12257                                 break;
12258                         } else if (ret) {
12259                                 ret = 0;
12260                                 break;
12261                         }
12262                         leaf = path.nodes[0];
12263                         slot = path.slots[0];
12264                 }
12265
12266                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12267
12268                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12269                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12270                         goto next;
12271
12272                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12273                 flags = btrfs_extent_flags(leaf, ei);
12274
12275                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12276                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12277                         goto next;
12278
12279                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12280                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12281                         level = found_key.offset;
12282                 } else {
12283                         struct btrfs_tree_block_info *binfo;
12284
12285                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12286                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12287                         level = btrfs_tree_block_level(leaf, binfo);
12288                 }
12289
12290                 /*
12291                  * For a root extent, it must be of the following type and the
12292                  * first (and only one) iref in the item.
12293                  */
12294                 type = btrfs_extent_inline_ref_type(leaf, iref);
12295                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12296                         goto next;
12297
12298                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12299                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12300                 if (!entry) {
12301                         rii = malloc(sizeof(struct root_item_info));
12302                         if (!rii) {
12303                                 ret = -ENOMEM;
12304                                 goto out;
12305                         }
12306                         rii->cache_extent.start = root_id;
12307                         rii->cache_extent.size = 1;
12308                         rii->level = (u8)-1;
12309                         entry = &rii->cache_extent;
12310                         ret = insert_cache_extent(roots_info_cache, entry);
12311                         ASSERT(ret == 0);
12312                 } else {
12313                         rii = container_of(entry, struct root_item_info,
12314                                            cache_extent);
12315                 }
12316
12317                 ASSERT(rii->cache_extent.start == root_id);
12318                 ASSERT(rii->cache_extent.size == 1);
12319
12320                 if (level > rii->level || rii->level == (u8)-1) {
12321                         rii->level = level;
12322                         rii->bytenr = found_key.objectid;
12323                         rii->gen = btrfs_extent_generation(leaf, ei);
12324                         rii->node_count = 1;
12325                 } else if (level == rii->level) {
12326                         rii->node_count++;
12327                 }
12328 next:
12329                 path.slots[0]++;
12330         }
12331
12332 out:
12333         btrfs_release_path(&path);
12334
12335         return ret;
12336 }
12337
12338 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12339                                   struct btrfs_path *path,
12340                                   const struct btrfs_key *root_key,
12341                                   const int read_only_mode)
12342 {
12343         const u64 root_id = root_key->objectid;
12344         struct cache_extent *entry;
12345         struct root_item_info *rii;
12346         struct btrfs_root_item ri;
12347         unsigned long offset;
12348
12349         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12350         if (!entry) {
12351                 fprintf(stderr,
12352                         "Error: could not find extent items for root %llu\n",
12353                         root_key->objectid);
12354                 return -ENOENT;
12355         }
12356
12357         rii = container_of(entry, struct root_item_info, cache_extent);
12358         ASSERT(rii->cache_extent.start == root_id);
12359         ASSERT(rii->cache_extent.size == 1);
12360
12361         if (rii->node_count != 1) {
12362                 fprintf(stderr,
12363                         "Error: could not find btree root extent for root %llu\n",
12364                         root_id);
12365                 return -ENOENT;
12366         }
12367
12368         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12369         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12370
12371         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12372             btrfs_root_level(&ri) != rii->level ||
12373             btrfs_root_generation(&ri) != rii->gen) {
12374
12375                 /*
12376                  * If we're in repair mode but our caller told us to not update
12377                  * the root item, i.e. just check if it needs to be updated, don't
12378                  * print this message, since the caller will call us again shortly
12379                  * for the same root item without read only mode (the caller will
12380                  * open a transaction first).
12381                  */
12382                 if (!(read_only_mode && repair))
12383                         fprintf(stderr,
12384                                 "%sroot item for root %llu,"
12385                                 " current bytenr %llu, current gen %llu, current level %u,"
12386                                 " new bytenr %llu, new gen %llu, new level %u\n",
12387                                 (read_only_mode ? "" : "fixing "),
12388                                 root_id,
12389                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12390                                 btrfs_root_level(&ri),
12391                                 rii->bytenr, rii->gen, rii->level);
12392
12393                 if (btrfs_root_generation(&ri) > rii->gen) {
12394                         fprintf(stderr,
12395                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12396                                 root_id, btrfs_root_generation(&ri), rii->gen);
12397                         return -EINVAL;
12398                 }
12399
12400                 if (!read_only_mode) {
12401                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12402                         btrfs_set_root_level(&ri, rii->level);
12403                         btrfs_set_root_generation(&ri, rii->gen);
12404                         write_extent_buffer(path->nodes[0], &ri,
12405                                             offset, sizeof(ri));
12406                 }
12407
12408                 return 1;
12409         }
12410
12411         return 0;
12412 }
12413
12414 /*
12415  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12416  * caused read-only snapshots to be corrupted if they were created at a moment
12417  * when the source subvolume/snapshot had orphan items. The issue was that the
12418  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12419  * node instead of the post orphan cleanup root node.
12420  * So this function, and its callees, just detects and fixes those cases. Even
12421  * though the regression was for read-only snapshots, this function applies to
12422  * any snapshot/subvolume root.
12423  * This must be run before any other repair code - not doing it so, makes other
12424  * repair code delete or modify backrefs in the extent tree for example, which
12425  * will result in an inconsistent fs after repairing the root items.
12426  */
12427 static int repair_root_items(struct btrfs_fs_info *info)
12428 {
12429         struct btrfs_path path;
12430         struct btrfs_key key;
12431         struct extent_buffer *leaf;
12432         struct btrfs_trans_handle *trans = NULL;
12433         int ret = 0;
12434         int bad_roots = 0;
12435         int need_trans = 0;
12436
12437         btrfs_init_path(&path);
12438
12439         ret = build_roots_info_cache(info);
12440         if (ret)
12441                 goto out;
12442
12443         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12444         key.type = BTRFS_ROOT_ITEM_KEY;
12445         key.offset = 0;
12446
12447 again:
12448         /*
12449          * Avoid opening and committing transactions if a leaf doesn't have
12450          * any root items that need to be fixed, so that we avoid rotating
12451          * backup roots unnecessarily.
12452          */
12453         if (need_trans) {
12454                 trans = btrfs_start_transaction(info->tree_root, 1);
12455                 if (IS_ERR(trans)) {
12456                         ret = PTR_ERR(trans);
12457                         goto out;
12458                 }
12459         }
12460
12461         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12462                                 0, trans ? 1 : 0);
12463         if (ret < 0)
12464                 goto out;
12465         leaf = path.nodes[0];
12466
12467         while (1) {
12468                 struct btrfs_key found_key;
12469
12470                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12471                         int no_more_keys = find_next_key(&path, &key);
12472
12473                         btrfs_release_path(&path);
12474                         if (trans) {
12475                                 ret = btrfs_commit_transaction(trans,
12476                                                                info->tree_root);
12477                                 trans = NULL;
12478                                 if (ret < 0)
12479                                         goto out;
12480                         }
12481                         need_trans = 0;
12482                         if (no_more_keys)
12483                                 break;
12484                         goto again;
12485                 }
12486
12487                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12488
12489                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12490                         goto next;
12491                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12492                         goto next;
12493
12494                 ret = maybe_repair_root_item(info, &path, &found_key,
12495                                              trans ? 0 : 1);
12496                 if (ret < 0)
12497                         goto out;
12498                 if (ret) {
12499                         if (!trans && repair) {
12500                                 need_trans = 1;
12501                                 key = found_key;
12502                                 btrfs_release_path(&path);
12503                                 goto again;
12504                         }
12505                         bad_roots++;
12506                 }
12507 next:
12508                 path.slots[0]++;
12509         }
12510         ret = 0;
12511 out:
12512         free_roots_info_cache();
12513         btrfs_release_path(&path);
12514         if (trans)
12515                 btrfs_commit_transaction(trans, info->tree_root);
12516         if (ret < 0)
12517                 return ret;
12518
12519         return bad_roots;
12520 }
12521
12522 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12523 {
12524         struct btrfs_trans_handle *trans;
12525         struct btrfs_block_group_cache *bg_cache;
12526         u64 current = 0;
12527         int ret = 0;
12528
12529         /* Clear all free space cache inodes and its extent data */
12530         while (1) {
12531                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12532                 if (!bg_cache)
12533                         break;
12534                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12535                 if (ret < 0)
12536                         return ret;
12537                 current = bg_cache->key.objectid + bg_cache->key.offset;
12538         }
12539
12540         /* Don't forget to set cache_generation to -1 */
12541         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12542         if (IS_ERR(trans)) {
12543                 error("failed to update super block cache generation");
12544                 return PTR_ERR(trans);
12545         }
12546         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12547         btrfs_commit_transaction(trans, fs_info->tree_root);
12548
12549         return ret;
12550 }
12551
/*
 * Help text for "btrfs check", handed to usage() by cmd_check().
 *
 * The NULL-terminated array holds, in order: the synopsis line, a one-line
 * summary, the long description, an empty string as separator, and one or
 * more lines per command line option.
 */
const char * const cmd_check_usage[] = {
	/* synopsis */
	"btrfs check [options] <device>",
	/* one-line summary */
	"Check structural integrity of a filesystem (unmounted).",
	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	/* separator */
	"",
	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	/* terminator */
	NULL
};
12582
12583 int cmd_check(int argc, char **argv)
12584 {
12585         struct cache_tree root_cache;
12586         struct btrfs_root *root;
12587         struct btrfs_fs_info *info;
12588         u64 bytenr = 0;
12589         u64 subvolid = 0;
12590         u64 tree_root_bytenr = 0;
12591         u64 chunk_root_bytenr = 0;
12592         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12593         int ret;
12594         int err = 0;
12595         u64 num;
12596         int init_csum_tree = 0;
12597         int readonly = 0;
12598         int clear_space_cache = 0;
12599         int qgroup_report = 0;
12600         int qgroups_repaired = 0;
12601         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12602
12603         while(1) {
12604                 int c;
12605                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12606                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12607                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12608                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12609                 static const struct option long_options[] = {
12610                         { "super", required_argument, NULL, 's' },
12611                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12612                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12613                         { "init-csum-tree", no_argument, NULL,
12614                                 GETOPT_VAL_INIT_CSUM },
12615                         { "init-extent-tree", no_argument, NULL,
12616                                 GETOPT_VAL_INIT_EXTENT },
12617                         { "check-data-csum", no_argument, NULL,
12618                                 GETOPT_VAL_CHECK_CSUM },
12619                         { "backup", no_argument, NULL, 'b' },
12620                         { "subvol-extents", required_argument, NULL, 'E' },
12621                         { "qgroup-report", no_argument, NULL, 'Q' },
12622                         { "tree-root", required_argument, NULL, 'r' },
12623                         { "chunk-root", required_argument, NULL,
12624                                 GETOPT_VAL_CHUNK_TREE },
12625                         { "progress", no_argument, NULL, 'p' },
12626                         { "mode", required_argument, NULL,
12627                                 GETOPT_VAL_MODE },
12628                         { "clear-space-cache", required_argument, NULL,
12629                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12630                         { NULL, 0, NULL, 0}
12631                 };
12632
12633                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12634                 if (c < 0)
12635                         break;
12636                 switch(c) {
12637                         case 'a': /* ignored */ break;
12638                         case 'b':
12639                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12640                                 break;
12641                         case 's':
12642                                 num = arg_strtou64(optarg);
12643                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12644                                         error(
12645                                         "super mirror should be less than %d",
12646                                                 BTRFS_SUPER_MIRROR_MAX);
12647                                         exit(1);
12648                                 }
12649                                 bytenr = btrfs_sb_offset(((int)num));
12650                                 printf("using SB copy %llu, bytenr %llu\n", num,
12651                                        (unsigned long long)bytenr);
12652                                 break;
12653                         case 'Q':
12654                                 qgroup_report = 1;
12655                                 break;
12656                         case 'E':
12657                                 subvolid = arg_strtou64(optarg);
12658                                 break;
12659                         case 'r':
12660                                 tree_root_bytenr = arg_strtou64(optarg);
12661                                 break;
12662                         case GETOPT_VAL_CHUNK_TREE:
12663                                 chunk_root_bytenr = arg_strtou64(optarg);
12664                                 break;
12665                         case 'p':
12666                                 ctx.progress_enabled = true;
12667                                 break;
12668                         case '?':
12669                         case 'h':
12670                                 usage(cmd_check_usage);
12671                         case GETOPT_VAL_REPAIR:
12672                                 printf("enabling repair mode\n");
12673                                 repair = 1;
12674                                 ctree_flags |= OPEN_CTREE_WRITES;
12675                                 break;
12676                         case GETOPT_VAL_READONLY:
12677                                 readonly = 1;
12678                                 break;
12679                         case GETOPT_VAL_INIT_CSUM:
12680                                 printf("Creating a new CRC tree\n");
12681                                 init_csum_tree = 1;
12682                                 repair = 1;
12683                                 ctree_flags |= OPEN_CTREE_WRITES;
12684                                 break;
12685                         case GETOPT_VAL_INIT_EXTENT:
12686                                 init_extent_tree = 1;
12687                                 ctree_flags |= (OPEN_CTREE_WRITES |
12688                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12689                                 repair = 1;
12690                                 break;
12691                         case GETOPT_VAL_CHECK_CSUM:
12692                                 check_data_csum = 1;
12693                                 break;
12694                         case GETOPT_VAL_MODE:
12695                                 check_mode = parse_check_mode(optarg);
12696                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12697                                         error("unknown mode: %s", optarg);
12698                                         exit(1);
12699                                 }
12700                                 break;
12701                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12702                                 if (strcmp(optarg, "v1") == 0) {
12703                                         clear_space_cache = 1;
12704                                 } else if (strcmp(optarg, "v2") == 0) {
12705                                         clear_space_cache = 2;
12706                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12707                                 } else {
12708                                         error(
12709                 "invalid argument to --clear-space-cache, must be v1 or v2");
12710                                         exit(1);
12711                                 }
12712                                 ctree_flags |= OPEN_CTREE_WRITES;
12713                                 break;
12714                 }
12715         }
12716
12717         if (check_argc_exact(argc - optind, 1))
12718                 usage(cmd_check_usage);
12719
12720         if (ctx.progress_enabled) {
12721                 ctx.tp = TASK_NOTHING;
12722                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12723         }
12724
12725         /* This check is the only reason for --readonly to exist */
12726         if (readonly && repair) {
12727                 error("repair options are not compatible with --readonly");
12728                 exit(1);
12729         }
12730
12731         /*
12732          * Not supported yet
12733          */
12734         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12735                 error("low memory mode doesn't support repair yet");
12736                 exit(1);
12737         }
12738
12739         radix_tree_init();
12740         cache_tree_init(&root_cache);
12741
12742         if((ret = check_mounted(argv[optind])) < 0) {
12743                 error("could not check mount status: %s", strerror(-ret));
12744                 err |= !!ret;
12745                 goto err_out;
12746         } else if(ret) {
12747                 error("%s is currently mounted, aborting", argv[optind]);
12748                 ret = -EBUSY;
12749                 err |= !!ret;
12750                 goto err_out;
12751         }
12752
12753         /* only allow partial opening under repair mode */
12754         if (repair)
12755                 ctree_flags |= OPEN_CTREE_PARTIAL;
12756
12757         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12758                                   chunk_root_bytenr, ctree_flags);
12759         if (!info) {
12760                 error("cannot open file system");
12761                 ret = -EIO;
12762                 err |= !!ret;
12763                 goto err_out;
12764         }
12765
12766         global_info = info;
12767         root = info->fs_root;
12768         if (clear_space_cache == 1) {
12769                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12770                         error(
12771                 "free space cache v2 detected, use --clear-space-cache v2");
12772                         ret = 1;
12773                         goto close_out;
12774                 }
12775                 printf("Clearing free space cache\n");
12776                 ret = clear_free_space_cache(info);
12777                 if (ret) {
12778                         error("failed to clear free space cache");
12779                         ret = 1;
12780                 } else {
12781                         printf("Free space cache cleared\n");
12782                 }
12783                 goto close_out;
12784         } else if (clear_space_cache == 2) {
12785                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12786                         printf("no free space cache v2 to clear\n");
12787                         ret = 0;
12788                         goto close_out;
12789                 }
12790                 printf("Clear free space cache v2\n");
12791                 ret = btrfs_clear_free_space_tree(info);
12792                 if (ret) {
12793                         error("failed to clear free space cache v2: %d", ret);
12794                         ret = 1;
12795                 } else {
12796                         printf("free space cache v2 cleared\n");
12797                 }
12798                 goto close_out;
12799         }
12800
12801         /*
12802          * repair mode will force us to commit transaction which
12803          * will make us fail to load log tree when mounting.
12804          */
12805         if (repair && btrfs_super_log_root(info->super_copy)) {
12806                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12807                 if (!ret) {
12808                         ret = 1;
12809                         err |= !!ret;
12810                         goto close_out;
12811                 }
12812                 ret = zero_log_tree(root);
12813                 err |= !!ret;
12814                 if (ret) {
12815                         error("failed to zero log tree: %d", ret);
12816                         goto close_out;
12817                 }
12818         }
12819
12820         uuid_unparse(info->super_copy->fsid, uuidbuf);
12821         if (qgroup_report) {
12822                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12823                        uuidbuf);
12824                 ret = qgroup_verify_all(info);
12825                 err |= !!ret;
12826                 if (ret == 0)
12827                         report_qgroups(1);
12828                 goto close_out;
12829         }
12830         if (subvolid) {
12831                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12832                        subvolid, argv[optind], uuidbuf);
12833                 ret = print_extent_state(info, subvolid);
12834                 err |= !!ret;
12835                 goto close_out;
12836         }
12837         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12838
12839         if (!extent_buffer_uptodate(info->tree_root->node) ||
12840             !extent_buffer_uptodate(info->dev_root->node) ||
12841             !extent_buffer_uptodate(info->chunk_root->node)) {
12842                 error("critical roots corrupted, unable to check the filesystem");
12843                 err |= !!ret;
12844                 ret = -EIO;
12845                 goto close_out;
12846         }
12847
12848         if (init_extent_tree || init_csum_tree) {
12849                 struct btrfs_trans_handle *trans;
12850
12851                 trans = btrfs_start_transaction(info->extent_root, 0);
12852                 if (IS_ERR(trans)) {
12853                         error("error starting transaction");
12854                         ret = PTR_ERR(trans);
12855                         err |= !!ret;
12856                         goto close_out;
12857                 }
12858
12859                 if (init_extent_tree) {
12860                         printf("Creating a new extent tree\n");
12861                         ret = reinit_extent_tree(trans, info);
12862                         err |= !!ret;
12863                         if (ret)
12864                                 goto close_out;
12865                 }
12866
12867                 if (init_csum_tree) {
12868                         printf("Reinitialize checksum tree\n");
12869                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12870                         if (ret) {
12871                                 error("checksum tree initialization failed: %d",
12872                                                 ret);
12873                                 ret = -EIO;
12874                                 err |= !!ret;
12875                                 goto close_out;
12876                         }
12877
12878                         ret = fill_csum_tree(trans, info->csum_root,
12879                                              init_extent_tree);
12880                         err |= !!ret;
12881                         if (ret) {
12882                                 error("checksum tree refilling failed: %d", ret);
12883                                 return -EIO;
12884                         }
12885                 }
12886                 /*
12887                  * Ok now we commit and run the normal fsck, which will add
12888                  * extent entries for all of the items it finds.
12889                  */
12890                 ret = btrfs_commit_transaction(trans, info->extent_root);
12891                 err |= !!ret;
12892                 if (ret)
12893                         goto close_out;
12894         }
12895         if (!extent_buffer_uptodate(info->extent_root->node)) {
12896                 error("critical: extent_root, unable to check the filesystem");
12897                 ret = -EIO;
12898                 err |= !!ret;
12899                 goto close_out;
12900         }
12901         if (!extent_buffer_uptodate(info->csum_root->node)) {
12902                 error("critical: csum_root, unable to check the filesystem");
12903                 ret = -EIO;
12904                 err |= !!ret;
12905                 goto close_out;
12906         }
12907
12908         if (!ctx.progress_enabled)
12909                 fprintf(stderr, "checking extents\n");
12910         if (check_mode == CHECK_MODE_LOWMEM)
12911                 ret = check_chunks_and_extents_v2(root);
12912         else
12913                 ret = check_chunks_and_extents(root);
12914         err |= !!ret;
12915         if (ret)
12916                 error(
12917                 "errors found in extent allocation tree or chunk allocation");
12918
12919         ret = repair_root_items(info);
12920         err |= !!ret;
12921         if (ret < 0)
12922                 goto close_out;
12923         if (repair) {
12924                 fprintf(stderr, "Fixed %d roots.\n", ret);
12925                 ret = 0;
12926         } else if (ret > 0) {
12927                 fprintf(stderr,
12928                        "Found %d roots with an outdated root item.\n",
12929                        ret);
12930                 fprintf(stderr,
12931                         "Please run a filesystem check with the option --repair to fix them.\n");
12932                 ret = 1;
12933                 err |= !!ret;
12934                 goto close_out;
12935         }
12936
12937         if (!ctx.progress_enabled) {
12938                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12939                         fprintf(stderr, "checking free space tree\n");
12940                 else
12941                         fprintf(stderr, "checking free space cache\n");
12942         }
12943         ret = check_space_cache(root);
12944         err |= !!ret;
12945         if (ret)
12946                 goto out;
12947
12948         /*
12949          * We used to have to have these hole extents in between our real
12950          * extents so if we don't have this flag set we need to make sure there
12951          * are no gaps in the file extents for inodes, otherwise we can just
12952          * ignore it when this happens.
12953          */
12954         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12955         if (!ctx.progress_enabled)
12956                 fprintf(stderr, "checking fs roots\n");
12957         if (check_mode == CHECK_MODE_LOWMEM)
12958                 ret = check_fs_roots_v2(root->fs_info);
12959         else
12960                 ret = check_fs_roots(root, &root_cache);
12961         err |= !!ret;
12962         if (ret)
12963                 goto out;
12964
12965         fprintf(stderr, "checking csums\n");
12966         ret = check_csums(root);
12967         err |= !!ret;
12968         if (ret)
12969                 goto out;
12970
12971         fprintf(stderr, "checking root refs\n");
12972         /* For low memory mode, check_fs_roots_v2 handles root refs */
12973         if (check_mode != CHECK_MODE_LOWMEM) {
12974                 ret = check_root_refs(root, &root_cache);
12975                 err |= !!ret;
12976                 if (ret)
12977                         goto out;
12978         }
12979
12980         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12981                 struct extent_buffer *eb;
12982
12983                 eb = list_first_entry(&root->fs_info->recow_ebs,
12984                                       struct extent_buffer, recow);
12985                 list_del_init(&eb->recow);
12986                 ret = recow_extent_buffer(root, eb);
12987                 err |= !!ret;
12988                 if (ret)
12989                         break;
12990         }
12991
12992         while (!list_empty(&delete_items)) {
12993                 struct bad_item *bad;
12994
12995                 bad = list_first_entry(&delete_items, struct bad_item, list);
12996                 list_del_init(&bad->list);
12997                 if (repair) {
12998                         ret = delete_bad_item(root, bad);
12999                         err |= !!ret;
13000                 }
13001                 free(bad);
13002         }
13003
13004         if (info->quota_enabled) {
13005                 fprintf(stderr, "checking quota groups\n");
13006                 ret = qgroup_verify_all(info);
13007                 err |= !!ret;
13008                 if (ret)
13009                         goto out;
13010                 report_qgroups(0);
13011                 ret = repair_qgroups(info, &qgroups_repaired);
13012                 err |= !!ret;
13013                 if (err)
13014                         goto out;
13015                 ret = 0;
13016         }
13017
13018         if (!list_empty(&root->fs_info->recow_ebs)) {
13019                 error("transid errors in file system");
13020                 ret = 1;
13021                 err |= !!ret;
13022         }
13023 out:
13024         if (found_old_backref) { /*
13025                  * there was a disk format change when mixed
13026                  * backref was in testing tree. The old format
13027                  * existed about one week.
13028                  */
13029                 printf("\n * Found old mixed backref format. "
13030                        "The old format is not supported! *"
13031                        "\n * Please mount the FS in readonly mode, "
13032                        "backup data and re-format the FS. *\n\n");
13033                 err |= 1;
13034         }
13035         printf("found %llu bytes used err is %d\n",
13036                (unsigned long long)bytes_used, ret);
13037         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13038         printf("total tree bytes: %llu\n",
13039                (unsigned long long)total_btree_bytes);
13040         printf("total fs tree bytes: %llu\n",
13041                (unsigned long long)total_fs_tree_bytes);
13042         printf("total extent tree bytes: %llu\n",
13043                (unsigned long long)total_extent_tree_bytes);
13044         printf("btree space waste bytes: %llu\n",
13045                (unsigned long long)btree_space_waste);
13046         printf("file data blocks allocated: %llu\n referenced %llu\n",
13047                 (unsigned long long)data_bytes_allocated,
13048                 (unsigned long long)data_bytes_referenced);
13049
13050         free_qgroup_counts();
13051         free_root_recs_tree(&root_cache);
13052 close_out:
13053         close_ctree(root);
13054 err_out:
13055         if (ctx.progress_enabled)
13056                 task_deinit(ctx.info);
13057
13058         return err;
13059 }