btrfs-progs: remove unused argument from set_extent_dirty
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress indicator.  The first three values index
 * task_position_string[] in print_status_check(); TASK_NOTHING makes that
 * function return without printing anything.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* have to be the last element */
52 };
53
/* Context handed to print_status_check() as its opaque argument. */
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;  /* phase whose label is printed */
57
58         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
59 };
60
/*
 * File-scope state.  Everything here starts out zero, NULL or as an empty
 * list; the code that updates these lies outside this part of the file.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation selector.  parse_check_mode() maps "lowmem" to
 * CHECK_MODE_LOWMEM, "orig"/"original" to CHECK_MODE_ORIGINAL and anything
 * else to CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT aliases the original mode.
 */
79 enum btrfs_check_mode {
80         CHECK_MODE_ORIGINAL,
81         CHECK_MODE_LOWMEM,
82         CHECK_MODE_UNKNOWN,
83         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 };
85
/* Mode in effect; starts as the default (original) mode. */
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Header shared by both backref flavours: it is embedded as the `node`
 * member of struct data_backref and struct tree_backref, and linked through
 * `list` (see to_extent_backref()).
 */
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Backref record that embeds a struct extent_backref as `node`;
 * to_data_backref() recovers the record from that member.  `parent` and
 * `root` share storage through the anonymous union.
 */
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/*
 * Error flag bits, one bit per condition so several can be OR-ed together.
 * Bit 0 is not assigned in this group.
 */
117 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
131 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, but with the undetermined members removed and
142  * linked through a plain list_head.
143  * During the extent scan it is stored in root->orphan_data_extent.
144  * During the fs tree scan it is moved to inode_record::orphan_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
/*
 * Backref record that embeds a struct extent_backref as `node`;
 * to_tree_backref() recovers the record from that member.  `parent` and
 * `root` share storage through the anonymous union.
 */
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
168 /*
 * Explicit initialization value for extent_record::flag_block_full_backref.
 * That bit-field is two bits wide, so it can hold 0, 1 and this value.
 */
169 enum { FLAG_UNSET = 2 };
170
/*
 * Record describing one extent.  to_extent_record() recovers it from the
 * `list` link.  flag_block_full_backref is two bits wide so that it can hold
 * FLAG_UNSET in addition to 0 and 1.
 */
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One entry on inode_record::backrefs.  The name is stored inline after the
 * fixed part: clone_inode_rec() copies sizeof(struct) + namelen + 1 bytes
 * per entry.
 */
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];   /* trailing variable-length name */
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * List-linked record of values taken from a root item.
 * NOTE(review): the code that fills this in is outside this excerpt.
 */
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/*
 * Backref error bits; print_ref_error() turns each set bit into a
 * human-readable fragment.
 */
233 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
235 #define REF_ERR_NO_INODE_REF            (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
238 #define REF_ERR_DUP_INODE_REF           (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF             (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole [start, start + len) kept in an rb-tree (inode_record::holes)
 * ordered by compare_hole().
 */
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
/*
 * Per-inode record.  get_inode_rec() creates it with refs == 1 and
 * extent_start == (u64)-1; free_inode_rec() drops one reference and, when
 * none remain, frees the backrefs, the orphan extents and the holes.
 */
253 struct inode_record {
254         struct list_head backrefs;      /* list of struct inode_backref */
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;                     /* I_ERR_* bits */
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;           /* tree of struct file_extent_hole */
276         struct list_head orphan_extents; /* list of struct orphan_data_extent */
277
278         u32 refs;                       /* reference count */
279 };
280
/*
 * Bits stored in inode_record::errors; print_inode_error() turns each set
 * bit into a human-readable fragment.
 */
281 #define I_ERR_NO_INODE_ITEM             (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * One entry on root_record::backrefs (see to_root_backref()).  As in
 * struct inode_backref, the name is a trailing zero-length array.
 */
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];   /* trailing variable-length name */
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/* Per-root record: a cache_extent node plus a list of backrefs. */
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
/*
 * Cache-tree node carrying an opaque payload.  get_inode_rec() keys it by
 * inode number (size 1) and stores a struct inode_record pointer in `data`.
 */
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
/*
 * Cache-tree node holding two nested cache trees, a current inode record
 * and a reference count.
 * NOTE(review): the code that uses this is outside this excerpt.
 */
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
/* A (start, size) pair describing one block. */
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
/*
 * Tree-walk state: a cache tree of shared nodes and one shared_node slot
 * per tree level (BTRFS_MAX_LEVEL slots).
 * NOTE(review): active_node presumably indexes nodes[]; confirm.
 */
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
/* List entry naming one item by key and the id of the root it belongs to. */
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
/* List entry describing a (bytenr, bytes) extent with a count and a broken flag. */
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
/*
 * Per-root bookkeeping held in a cache tree through `cache_extent`.
 * NOTE(review): presumably stored in roots_info_cache; confirm.
 */
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
374 /*
375  * Error bits for the low memory mode check.
376  *
377  * Currently no caller cares about them.  They are only used internally
378  * for error classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart in a
 * combined error value.  Confirm whether this is intended before giving
 * CROSSING_STRIPE_BOUNDARY a bit of its own.
379  */
380 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
381 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
382 #define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
383 #define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
384 #define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
385 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
386 #define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
387 #define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
388 #define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
389 #define CHUNK_TYPE_MISMATCH     (1 << 8)
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Terminate the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct btrfs_root *root,
1482                             struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (name_len <= BTRFS_NAME_LEN) {
1517                         len = name_len;
1518                         error = 0;
1519                 } else {
1520                         len = BTRFS_NAME_LEN;
1521                         error = REF_ERR_NAME_TOO_LONG;
1522                 }
1523                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524
1525                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1526                         add_inode_backref(inode_cache, location.objectid,
1527                                           key->objectid, key->offset, namebuf,
1528                                           len, filetype, key->type, error);
1529                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1530                         add_inode_backref(root_cache, location.objectid,
1531                                           key->objectid, key->offset,
1532                                           namebuf, len, filetype,
1533                                           key->type, error);
1534                 } else {
1535                         fprintf(stderr, "invalid location in dir item %u\n",
1536                                 location.type);
1537                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1538                                           key->objectid, key->offset, namebuf,
1539                                           len, filetype, key->type, error);
1540                 }
1541
1542                 len = sizeof(*di) + name_len + data_len;
1543                 di = (struct btrfs_dir_item *)((char *)di + len);
1544                 cur += len;
1545         }
1546         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1547                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1548
1549         return 0;
1550 }
1551
1552 static int process_inode_ref(struct extent_buffer *eb,
1553                              int slot, struct btrfs_key *key,
1554                              struct shared_node *active_node)
1555 {
1556         u32 total;
1557         u32 cur = 0;
1558         u32 len;
1559         u32 name_len;
1560         u64 index;
1561         int error;
1562         struct cache_tree *inode_cache;
1563         struct btrfs_inode_ref *ref;
1564         char namebuf[BTRFS_NAME_LEN];
1565
1566         inode_cache = &active_node->inode_cache;
1567
1568         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1569         total = btrfs_item_size_nr(eb, slot);
1570         while (cur < total) {
1571                 name_len = btrfs_inode_ref_name_len(eb, ref);
1572                 index = btrfs_inode_ref_index(eb, ref);
1573                 if (name_len <= BTRFS_NAME_LEN) {
1574                         len = name_len;
1575                         error = 0;
1576                 } else {
1577                         len = BTRFS_NAME_LEN;
1578                         error = REF_ERR_NAME_TOO_LONG;
1579                 }
1580                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1581                 add_inode_backref(inode_cache, key->objectid, key->offset,
1582                                   index, namebuf, len, 0, key->type, error);
1583
1584                 len = sizeof(*ref) + name_len;
1585                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1586                 cur += len;
1587         }
1588         return 0;
1589 }
1590
1591 static int process_inode_extref(struct extent_buffer *eb,
1592                                 int slot, struct btrfs_key *key,
1593                                 struct shared_node *active_node)
1594 {
1595         u32 total;
1596         u32 cur = 0;
1597         u32 len;
1598         u32 name_len;
1599         u64 index;
1600         u64 parent;
1601         int error;
1602         struct cache_tree *inode_cache;
1603         struct btrfs_inode_extref *extref;
1604         char namebuf[BTRFS_NAME_LEN];
1605
1606         inode_cache = &active_node->inode_cache;
1607
1608         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1609         total = btrfs_item_size_nr(eb, slot);
1610         while (cur < total) {
1611                 name_len = btrfs_inode_extref_name_len(eb, extref);
1612                 index = btrfs_inode_extref_index(eb, extref);
1613                 parent = btrfs_inode_extref_parent(eb, extref);
1614                 if (name_len <= BTRFS_NAME_LEN) {
1615                         len = name_len;
1616                         error = 0;
1617                 } else {
1618                         len = BTRFS_NAME_LEN;
1619                         error = REF_ERR_NAME_TOO_LONG;
1620                 }
1621                 read_extent_buffer(eb, namebuf,
1622                                    (unsigned long)(extref + 1), len);
1623                 add_inode_backref(inode_cache, key->objectid, parent,
1624                                   index, namebuf, len, 0, key->type, error);
1625
1626                 len = sizeof(*extref) + name_len;
1627                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1628                 cur += len;
1629         }
1630         return 0;
1631
1632 }
1633
1634 static int count_csum_range(struct btrfs_root *root, u64 start,
1635                             u64 len, u64 *found)
1636 {
1637         struct btrfs_key key;
1638         struct btrfs_path path;
1639         struct extent_buffer *leaf;
1640         int ret;
1641         size_t size;
1642         *found = 0;
1643         u64 csum_end;
1644         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645
1646         btrfs_init_path(&path);
1647
1648         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649         key.offset = start;
1650         key.type = BTRFS_EXTENT_CSUM_KEY;
1651
1652         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1653                                 &key, &path, 0, 0);
1654         if (ret < 0)
1655                 goto out;
1656         if (ret > 0 && path.slots[0] > 0) {
1657                 leaf = path.nodes[0];
1658                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1659                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1660                     key.type == BTRFS_EXTENT_CSUM_KEY)
1661                         path.slots[0]--;
1662         }
1663
1664         while (len > 0) {
1665                 leaf = path.nodes[0];
1666                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1667                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1668                         if (ret > 0)
1669                                 break;
1670                         else if (ret < 0)
1671                                 goto out;
1672                         leaf = path.nodes[0];
1673                 }
1674
1675                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1676                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1677                     key.type != BTRFS_EXTENT_CSUM_KEY)
1678                         break;
1679
1680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1681                 if (key.offset >= start + len)
1682                         break;
1683
1684                 if (key.offset > start)
1685                         start = key.offset;
1686
1687                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1688                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1689                 if (csum_end > start) {
1690                         size = min(csum_end - start, len);
1691                         len -= size;
1692                         start += size;
1693                         *found += size;
1694                 }
1695
1696                 path.slots[0]++;
1697         }
1698 out:
1699         btrfs_release_path(&path);
1700         if (ret < 0)
1701                 return ret;
1702         return 0;
1703 }
1704
1705 static int process_file_extent(struct btrfs_root *root,
1706                                 struct extent_buffer *eb,
1707                                 int slot, struct btrfs_key *key,
1708                                 struct shared_node *active_node)
1709 {
1710         struct inode_record *rec;
1711         struct btrfs_file_extent_item *fi;
1712         u64 num_bytes = 0;
1713         u64 disk_bytenr = 0;
1714         u64 extent_offset = 0;
1715         u64 mask = root->sectorsize - 1;
1716         int extent_type;
1717         int ret;
1718
1719         rec = active_node->current;
1720         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1721         rec->found_file_extent = 1;
1722
1723         if (rec->extent_start == (u64)-1) {
1724                 rec->extent_start = key->offset;
1725                 rec->extent_end = key->offset;
1726         }
1727
1728         if (rec->extent_end > key->offset)
1729                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1730         else if (rec->extent_end < key->offset) {
1731                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1732                                            key->offset - rec->extent_end);
1733                 if (ret < 0)
1734                         return ret;
1735         }
1736
1737         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1738         extent_type = btrfs_file_extent_type(eb, fi);
1739
1740         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1741                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742                 if (num_bytes == 0)
1743                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1744                 rec->found_size += num_bytes;
1745                 num_bytes = (num_bytes + mask) & ~mask;
1746         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1747                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1748                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1749                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1750                 extent_offset = btrfs_file_extent_offset(eb, fi);
1751                 if (num_bytes == 0 || (num_bytes & mask))
1752                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1753                 if (num_bytes + extent_offset >
1754                     btrfs_file_extent_ram_bytes(eb, fi))
1755                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1756                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1757                     (btrfs_file_extent_compression(eb, fi) ||
1758                      btrfs_file_extent_encryption(eb, fi) ||
1759                      btrfs_file_extent_other_encoding(eb, fi)))
1760                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1761                 if (disk_bytenr > 0)
1762                         rec->found_size += num_bytes;
1763         } else {
1764                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765         }
1766         rec->extent_end = key->offset + num_bytes;
1767
1768         /*
1769          * The data reloc tree will copy full extents into its inode and then
1770          * copy the corresponding csums.  Because the extent it copied could be
1771          * a preallocated extent that hasn't been written to yet there may be no
1772          * csums to copy, ergo we won't have csums for our file extent.  This is
1773          * ok so just don't bother checking csums if the inode belongs to the
1774          * data reloc tree.
1775          */
1776         if (disk_bytenr > 0 &&
1777             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1778                 u64 found;
1779                 if (btrfs_file_extent_compression(eb, fi))
1780                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781                 else
1782                         disk_bytenr += extent_offset;
1783
1784                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1785                 if (ret < 0)
1786                         return ret;
1787                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788                         if (found > 0)
1789                                 rec->found_csum_item = 1;
1790                         if (found < num_bytes)
1791                                 rec->some_csum_missing = 1;
1792                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793                         if (found > 0)
1794                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1795                 }
1796         }
1797         return 0;
1798 }
1799
1800 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1801                             struct walk_control *wc)
1802 {
1803         struct btrfs_key key;
1804         u32 nritems;
1805         int i;
1806         int ret = 0;
1807         struct cache_tree *inode_cache;
1808         struct shared_node *active_node;
1809
1810         if (wc->root_level == wc->active_node &&
1811             btrfs_root_refs(&root->root_item) == 0)
1812                 return 0;
1813
1814         active_node = wc->nodes[wc->active_node];
1815         inode_cache = &active_node->inode_cache;
1816         nritems = btrfs_header_nritems(eb);
1817         for (i = 0; i < nritems; i++) {
1818                 btrfs_item_key_to_cpu(eb, &key, i);
1819
1820                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821                         continue;
1822                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1823                         continue;
1824
1825                 if (active_node->current == NULL ||
1826                     active_node->current->ino < key.objectid) {
1827                         if (active_node->current) {
1828                                 active_node->current->checked = 1;
1829                                 maybe_free_inode_rec(inode_cache,
1830                                                      active_node->current);
1831                         }
1832                         active_node->current = get_inode_rec(inode_cache,
1833                                                              key.objectid, 1);
1834                         BUG_ON(IS_ERR(active_node->current));
1835                 }
1836                 switch (key.type) {
1837                 case BTRFS_DIR_ITEM_KEY:
1838                 case BTRFS_DIR_INDEX_KEY:
1839                         ret = process_dir_item(root, eb, i, &key, active_node);
1840                         break;
1841                 case BTRFS_INODE_REF_KEY:
1842                         ret = process_inode_ref(eb, i, &key, active_node);
1843                         break;
1844                 case BTRFS_INODE_EXTREF_KEY:
1845                         ret = process_inode_extref(eb, i, &key, active_node);
1846                         break;
1847                 case BTRFS_INODE_ITEM_KEY:
1848                         ret = process_inode_item(eb, i, &key, active_node);
1849                         break;
1850                 case BTRFS_EXTENT_DATA_KEY:
1851                         ret = process_file_extent(root, eb, i, &key,
1852                                                   active_node);
1853                         break;
1854                 default:
1855                         break;
1856                 };
1857         }
1858         return ret;
1859 }
1860
/*
 * Per-level record of the tree block that was looked up most recently, so
 * the tree walk does not have to query its reference count again while it
 * stays on the same block.
 */
struct node_refs {
        /* bytenr of the remembered tree block at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count btrfs_lookup_extent_info() reported for it */
        u64 refs[BTRFS_MAX_LEVEL];
        /* 0 when the _v2 walk may skip the block, see update_nodes_refs() */
        int need_check[BTRFS_MAX_LEVEL];
};
1866
1867 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1868                              struct node_refs *nrefs, u64 level);
1869 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1870                             unsigned int ext_ref);
1871
1872 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1873                                struct node_refs *nrefs, int *level, int ext_ref)
1874 {
1875         struct extent_buffer *cur = path->nodes[0];
1876         struct btrfs_key key;
1877         u64 cur_bytenr;
1878         u32 nritems;
1879         u64 first_ino = 0;
1880         int root_level = btrfs_header_level(root->node);
1881         int i;
1882         int ret = 0; /* Final return value */
1883         int err = 0; /* Positive error bitmap */
1884
1885         cur_bytenr = cur->start;
1886
1887         /* skip to first inode item or the first inode number change */
1888         nritems = btrfs_header_nritems(cur);
1889         for (i = 0; i < nritems; i++) {
1890                 btrfs_item_key_to_cpu(cur, &key, i);
1891                 if (i == 0)
1892                         first_ino = key.objectid;
1893                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1894                     (first_ino && first_ino != key.objectid))
1895                         break;
1896         }
1897         if (i == nritems) {
1898                 path->slots[0] = nritems;
1899                 return 0;
1900         }
1901         path->slots[0] = i;
1902
1903 again:
1904         err |= check_inode_item(root, path, ext_ref);
1905
1906         if (err & LAST_ITEM)
1907                 goto out;
1908
1909         /* still have inode items in thie leaf */
1910         if (cur->start == cur_bytenr)
1911                 goto again;
1912
1913         /*
1914          * we have switched to another leaf, above nodes may
1915          * have changed, here walk down the path, if a node
1916          * or leaf is shared, check whether we can skip this
1917          * node or leaf.
1918          */
1919         for (i = root_level; i >= 0; i--) {
1920                 if (path->nodes[i]->start == nrefs->bytenr[i])
1921                         continue;
1922
1923                 ret = update_nodes_refs(root,
1924                                 path->nodes[i]->start,
1925                                 nrefs, i);
1926                 if (ret)
1927                         goto out;
1928
1929                 if (!nrefs->need_check[i]) {
1930                         *level += 1;
1931                         break;
1932                 }
1933         }
1934
1935         for (i = 0; i < *level; i++) {
1936                 free_extent_buffer(path->nodes[i]);
1937                 path->nodes[i] = NULL;
1938         }
1939 out:
1940         err &= ~LAST_ITEM;
1941         /*
1942          * Convert any error bitmap to -EIO, as we should avoid
1943          * mixing positive and negative return value to represent
1944          * error
1945          */
1946         if (err && !ret)
1947                 ret = -EIO;
1948         return ret;
1949 }
1950
1951 static void reada_walk_down(struct btrfs_root *root,
1952                             struct extent_buffer *node, int slot)
1953 {
1954         u64 bytenr;
1955         u64 ptr_gen;
1956         u32 nritems;
1957         u32 blocksize;
1958         int i;
1959         int level;
1960
1961         level = btrfs_header_level(node);
1962         if (level != 1)
1963                 return;
1964
1965         nritems = btrfs_header_nritems(node);
1966         blocksize = root->nodesize;
1967         for (i = slot; i < nritems; i++) {
1968                 bytenr = btrfs_node_blockptr(node, i);
1969                 ptr_gen = btrfs_node_ptr_generation(node, i);
1970                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1971         }
1972 }
1973
1974 /*
1975  * Check the child node/leaf by the following condition:
1976  * 1. the first item key of the node/leaf should be the same with the one
1977  *    in parent.
1978  * 2. block in parent node should match the child node/leaf.
1979  * 3. generation of parent node and child's header should be consistent.
1980  *
1981  * Or the child node/leaf pointed by the key in parent is not valid.
1982  *
1983  * We hope to check leaf owner too, but since subvol may share leaves,
1984  * which makes leaf owner check not so strong, key check should be
1985  * sufficient enough for that case.
1986  */
1987 static int check_child_node(struct btrfs_root *root,
1988                             struct extent_buffer *parent, int slot,
1989                             struct extent_buffer *child)
1990 {
1991         struct btrfs_key parent_key;
1992         struct btrfs_key child_key;
1993         int ret = 0;
1994
1995         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1996         if (btrfs_header_level(child) == 0)
1997                 btrfs_item_key_to_cpu(child, &child_key, 0);
1998         else
1999                 btrfs_node_key_to_cpu(child, &child_key, 0);
2000
2001         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002                 ret = -EINVAL;
2003                 fprintf(stderr,
2004                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2005                         parent_key.objectid, parent_key.type, parent_key.offset,
2006                         child_key.objectid, child_key.type, child_key.offset);
2007         }
2008         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2009                 ret = -EINVAL;
2010                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2011                         btrfs_node_blockptr(parent, slot),
2012                         btrfs_header_bytenr(child));
2013         }
2014         if (btrfs_node_ptr_generation(parent, slot) !=
2015             btrfs_header_generation(child)) {
2016                 ret = -EINVAL;
2017                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2018                         btrfs_header_generation(child),
2019                         btrfs_node_ptr_generation(parent, slot));
2020         }
2021         return ret;
2022 }
2023
2024 /*
2025  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2026  * in every fs or file tree check. Here we find its all root ids, and only check
2027  * it in the fs or file tree which has the smallest root id.
2028  */
2029 static int need_check(struct btrfs_root *root, struct ulist *roots)
2030 {
2031         struct rb_node *node;
2032         struct ulist_node *u;
2033
2034         if (roots->nnodes == 1)
2035                 return 1;
2036
2037         node = rb_first(&roots->root);
2038         u = rb_entry(node, struct ulist_node, rb_node);
2039         /*
2040          * current root id is not smallest, we skip it and let it be checked
2041          * in the fs or file tree who hash the smallest root id.
2042          */
2043         if (root->objectid != u->val)
2044                 return 0;
2045
2046         return 1;
2047 }
2048
2049 /*
2050  * for a tree node or leaf, we record its reference count, so later if we still
2051  * process this node or leaf, don't need to compute its reference count again.
2052  */
2053 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2054                              struct node_refs *nrefs, u64 level)
2055 {
2056         int check, ret;
2057         u64 refs;
2058         struct ulist *roots;
2059
2060         if (nrefs->bytenr[level] != bytenr) {
2061                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2062                                        level, 1, &refs, NULL);
2063                 if (ret < 0)
2064                         return ret;
2065
2066                 nrefs->bytenr[level] = bytenr;
2067                 nrefs->refs[level] = refs;
2068                 if (refs > 1) {
2069                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2070                                                    0, &roots);
2071                         if (ret)
2072                                 return -EIO;
2073
2074                         check = need_check(root, roots);
2075                         ulist_free(roots);
2076                         nrefs->need_check[level] = check;
2077                 } else {
2078                         nrefs->need_check[level] = 1;
2079                 }
2080         }
2081
2082         return 0;
2083 }
2084
2085 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2086                           struct walk_control *wc, int *level,
2087                           struct node_refs *nrefs)
2088 {
2089         enum btrfs_tree_block_status status;
2090         u64 bytenr;
2091         u64 ptr_gen;
2092         struct extent_buffer *next;
2093         struct extent_buffer *cur;
2094         u32 blocksize;
2095         int ret, err = 0;
2096         u64 refs;
2097
2098         WARN_ON(*level < 0);
2099         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2100
2101         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2102                 refs = nrefs->refs[*level];
2103                 ret = 0;
2104         } else {
2105                 ret = btrfs_lookup_extent_info(NULL, root,
2106                                        path->nodes[*level]->start,
2107                                        *level, 1, &refs, NULL);
2108                 if (ret < 0) {
2109                         err = ret;
2110                         goto out;
2111                 }
2112                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2113                 nrefs->refs[*level] = refs;
2114         }
2115
2116         if (refs > 1) {
2117                 ret = enter_shared_node(root, path->nodes[*level]->start,
2118                                         refs, wc, *level);
2119                 if (ret > 0) {
2120                         err = ret;
2121                         goto out;
2122                 }
2123         }
2124
2125         while (*level >= 0) {
2126                 WARN_ON(*level < 0);
2127                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2128                 cur = path->nodes[*level];
2129
2130                 if (btrfs_header_level(cur) != *level)
2131                         WARN_ON(1);
2132
2133                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134                         break;
2135                 if (*level == 0) {
2136                         ret = process_one_leaf(root, cur, wc);
2137                         if (ret < 0)
2138                                 err = ret;
2139                         break;
2140                 }
2141                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2142                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2143                 blocksize = root->nodesize;
2144
2145                 if (bytenr == nrefs->bytenr[*level - 1]) {
2146                         refs = nrefs->refs[*level - 1];
2147                 } else {
2148                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2149                                         *level - 1, 1, &refs, NULL);
2150                         if (ret < 0) {
2151                                 refs = 0;
2152                         } else {
2153                                 nrefs->bytenr[*level - 1] = bytenr;
2154                                 nrefs->refs[*level - 1] = refs;
2155                         }
2156                 }
2157
2158                 if (refs > 1) {
2159                         ret = enter_shared_node(root, bytenr, refs,
2160                                                 wc, *level - 1);
2161                         if (ret > 0) {
2162                                 path->slots[*level]++;
2163                                 continue;
2164                         }
2165                 }
2166
2167                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2168                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2169                         free_extent_buffer(next);
2170                         reada_walk_down(root, cur, path->slots[*level]);
2171                         next = read_tree_block(root, bytenr, blocksize,
2172                                                ptr_gen);
2173                         if (!extent_buffer_uptodate(next)) {
2174                                 struct btrfs_key node_key;
2175
2176                                 btrfs_node_key_to_cpu(path->nodes[*level],
2177                                                       &node_key,
2178                                                       path->slots[*level]);
2179                                 btrfs_add_corrupt_extent_record(root->fs_info,
2180                                                 &node_key,
2181                                                 path->nodes[*level]->start,
2182                                                 root->nodesize, *level);
2183                                 err = -EIO;
2184                                 goto out;
2185                         }
2186                 }
2187
2188                 ret = check_child_node(root, cur, path->slots[*level], next);
2189                 if (ret) {
2190                         err = ret;
2191                         goto out;
2192                 }
2193
2194                 if (btrfs_is_leaf(next))
2195                         status = btrfs_check_leaf(root, NULL, next);
2196                 else
2197                         status = btrfs_check_node(root, NULL, next);
2198                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2199                         free_extent_buffer(next);
2200                         err = -EIO;
2201                         goto out;
2202                 }
2203
2204                 *level = *level - 1;
2205                 free_extent_buffer(path->nodes[*level]);
2206                 path->nodes[*level] = next;
2207                 path->slots[*level] = 0;
2208         }
2209 out:
2210         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2211         return err;
2212 }
2213
2214 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2215                             unsigned int ext_ref);
2216
2217 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2218                              int *level, struct node_refs *nrefs, int ext_ref)
2219 {
2220         enum btrfs_tree_block_status status;
2221         u64 bytenr;
2222         u64 ptr_gen;
2223         struct extent_buffer *next;
2224         struct extent_buffer *cur;
2225         u32 blocksize;
2226         int ret;
2227
2228         WARN_ON(*level < 0);
2229         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2230
2231         ret = update_nodes_refs(root, path->nodes[*level]->start,
2232                                 nrefs, *level);
2233         if (ret < 0)
2234                 return ret;
2235
2236         while (*level >= 0) {
2237                 WARN_ON(*level < 0);
2238                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2239                 cur = path->nodes[*level];
2240
2241                 if (btrfs_header_level(cur) != *level)
2242                         WARN_ON(1);
2243
2244                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2245                         break;
2246                 /* Don't forgot to check leaf/node validation */
2247                 if (*level == 0) {
2248                         ret = btrfs_check_leaf(root, NULL, cur);
2249                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2250                                 ret = -EIO;
2251                                 break;
2252                         }
2253                         ret = process_one_leaf_v2(root, path, nrefs,
2254                                                   level, ext_ref);
2255                         break;
2256                 } else {
2257                         ret = btrfs_check_node(root, NULL, cur);
2258                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2259                                 ret = -EIO;
2260                                 break;
2261                         }
2262                 }
2263                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2264                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2265                 blocksize = root->nodesize;
2266
2267                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2268                 if (ret)
2269                         break;
2270                 if (!nrefs->need_check[*level - 1]) {
2271                         path->slots[*level]++;
2272                         continue;
2273                 }
2274
2275                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2276                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2277                         free_extent_buffer(next);
2278                         reada_walk_down(root, cur, path->slots[*level]);
2279                         next = read_tree_block(root, bytenr, blocksize,
2280                                                ptr_gen);
2281                         if (!extent_buffer_uptodate(next)) {
2282                                 struct btrfs_key node_key;
2283
2284                                 btrfs_node_key_to_cpu(path->nodes[*level],
2285                                                       &node_key,
2286                                                       path->slots[*level]);
2287                                 btrfs_add_corrupt_extent_record(root->fs_info,
2288                                                 &node_key,
2289                                                 path->nodes[*level]->start,
2290                                                 root->nodesize, *level);
2291                                 ret = -EIO;
2292                                 break;
2293                         }
2294                 }
2295
2296                 ret = check_child_node(root, cur, path->slots[*level], next);
2297                 if (ret < 0) 
2298                         break;
2299
2300                 if (btrfs_is_leaf(next))
2301                         status = btrfs_check_leaf(root, NULL, next);
2302                 else
2303                         status = btrfs_check_node(root, NULL, next);
2304                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2305                         free_extent_buffer(next);
2306                         ret = -EIO;
2307                         break;
2308                 }
2309
2310                 *level = *level - 1;
2311                 free_extent_buffer(path->nodes[*level]);
2312                 path->nodes[*level] = next;
2313                 path->slots[*level] = 0;
2314         }
2315         return ret;
2316 }
2317
2318 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2319                         struct walk_control *wc, int *level)
2320 {
2321         int i;
2322         struct extent_buffer *leaf;
2323
2324         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2325                 leaf = path->nodes[i];
2326                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2327                         path->slots[i]++;
2328                         *level = i;
2329                         return 0;
2330                 } else {
2331                         free_extent_buffer(path->nodes[*level]);
2332                         path->nodes[*level] = NULL;
2333                         BUG_ON(*level > wc->active_node);
2334                         if (*level == wc->active_node)
2335                                 leave_shared_node(root, wc, *level);
2336                         *level = i + 1;
2337                 }
2338         }
2339         return 1;
2340 }
2341
2342 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2343                            int *level)
2344 {
2345         int i;
2346         struct extent_buffer *leaf;
2347
2348         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349                 leaf = path->nodes[i];
2350                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2351                         path->slots[i]++;
2352                         *level = i;
2353                         return 0;
2354                 } else {
2355                         free_extent_buffer(path->nodes[*level]);
2356                         path->nodes[*level] = NULL;
2357                         *level = i + 1;
2358                 }
2359         }
2360         return 1;
2361 }
2362
2363 static int check_root_dir(struct inode_record *rec)
2364 {
2365         struct inode_backref *backref;
2366         int ret = -1;
2367
2368         if (!rec->found_inode_item || rec->errors)
2369                 goto out;
2370         if (rec->nlink != 1 || rec->found_link != 0)
2371                 goto out;
2372         if (list_empty(&rec->backrefs))
2373                 goto out;
2374         backref = to_inode_backref(rec->backrefs.next);
2375         if (!backref->found_inode_ref)
2376                 goto out;
2377         if (backref->index != 0 || backref->namelen != 2 ||
2378             memcmp(backref->name, "..", 2))
2379                 goto out;
2380         if (backref->found_dir_index || backref->found_dir_item)
2381                 goto out;
2382         ret = 0;
2383 out:
2384         return ret;
2385 }
2386
2387 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2388                               struct btrfs_root *root, struct btrfs_path *path,
2389                               struct inode_record *rec)
2390 {
2391         struct btrfs_inode_item *ei;
2392         struct btrfs_key key;
2393         int ret;
2394
2395         key.objectid = rec->ino;
2396         key.type = BTRFS_INODE_ITEM_KEY;
2397         key.offset = (u64)-1;
2398
2399         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2400         if (ret < 0)
2401                 goto out;
2402         if (ret) {
2403                 if (!path->slots[0]) {
2404                         ret = -ENOENT;
2405                         goto out;
2406                 }
2407                 path->slots[0]--;
2408                 ret = 0;
2409         }
2410         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2411         if (key.objectid != rec->ino) {
2412                 ret = -ENOENT;
2413                 goto out;
2414         }
2415
2416         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2417                             struct btrfs_inode_item);
2418         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2419         btrfs_mark_buffer_dirty(path->nodes[0]);
2420         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2421         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2422                root->root_key.objectid);
2423 out:
2424         btrfs_release_path(path);
2425         return ret;
2426 }
2427
2428 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2429                                     struct btrfs_root *root,
2430                                     struct btrfs_path *path,
2431                                     struct inode_record *rec)
2432 {
2433         int ret;
2434
2435         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2436         btrfs_release_path(path);
2437         if (!ret)
2438                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2439         return ret;
2440 }
2441
2442 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2443                                struct btrfs_root *root,
2444                                struct btrfs_path *path,
2445                                struct inode_record *rec)
2446 {
2447         struct btrfs_inode_item *ei;
2448         struct btrfs_key key;
2449         int ret = 0;
2450
2451         key.objectid = rec->ino;
2452         key.type = BTRFS_INODE_ITEM_KEY;
2453         key.offset = 0;
2454
2455         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2456         if (ret) {
2457                 if (ret > 0)
2458                         ret = -ENOENT;
2459                 goto out;
2460         }
2461
2462         /* Since ret == 0, no need to check anything */
2463         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2464                             struct btrfs_inode_item);
2465         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2466         btrfs_mark_buffer_dirty(path->nodes[0]);
2467         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2468         printf("reset nbytes for ino %llu root %llu\n",
2469                rec->ino, root->root_key.objectid);
2470 out:
2471         btrfs_release_path(path);
2472         return ret;
2473 }
2474
2475 static int add_missing_dir_index(struct btrfs_root *root,
2476                                  struct cache_tree *inode_cache,
2477                                  struct inode_record *rec,
2478                                  struct inode_backref *backref)
2479 {
2480         struct btrfs_path path;
2481         struct btrfs_trans_handle *trans;
2482         struct btrfs_dir_item *dir_item;
2483         struct extent_buffer *leaf;
2484         struct btrfs_key key;
2485         struct btrfs_disk_key disk_key;
2486         struct inode_record *dir_rec;
2487         unsigned long name_ptr;
2488         u32 data_size = sizeof(*dir_item) + backref->namelen;
2489         int ret;
2490
2491         trans = btrfs_start_transaction(root, 1);
2492         if (IS_ERR(trans))
2493                 return PTR_ERR(trans);
2494
2495         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2496                 (unsigned long long)rec->ino);
2497
2498         btrfs_init_path(&path);
2499         key.objectid = backref->dir;
2500         key.type = BTRFS_DIR_INDEX_KEY;
2501         key.offset = backref->index;
2502         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2503         BUG_ON(ret);
2504
2505         leaf = path.nodes[0];
2506         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2507
2508         disk_key.objectid = cpu_to_le64(rec->ino);
2509         disk_key.type = BTRFS_INODE_ITEM_KEY;
2510         disk_key.offset = 0;
2511
2512         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2513         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2514         btrfs_set_dir_data_len(leaf, dir_item, 0);
2515         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2516         name_ptr = (unsigned long)(dir_item + 1);
2517         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2518         btrfs_mark_buffer_dirty(leaf);
2519         btrfs_release_path(&path);
2520         btrfs_commit_transaction(trans, root);
2521
2522         backref->found_dir_index = 1;
2523         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2524         BUG_ON(IS_ERR(dir_rec));
2525         if (!dir_rec)
2526                 return 0;
2527         dir_rec->found_size += backref->namelen;
2528         if (dir_rec->found_size == dir_rec->isize &&
2529             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2530                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2531         if (dir_rec->found_size != dir_rec->isize)
2532                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2533
2534         return 0;
2535 }
2536
2537 static int delete_dir_index(struct btrfs_root *root,
2538                             struct cache_tree *inode_cache,
2539                             struct inode_record *rec,
2540                             struct inode_backref *backref)
2541 {
2542         struct btrfs_trans_handle *trans;
2543         struct btrfs_dir_item *di;
2544         struct btrfs_path path;
2545         int ret = 0;
2546
2547         trans = btrfs_start_transaction(root, 1);
2548         if (IS_ERR(trans))
2549                 return PTR_ERR(trans);
2550
2551         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2552                 (unsigned long long)backref->dir,
2553                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2554                 (unsigned long long)root->objectid);
2555
2556         btrfs_init_path(&path);
2557         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2558                                     backref->name, backref->namelen,
2559                                     backref->index, -1);
2560         if (IS_ERR(di)) {
2561                 ret = PTR_ERR(di);
2562                 btrfs_release_path(&path);
2563                 btrfs_commit_transaction(trans, root);
2564                 if (ret == -ENOENT)
2565                         return 0;
2566                 return ret;
2567         }
2568
2569         if (!di)
2570                 ret = btrfs_del_item(trans, root, &path);
2571         else
2572                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2573         BUG_ON(ret);
2574         btrfs_release_path(&path);
2575         btrfs_commit_transaction(trans, root);
2576         return ret;
2577 }
2578
2579 static int create_inode_item(struct btrfs_root *root,
2580                              struct inode_record *rec,
2581                              struct inode_backref *backref, int root_dir)
2582 {
2583         struct btrfs_trans_handle *trans;
2584         struct btrfs_inode_item inode_item;
2585         time_t now = time(NULL);
2586         int ret;
2587
2588         trans = btrfs_start_transaction(root, 1);
2589         if (IS_ERR(trans)) {
2590                 ret = PTR_ERR(trans);
2591                 return ret;
2592         }
2593
2594         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2595                 "be incomplete, please check permissions and content after "
2596                 "the fsck completes.\n", (unsigned long long)root->objectid,
2597                 (unsigned long long)rec->ino);
2598
2599         memset(&inode_item, 0, sizeof(inode_item));
2600         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2601         if (root_dir)
2602                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2603         else
2604                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2605         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2606         if (rec->found_dir_item) {
2607                 if (rec->found_file_extent)
2608                         fprintf(stderr, "root %llu inode %llu has both a dir "
2609                                 "item and extents, unsure if it is a dir or a "
2610                                 "regular file so setting it as a directory\n",
2611                                 (unsigned long long)root->objectid,
2612                                 (unsigned long long)rec->ino);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2614                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2615         } else if (!rec->found_dir_item) {
2616                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2617                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2618         }
2619         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2626         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2627
2628         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2629         BUG_ON(ret);
2630         btrfs_commit_transaction(trans, root);
2631         return 0;
2632 }
2633
2634 static int repair_inode_backrefs(struct btrfs_root *root,
2635                                  struct inode_record *rec,
2636                                  struct cache_tree *inode_cache,
2637                                  int delete)
2638 {
2639         struct inode_backref *tmp, *backref;
2640         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2641         int ret = 0;
2642         int repaired = 0;
2643
2644         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2645                 if (!delete && rec->ino == root_dirid) {
2646                         if (!rec->found_inode_item) {
2647                                 ret = create_inode_item(root, rec, backref, 1);
2648                                 if (ret)
2649                                         break;
2650                                 repaired++;
2651                         }
2652                 }
2653
2654                 /* Index 0 for root dir's are special, don't mess with it */
2655                 if (rec->ino == root_dirid && backref->index == 0)
2656                         continue;
2657
2658                 if (delete &&
2659                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2660                      (backref->found_dir_index && backref->found_inode_ref &&
2661                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2662                         ret = delete_dir_index(root, inode_cache, rec, backref);
2663                         if (ret)
2664                                 break;
2665                         repaired++;
2666                         list_del(&backref->list);
2667                         free(backref);
2668                 }
2669
2670                 if (!delete && !backref->found_dir_index &&
2671                     backref->found_dir_item && backref->found_inode_ref) {
2672                         ret = add_missing_dir_index(root, inode_cache, rec,
2673                                                     backref);
2674                         if (ret)
2675                                 break;
2676                         repaired++;
2677                         if (backref->found_dir_item &&
2678                             backref->found_dir_index &&
2679                             backref->found_dir_index) {
2680                                 if (!backref->errors &&
2681                                     backref->found_inode_ref) {
2682                                         list_del(&backref->list);
2683                                         free(backref);
2684                                 }
2685                         }
2686                 }
2687
2688                 if (!delete && (!backref->found_dir_index &&
2689                                 !backref->found_dir_item &&
2690                                 backref->found_inode_ref)) {
2691                         struct btrfs_trans_handle *trans;
2692                         struct btrfs_key location;
2693
2694                         ret = check_dir_conflict(root, backref->name,
2695                                                  backref->namelen,
2696                                                  backref->dir,
2697                                                  backref->index);
2698                         if (ret) {
2699                                 /*
2700                                  * let nlink fixing routine to handle it,
2701                                  * which can do it better.
2702                                  */
2703                                 ret = 0;
2704                                 break;
2705                         }
2706                         location.objectid = rec->ino;
2707                         location.type = BTRFS_INODE_ITEM_KEY;
2708                         location.offset = 0;
2709
2710                         trans = btrfs_start_transaction(root, 1);
2711                         if (IS_ERR(trans)) {
2712                                 ret = PTR_ERR(trans);
2713                                 break;
2714                         }
2715                         fprintf(stderr, "adding missing dir index/item pair "
2716                                 "for inode %llu\n",
2717                                 (unsigned long long)rec->ino);
2718                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2719                                                     backref->namelen,
2720                                                     backref->dir, &location,
2721                                                     imode_to_type(rec->imode),
2722                                                     backref->index);
2723                         BUG_ON(ret);
2724                         btrfs_commit_transaction(trans, root);
2725                         repaired++;
2726                 }
2727
2728                 if (!delete && (backref->found_inode_ref &&
2729                                 backref->found_dir_index &&
2730                                 backref->found_dir_item &&
2731                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2732                                 !rec->found_inode_item)) {
2733                         ret = create_inode_item(root, rec, backref, 0);
2734                         if (ret)
2735                                 break;
2736                         repaired++;
2737                 }
2738
2739         }
2740         return ret ? ret : repaired;
2741 }
2742
2743 /*
2744  * To determine the file type for nlink/inode_item repair
2745  *
2746  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2747  * Return -ENOENT if file type is not found.
2748  */
2749 static int find_file_type(struct inode_record *rec, u8 *type)
2750 {
2751         struct inode_backref *backref;
2752
2753         /* For inode item recovered case */
2754         if (rec->found_inode_item) {
2755                 *type = imode_to_type(rec->imode);
2756                 return 0;
2757         }
2758
2759         list_for_each_entry(backref, &rec->backrefs, list) {
2760                 if (backref->found_dir_index || backref->found_dir_item) {
2761                         *type = backref->filetype;
2762                         return 0;
2763                 }
2764         }
2765         return -ENOENT;
2766 }
2767
2768 /*
2769  * To determine the file name for nlink repair
2770  *
2771  * Return 0 if file name is found, set name and namelen.
2772  * Return -ENOENT if file name is not found.
2773  */
2774 static int find_file_name(struct inode_record *rec,
2775                           char *name, int *namelen)
2776 {
2777         struct inode_backref *backref;
2778
2779         list_for_each_entry(backref, &rec->backrefs, list) {
2780                 if (backref->found_dir_index || backref->found_dir_item ||
2781                     backref->found_inode_ref) {
2782                         memcpy(name, backref->name, backref->namelen);
2783                         *namelen = backref->namelen;
2784                         return 0;
2785                 }
2786         }
2787         return -ENOENT;
2788 }
2789
2790 /* Reset the nlink of the inode to the correct one */
2791 static int reset_nlink(struct btrfs_trans_handle *trans,
2792                        struct btrfs_root *root,
2793                        struct btrfs_path *path,
2794                        struct inode_record *rec)
2795 {
2796         struct inode_backref *backref;
2797         struct inode_backref *tmp;
2798         struct btrfs_key key;
2799         struct btrfs_inode_item *inode_item;
2800         int ret = 0;
2801
2802         /* We don't believe this either, reset it and iterate backref */
2803         rec->found_link = 0;
2804
2805         /* Remove all backref including the valid ones */
2806         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2807                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2808                                    backref->index, backref->name,
2809                                    backref->namelen, 0);
2810                 if (ret < 0)
2811                         goto out;
2812
2813                 /* remove invalid backref, so it won't be added back */
2814                 if (!(backref->found_dir_index &&
2815                       backref->found_dir_item &&
2816                       backref->found_inode_ref)) {
2817                         list_del(&backref->list);
2818                         free(backref);
2819                 } else {
2820                         rec->found_link++;
2821                 }
2822         }
2823
2824         /* Set nlink to 0 */
2825         key.objectid = rec->ino;
2826         key.type = BTRFS_INODE_ITEM_KEY;
2827         key.offset = 0;
2828         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2829         if (ret < 0)
2830                 goto out;
2831         if (ret > 0) {
2832                 ret = -ENOENT;
2833                 goto out;
2834         }
2835         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2836                                     struct btrfs_inode_item);
2837         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2838         btrfs_mark_buffer_dirty(path->nodes[0]);
2839         btrfs_release_path(path);
2840
2841         /*
2842          * Add back valid inode_ref/dir_item/dir_index,
2843          * add_link() will handle the nlink inc, so new nlink must be correct
2844          */
2845         list_for_each_entry(backref, &rec->backrefs, list) {
2846                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2847                                      backref->name, backref->namelen,
2848                                      backref->filetype, &backref->index, 1);
2849                 if (ret < 0)
2850                         goto out;
2851         }
2852 out:
2853         btrfs_release_path(path);
2854         return ret;
2855 }
2856
2857 static int get_highest_inode(struct btrfs_trans_handle *trans,
2858                                 struct btrfs_root *root,
2859                                 struct btrfs_path *path,
2860                                 u64 *highest_ino)
2861 {
2862         struct btrfs_key key, found_key;
2863         int ret;
2864
2865         btrfs_init_path(path);
2866         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2867         key.offset = -1;
2868         key.type = BTRFS_INODE_ITEM_KEY;
2869         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2870         if (ret == 1) {
2871                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2872                                 path->slots[0] - 1);
2873                 *highest_ino = found_key.objectid;
2874                 ret = 0;
2875         }
2876         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2877                 ret = -EOVERFLOW;
2878         btrfs_release_path(path);
2879         return ret;
2880 }
2881
2882 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2883                                struct btrfs_root *root,
2884                                struct btrfs_path *path,
2885                                struct inode_record *rec)
2886 {
2887         char *dir_name = "lost+found";
2888         char namebuf[BTRFS_NAME_LEN] = {0};
2889         u64 lost_found_ino;
2890         u32 mode = 0700;
2891         u8 type = 0;
2892         int namelen = 0;
2893         int name_recovered = 0;
2894         int type_recovered = 0;
2895         int ret = 0;
2896
2897         /*
2898          * Get file name and type first before these invalid inode ref
2899          * are deleted by remove_all_invalid_backref()
2900          */
2901         name_recovered = !find_file_name(rec, namebuf, &namelen);
2902         type_recovered = !find_file_type(rec, &type);
2903
2904         if (!name_recovered) {
2905                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2906                        rec->ino, rec->ino);
2907                 namelen = count_digits(rec->ino);
2908                 sprintf(namebuf, "%llu", rec->ino);
2909                 name_recovered = 1;
2910         }
2911         if (!type_recovered) {
2912                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2913                        rec->ino);
2914                 type = BTRFS_FT_REG_FILE;
2915                 type_recovered = 1;
2916         }
2917
2918         ret = reset_nlink(trans, root, path, rec);
2919         if (ret < 0) {
2920                 fprintf(stderr,
2921                         "Failed to reset nlink for inode %llu: %s\n",
2922                         rec->ino, strerror(-ret));
2923                 goto out;
2924         }
2925
2926         if (rec->found_link == 0) {
2927                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2928                 if (ret < 0)
2929                         goto out;
2930                 lost_found_ino++;
2931                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2932                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2933                                   mode);
2934                 if (ret < 0) {
2935                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2936                                 dir_name, strerror(-ret));
2937                         goto out;
2938                 }
2939                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2940                                      namebuf, namelen, type, NULL, 1);
2941                 /*
2942                  * Add ".INO" suffix several times to handle case where
2943                  * "FILENAME.INO" is already taken by another file.
2944                  */
2945                 while (ret == -EEXIST) {
2946                         /*
2947                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2948                          */
2949                         if (namelen + count_digits(rec->ino) + 1 >
2950                             BTRFS_NAME_LEN) {
2951                                 ret = -EFBIG;
2952                                 goto out;
2953                         }
2954                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2955                                  ".%llu", rec->ino);
2956                         namelen += count_digits(rec->ino) + 1;
2957                         ret = btrfs_add_link(trans, root, rec->ino,
2958                                              lost_found_ino, namebuf,
2959                                              namelen, type, NULL, 1);
2960                 }
2961                 if (ret < 0) {
2962                         fprintf(stderr,
2963                                 "Failed to link the inode %llu to %s dir: %s\n",
2964                                 rec->ino, dir_name, strerror(-ret));
2965                         goto out;
2966                 }
2967                 /*
2968                  * Just increase the found_link, don't actually add the
2969                  * backref. This will make things easier and this inode
2970                  * record will be freed after the repair is done.
2971                  * So fsck will not report problem about this inode.
2972                  */
2973                 rec->found_link++;
2974                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2975                        namelen, namebuf, dir_name);
2976         }
2977         printf("Fixed the nlink of inode %llu\n", rec->ino);
2978 out:
2979         /*
2980          * Clear the flag anyway, or we will loop forever for the same inode
2981          * as it will not be removed from the bad inode list and the dead loop
2982          * happens.
2983          */
2984         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2985         btrfs_release_path(path);
2986         return ret;
2987 }
2988
2989 /*
2990  * Check if there is any normal(reg or prealloc) file extent for given
2991  * ino.
2992  * This is used to determine the file type when neither its dir_index/item or
2993  * inode_item exists.
2994  *
2995  * This will *NOT* report error, if any error happens, just consider it does
2996  * not have any normal file extent.
2997  */
2998 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2999 {
3000         struct btrfs_path path;
3001         struct btrfs_key key;
3002         struct btrfs_key found_key;
3003         struct btrfs_file_extent_item *fi;
3004         u8 type;
3005         int ret = 0;
3006
3007         btrfs_init_path(&path);
3008         key.objectid = ino;
3009         key.type = BTRFS_EXTENT_DATA_KEY;
3010         key.offset = 0;
3011
3012         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3013         if (ret < 0) {
3014                 ret = 0;
3015                 goto out;
3016         }
3017         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3018                 ret = btrfs_next_leaf(root, &path);
3019                 if (ret) {
3020                         ret = 0;
3021                         goto out;
3022                 }
3023         }
3024         while (1) {
3025                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3026                                       path.slots[0]);
3027                 if (found_key.objectid != ino ||
3028                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3029                         break;
3030                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3031                                     struct btrfs_file_extent_item);
3032                 type = btrfs_file_extent_type(path.nodes[0], fi);
3033                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3034                         ret = 1;
3035                         goto out;
3036                 }
3037         }
3038 out:
3039         btrfs_release_path(&path);
3040         return ret;
3041 }
3042
3043 static u32 btrfs_type_to_imode(u8 type)
3044 {
3045         static u32 imode_by_btrfs_type[] = {
3046                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3047                 [BTRFS_FT_DIR]          = S_IFDIR,
3048                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3049                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3050                 [BTRFS_FT_FIFO]         = S_IFIFO,
3051                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3052                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3053         };
3054
3055         return imode_by_btrfs_type[(type)];
3056 }
3057
3058 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3059                                 struct btrfs_root *root,
3060                                 struct btrfs_path *path,
3061                                 struct inode_record *rec)
3062 {
3063         u8 filetype;
3064         u32 mode = 0700;
3065         int type_recovered = 0;
3066         int ret = 0;
3067
3068         printf("Trying to rebuild inode:%llu\n", rec->ino);
3069
3070         type_recovered = !find_file_type(rec, &filetype);
3071
3072         /*
3073          * Try to determine inode type if type not found.
3074          *
3075          * For found regular file extent, it must be FILE.
3076          * For found dir_item/index, it must be DIR.
3077          *
3078          * For undetermined one, use FILE as fallback.
3079          *
3080          * TODO:
3081          * 1. If found backref(inode_index/item is already handled) to it,
3082          *    it must be DIR.
3083          *    Need new inode-inode ref structure to allow search for that.
3084          */
3085         if (!type_recovered) {
3086                 if (rec->found_file_extent &&
3087                     find_normal_file_extent(root, rec->ino)) {
3088                         type_recovered = 1;
3089                         filetype = BTRFS_FT_REG_FILE;
3090                 } else if (rec->found_dir_item) {
3091                         type_recovered = 1;
3092                         filetype = BTRFS_FT_DIR;
3093                 } else if (!list_empty(&rec->orphan_extents)) {
3094                         type_recovered = 1;
3095                         filetype = BTRFS_FT_REG_FILE;
3096                 } else{
3097                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3098                                rec->ino);
3099                         type_recovered = 1;
3100                         filetype = BTRFS_FT_REG_FILE;
3101                 }
3102         }
3103
3104         ret = btrfs_new_inode(trans, root, rec->ino,
3105                               mode | btrfs_type_to_imode(filetype));
3106         if (ret < 0)
3107                 goto out;
3108
3109         /*
3110          * Here inode rebuild is done, we only rebuild the inode item,
3111          * don't repair the nlink(like move to lost+found).
3112          * That is the job of nlink repair.
3113          *
3114          * We just fill the record and return
3115          */
3116         rec->found_dir_item = 1;
3117         rec->imode = mode | btrfs_type_to_imode(filetype);
3118         rec->nlink = 0;
3119         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3120         /* Ensure the inode_nlinks repair function will be called */
3121         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3122 out:
3123         return ret;
3124 }
3125
3126 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3127                                       struct btrfs_root *root,
3128                                       struct btrfs_path *path,
3129                                       struct inode_record *rec)
3130 {
3131         struct orphan_data_extent *orphan;
3132         struct orphan_data_extent *tmp;
3133         int ret = 0;
3134
3135         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3136                 /*
3137                  * Check for conflicting file extents
3138                  *
3139                  * Here we don't know whether the extents is compressed or not,
3140                  * so we can only assume it not compressed nor data offset,
3141                  * and use its disk_len as extent length.
3142                  */
3143                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3144                                        orphan->offset, orphan->disk_len, 0);
3145                 btrfs_release_path(path);
3146                 if (ret < 0)
3147                         goto out;
3148                 if (!ret) {
3149                         fprintf(stderr,
3150                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3151                                 orphan->disk_bytenr, orphan->disk_len);
3152                         ret = btrfs_free_extent(trans,
3153                                         root->fs_info->extent_root,
3154                                         orphan->disk_bytenr, orphan->disk_len,
3155                                         0, root->objectid, orphan->objectid,
3156                                         orphan->offset);
3157                         if (ret < 0)
3158                                 goto out;
3159                 }
3160                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3161                                 orphan->offset, orphan->disk_bytenr,
3162                                 orphan->disk_len, orphan->disk_len);
3163                 if (ret < 0)
3164                         goto out;
3165
3166                 /* Update file size info */
3167                 rec->found_size += orphan->disk_len;
3168                 if (rec->found_size == rec->nbytes)
3169                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3170
3171                 /* Update the file extent hole info too */
3172                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3173                                            orphan->disk_len);
3174                 if (ret < 0)
3175                         goto out;
3176                 if (RB_EMPTY_ROOT(&rec->holes))
3177                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3178
3179                 list_del(&orphan->list);
3180                 free(orphan);
3181         }
3182         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3183 out:
3184         return ret;
3185 }
3186
3187 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3188                                         struct btrfs_root *root,
3189                                         struct btrfs_path *path,
3190                                         struct inode_record *rec)
3191 {
3192         struct rb_node *node;
3193         struct file_extent_hole *hole;
3194         int found = 0;
3195         int ret = 0;
3196
3197         node = rb_first(&rec->holes);
3198
3199         while (node) {
3200                 found = 1;
3201                 hole = rb_entry(node, struct file_extent_hole, node);
3202                 ret = btrfs_punch_hole(trans, root, rec->ino,
3203                                        hole->start, hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 ret = del_file_extent_hole(&rec->holes, hole->start,
3207                                            hole->len);
3208                 if (ret < 0)
3209                         goto out;
3210                 if (RB_EMPTY_ROOT(&rec->holes))
3211                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3212                 node = rb_first(&rec->holes);
3213         }
3214         /* special case for a file losing all its file extent */
3215         if (!found) {
3216                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3217                                        round_up(rec->isize, root->sectorsize));
3218                 if (ret < 0)
3219                         goto out;
3220         }
3221         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3222                rec->ino, root->objectid);
3223 out:
3224         return ret;
3225 }
3226
3227 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3228 {
3229         struct btrfs_trans_handle *trans;
3230         struct btrfs_path path;
3231         int ret = 0;
3232
3233         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3234                              I_ERR_NO_ORPHAN_ITEM |
3235                              I_ERR_LINK_COUNT_WRONG |
3236                              I_ERR_NO_INODE_ITEM |
3237                              I_ERR_FILE_EXTENT_ORPHAN |
3238                              I_ERR_FILE_EXTENT_DISCOUNT|
3239                              I_ERR_FILE_NBYTES_WRONG)))
3240                 return rec->errors;
3241
3242         /*
3243          * For nlink repair, it may create a dir and add link, so
3244          * 2 for parent(256)'s dir_index and dir_item
3245          * 2 for lost+found dir's inode_item and inode_ref
3246          * 1 for the new inode_ref of the file
3247          * 2 for lost+found dir's dir_index and dir_item for the file
3248          */
3249         trans = btrfs_start_transaction(root, 7);
3250         if (IS_ERR(trans))
3251                 return PTR_ERR(trans);
3252
3253         btrfs_init_path(&path);
3254         if (rec->errors & I_ERR_NO_INODE_ITEM)
3255                 ret = repair_inode_no_item(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3257                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3259                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3261                 ret = repair_inode_isize(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3263                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3265                 ret = repair_inode_nlinks(trans, root, &path, rec);
3266         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3267                 ret = repair_inode_nbytes(trans, root, &path, rec);
3268         btrfs_commit_transaction(trans, root);
3269         btrfs_release_path(&path);
3270         return ret;
3271 }
3272
3273 static int check_inode_recs(struct btrfs_root *root,
3274                             struct cache_tree *inode_cache)
3275 {
3276         struct cache_extent *cache;
3277         struct ptr_node *node;
3278         struct inode_record *rec;
3279         struct inode_backref *backref;
3280         int stage = 0;
3281         int ret = 0;
3282         int err = 0;
3283         u64 error = 0;
3284         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3285
3286         if (btrfs_root_refs(&root->root_item) == 0) {
3287                 if (!cache_tree_empty(inode_cache))
3288                         fprintf(stderr, "warning line %d\n", __LINE__);
3289                 return 0;
3290         }
3291
3292         /*
3293          * We need to repair backrefs first because we could change some of the
3294          * errors in the inode recs.
3295          *
3296          * We also need to go through and delete invalid backrefs first and then
3297          * add the correct ones second.  We do this because we may get EEXIST
3298          * when adding back the correct index because we hadn't yet deleted the
3299          * invalid index.
3300          *
3301          * For example, if we were missing a dir index then the directories
3302          * isize would be wrong, so if we fixed the isize to what we thought it
3303          * would be and then fixed the backref we'd still have a invalid fs, so
3304          * we need to add back the dir index and then check to see if the isize
3305          * is still wrong.
3306          */
3307         while (stage < 3) {
3308                 stage++;
3309                 if (stage == 3 && !err)
3310                         break;
3311
3312                 cache = search_cache_extent(inode_cache, 0);
3313                 while (repair && cache) {
3314                         node = container_of(cache, struct ptr_node, cache);
3315                         rec = node->data;
3316                         cache = next_cache_extent(cache);
3317
3318                         /* Need to free everything up and rescan */
3319                         if (stage == 3) {
3320                                 remove_cache_extent(inode_cache, &node->cache);
3321                                 free(node);
3322                                 free_inode_rec(rec);
3323                                 continue;
3324                         }
3325
3326                         if (list_empty(&rec->backrefs))
3327                                 continue;
3328
3329                         ret = repair_inode_backrefs(root, rec, inode_cache,
3330                                                     stage == 1);
3331                         if (ret < 0) {
3332                                 err = ret;
3333                                 stage = 2;
3334                                 break;
3335                         } if (ret > 0) {
3336                                 err = -EAGAIN;
3337                         }
3338                 }
3339         }
3340         if (err)
3341                 return err;
3342
3343         rec = get_inode_rec(inode_cache, root_dirid, 0);
3344         BUG_ON(IS_ERR(rec));
3345         if (rec) {
3346                 ret = check_root_dir(rec);
3347                 if (ret) {
3348                         fprintf(stderr, "root %llu root dir %llu error\n",
3349                                 (unsigned long long)root->root_key.objectid,
3350                                 (unsigned long long)root_dirid);
3351                         print_inode_error(root, rec);
3352                         error++;
3353                 }
3354         } else {
3355                 if (repair) {
3356                         struct btrfs_trans_handle *trans;
3357
3358                         trans = btrfs_start_transaction(root, 1);
3359                         if (IS_ERR(trans)) {
3360                                 err = PTR_ERR(trans);
3361                                 return err;
3362                         }
3363
3364                         fprintf(stderr,
3365                                 "root %llu missing its root dir, recreating\n",
3366                                 (unsigned long long)root->objectid);
3367
3368                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3369                         BUG_ON(ret);
3370
3371                         btrfs_commit_transaction(trans, root);
3372                         return -EAGAIN;
3373                 }
3374
3375                 fprintf(stderr, "root %llu root dir %llu not found\n",
3376                         (unsigned long long)root->root_key.objectid,
3377                         (unsigned long long)root_dirid);
3378         }
3379
3380         while (1) {
3381                 cache = search_cache_extent(inode_cache, 0);
3382                 if (!cache)
3383                         break;
3384                 node = container_of(cache, struct ptr_node, cache);
3385                 rec = node->data;
3386                 remove_cache_extent(inode_cache, &node->cache);
3387                 free(node);
3388                 if (rec->ino == root_dirid ||
3389                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3390                         free_inode_rec(rec);
3391                         continue;
3392                 }
3393
3394                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3395                         ret = check_orphan_item(root, rec->ino);
3396                         if (ret == 0)
3397                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3398                         if (can_free_inode_rec(rec)) {
3399                                 free_inode_rec(rec);
3400                                 continue;
3401                         }
3402                 }
3403
3404                 if (!rec->found_inode_item)
3405                         rec->errors |= I_ERR_NO_INODE_ITEM;
3406                 if (rec->found_link != rec->nlink)
3407                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3408                 if (repair) {
3409                         ret = try_repair_inode(root, rec);
3410                         if (ret == 0 && can_free_inode_rec(rec)) {
3411                                 free_inode_rec(rec);
3412                                 continue;
3413                         }
3414                         ret = 0;
3415                 }
3416
3417                 if (!(repair && ret == 0))
3418                         error++;
3419                 print_inode_error(root, rec);
3420                 list_for_each_entry(backref, &rec->backrefs, list) {
3421                         if (!backref->found_dir_item)
3422                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3423                         if (!backref->found_dir_index)
3424                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3425                         if (!backref->found_inode_ref)
3426                                 backref->errors |= REF_ERR_NO_INODE_REF;
3427                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3428                                 " namelen %u name %s filetype %d errors %x",
3429                                 (unsigned long long)backref->dir,
3430                                 (unsigned long long)backref->index,
3431                                 backref->namelen, backref->name,
3432                                 backref->filetype, backref->errors);
3433                         print_ref_error(backref->errors);
3434                 }
3435                 free_inode_rec(rec);
3436         }
3437         return (error > 0) ? -1 : 0;
3438 }
3439
3440 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3441                                         u64 objectid)
3442 {
3443         struct cache_extent *cache;
3444         struct root_record *rec = NULL;
3445         int ret;
3446
3447         cache = lookup_cache_extent(root_cache, objectid, 1);
3448         if (cache) {
3449                 rec = container_of(cache, struct root_record, cache);
3450         } else {
3451                 rec = calloc(1, sizeof(*rec));
3452                 if (!rec)
3453                         return ERR_PTR(-ENOMEM);
3454                 rec->objectid = objectid;
3455                 INIT_LIST_HEAD(&rec->backrefs);
3456                 rec->cache.start = objectid;
3457                 rec->cache.size = 1;
3458
3459                 ret = insert_cache_extent(root_cache, &rec->cache);
3460                 if (ret)
3461                         return ERR_PTR(-EEXIST);
3462         }
3463         return rec;
3464 }
3465
3466 static struct root_backref *get_root_backref(struct root_record *rec,
3467                                              u64 ref_root, u64 dir, u64 index,
3468                                              const char *name, int namelen)
3469 {
3470         struct root_backref *backref;
3471
3472         list_for_each_entry(backref, &rec->backrefs, list) {
3473                 if (backref->ref_root != ref_root || backref->dir != dir ||
3474                     backref->namelen != namelen)
3475                         continue;
3476                 if (memcmp(name, backref->name, namelen))
3477                         continue;
3478                 return backref;
3479         }
3480
3481         backref = calloc(1, sizeof(*backref) + namelen + 1);
3482         if (!backref)
3483                 return NULL;
3484         backref->ref_root = ref_root;
3485         backref->dir = dir;
3486         backref->index = index;
3487         backref->namelen = namelen;
3488         memcpy(backref->name, name, namelen);
3489         backref->name[namelen] = '\0';
3490         list_add_tail(&backref->list, &rec->backrefs);
3491         return backref;
3492 }
3493
3494 static void free_root_record(struct cache_extent *cache)
3495 {
3496         struct root_record *rec;
3497         struct root_backref *backref;
3498
3499         rec = container_of(cache, struct root_record, cache);
3500         while (!list_empty(&rec->backrefs)) {
3501                 backref = to_root_backref(rec->backrefs.next);
3502                 list_del(&backref->list);
3503                 free(backref);
3504         }
3505
3506         free(rec);
3507 }
3508
/*
 * Instantiate the tree teardown helper for root records, using
 * free_root_record() as the per-entry destructor.
 * NOTE(review): the macro is defined outside this file section; it
 * presumably generates free_root_recs_tree() - confirm against its
 * definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3510
3511 static int add_root_backref(struct cache_tree *root_cache,
3512                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3513                             const char *name, int namelen,
3514                             int item_type, int errors)
3515 {
3516         struct root_record *rec;
3517         struct root_backref *backref;
3518
3519         rec = get_root_rec(root_cache, root_id);
3520         BUG_ON(IS_ERR(rec));
3521         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3522         BUG_ON(!backref);
3523
3524         backref->errors |= errors;
3525
3526         if (item_type != BTRFS_DIR_ITEM_KEY) {
3527                 if (backref->found_dir_index || backref->found_back_ref ||
3528                     backref->found_forward_ref) {
3529                         if (backref->index != index)
3530                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3531                 } else {
3532                         backref->index = index;
3533                 }
3534         }
3535
3536         if (item_type == BTRFS_DIR_ITEM_KEY) {
3537                 if (backref->found_forward_ref)
3538                         rec->found_ref++;
3539                 backref->found_dir_item = 1;
3540         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3541                 backref->found_dir_index = 1;
3542         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3543                 if (backref->found_forward_ref)
3544                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3545                 else if (backref->found_dir_item)
3546                         rec->found_ref++;
3547                 backref->found_forward_ref = 1;
3548         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3549                 if (backref->found_back_ref)
3550                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3551                 backref->found_back_ref = 1;
3552         } else {
3553                 BUG_ON(1);
3554         }
3555
3556         if (backref->found_forward_ref && backref->found_dir_item)
3557                 backref->reachable = 1;
3558         return 0;
3559 }
3560
3561 static int merge_root_recs(struct btrfs_root *root,
3562                            struct cache_tree *src_cache,
3563                            struct cache_tree *dst_cache)
3564 {
3565         struct cache_extent *cache;
3566         struct ptr_node *node;
3567         struct inode_record *rec;
3568         struct inode_backref *backref;
3569         int ret = 0;
3570
3571         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3572                 free_inode_recs_tree(src_cache);
3573                 return 0;
3574         }
3575
3576         while (1) {
3577                 cache = search_cache_extent(src_cache, 0);
3578                 if (!cache)
3579                         break;
3580                 node = container_of(cache, struct ptr_node, cache);
3581                 rec = node->data;
3582                 remove_cache_extent(src_cache, &node->cache);
3583                 free(node);
3584
3585                 ret = is_child_root(root, root->objectid, rec->ino);
3586                 if (ret < 0)
3587                         break;
3588                 else if (ret == 0)
3589                         goto skip;
3590
3591                 list_for_each_entry(backref, &rec->backrefs, list) {
3592                         BUG_ON(backref->found_inode_ref);
3593                         if (backref->found_dir_item)
3594                                 add_root_backref(dst_cache, rec->ino,
3595                                         root->root_key.objectid, backref->dir,
3596                                         backref->index, backref->name,
3597                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3598                                         backref->errors);
3599                         if (backref->found_dir_index)
3600                                 add_root_backref(dst_cache, rec->ino,
3601                                         root->root_key.objectid, backref->dir,
3602                                         backref->index, backref->name,
3603                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3604                                         backref->errors);
3605                 }
3606 skip:
3607                 free_inode_rec(rec);
3608         }
3609         if (ret < 0)
3610                 return ret;
3611         return 0;
3612 }
3613
3614 static int check_root_refs(struct btrfs_root *root,
3615                            struct cache_tree *root_cache)
3616 {
3617         struct root_record *rec;
3618         struct root_record *ref_root;
3619         struct root_backref *backref;
3620         struct cache_extent *cache;
3621         int loop = 1;
3622         int ret;
3623         int error;
3624         int errors = 0;
3625
3626         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3627         BUG_ON(IS_ERR(rec));
3628         rec->found_ref = 1;
3629
3630         /* fixme: this can not detect circular references */
3631         while (loop) {
3632                 loop = 0;
3633                 cache = search_cache_extent(root_cache, 0);
3634                 while (1) {
3635                         if (!cache)
3636                                 break;
3637                         rec = container_of(cache, struct root_record, cache);
3638                         cache = next_cache_extent(cache);
3639
3640                         if (rec->found_ref == 0)
3641                                 continue;
3642
3643                         list_for_each_entry(backref, &rec->backrefs, list) {
3644                                 if (!backref->reachable)
3645                                         continue;
3646
3647                                 ref_root = get_root_rec(root_cache,
3648                                                         backref->ref_root);
3649                                 BUG_ON(IS_ERR(ref_root));
3650                                 if (ref_root->found_ref > 0)
3651                                         continue;
3652
3653                                 backref->reachable = 0;
3654                                 rec->found_ref--;
3655                                 if (rec->found_ref == 0)
3656                                         loop = 1;
3657                         }
3658                 }
3659         }
3660
3661         cache = search_cache_extent(root_cache, 0);
3662         while (1) {
3663                 if (!cache)
3664                         break;
3665                 rec = container_of(cache, struct root_record, cache);
3666                 cache = next_cache_extent(cache);
3667
3668                 if (rec->found_ref == 0 &&
3669                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3670                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3671                         ret = check_orphan_item(root->fs_info->tree_root,
3672                                                 rec->objectid);
3673                         if (ret == 0)
3674                                 continue;
3675
3676                         /*
3677                          * If we don't have a root item then we likely just have
3678                          * a dir item in a snapshot for this root but no actual
3679                          * ref key or anything so it's meaningless.
3680                          */
3681                         if (!rec->found_root_item)
3682                                 continue;
3683                         errors++;
3684                         fprintf(stderr, "fs tree %llu not referenced\n",
3685                                 (unsigned long long)rec->objectid);
3686                 }
3687
3688                 error = 0;
3689                 if (rec->found_ref > 0 && !rec->found_root_item)
3690                         error = 1;
3691                 list_for_each_entry(backref, &rec->backrefs, list) {
3692                         if (!backref->found_dir_item)
3693                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3694                         if (!backref->found_dir_index)
3695                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3696                         if (!backref->found_back_ref)
3697                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3698                         if (!backref->found_forward_ref)
3699                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3700                         if (backref->reachable && backref->errors)
3701                                 error = 1;
3702                 }
3703                 if (!error)
3704                         continue;
3705
3706                 errors++;
3707                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3708                         (unsigned long long)rec->objectid, rec->found_ref,
3709                          rec->found_root_item ? "" : "not found");
3710
3711                 list_for_each_entry(backref, &rec->backrefs, list) {
3712                         if (!backref->reachable)
3713                                 continue;
3714                         if (!backref->errors && rec->found_root_item)
3715                                 continue;
3716                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3717                                 " index %llu namelen %u name %s errors %x\n",
3718                                 (unsigned long long)backref->ref_root,
3719                                 (unsigned long long)backref->dir,
3720                                 (unsigned long long)backref->index,
3721                                 backref->namelen, backref->name,
3722                                 backref->errors);
3723                         print_ref_error(backref->errors);
3724                 }
3725         }
3726         return errors > 0 ? 1 : 0;
3727 }
3728
3729 static int process_root_ref(struct extent_buffer *eb, int slot,
3730                             struct btrfs_key *key,
3731                             struct cache_tree *root_cache)
3732 {
3733         u64 dirid;
3734         u64 index;
3735         u32 len;
3736         u32 name_len;
3737         struct btrfs_root_ref *ref;
3738         char namebuf[BTRFS_NAME_LEN];
3739         int error;
3740
3741         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3742
3743         dirid = btrfs_root_ref_dirid(eb, ref);
3744         index = btrfs_root_ref_sequence(eb, ref);
3745         name_len = btrfs_root_ref_name_len(eb, ref);
3746
3747         if (name_len <= BTRFS_NAME_LEN) {
3748                 len = name_len;
3749                 error = 0;
3750         } else {
3751                 len = BTRFS_NAME_LEN;
3752                 error = REF_ERR_NAME_TOO_LONG;
3753         }
3754         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3755
3756         if (key->type == BTRFS_ROOT_REF_KEY) {
3757                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3758                                  index, namebuf, len, key->type, error);
3759         } else {
3760                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3761                                  index, namebuf, len, key->type, error);
3762         }
3763         return 0;
3764 }
3765
3766 static void free_corrupt_block(struct cache_extent *cache)
3767 {
3768         struct btrfs_corrupt_block *corrupt;
3769
3770         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3771         free(corrupt);
3772 }
3773
/*
 * Instantiate the tree teardown helper for corrupt block records, using
 * free_corrupt_block() as the per-entry destructor.
 * NOTE(review): the macro is defined outside this file section; it
 * presumably generates free_corrupt_blocks_tree() - confirm against its
 * definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3775
3776 /*
3777  * Repair the btree of the given root.
3778  *
3779  * The fix is to remove the node key in corrupt_blocks cache_tree.
3780  * and rebalance the tree.
3781  * After the fix, the btree should be writeable.
3782  */
3783 static int repair_btree(struct btrfs_root *root,
3784                         struct cache_tree *corrupt_blocks)
3785 {
3786         struct btrfs_trans_handle *trans;
3787         struct btrfs_path path;
3788         struct btrfs_corrupt_block *corrupt;
3789         struct cache_extent *cache;
3790         struct btrfs_key key;
3791         u64 offset;
3792         int level;
3793         int ret = 0;
3794
3795         if (cache_tree_empty(corrupt_blocks))
3796                 return 0;
3797
3798         trans = btrfs_start_transaction(root, 1);
3799         if (IS_ERR(trans)) {
3800                 ret = PTR_ERR(trans);
3801                 fprintf(stderr, "Error starting transaction: %s\n",
3802                         strerror(-ret));
3803                 return ret;
3804         }
3805         btrfs_init_path(&path);
3806         cache = first_cache_extent(corrupt_blocks);
3807         while (cache) {
3808                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3809                                        cache);
3810                 level = corrupt->level;
3811                 path.lowest_level = level;
3812                 key.objectid = corrupt->key.objectid;
3813                 key.type = corrupt->key.type;
3814                 key.offset = corrupt->key.offset;
3815
3816                 /*
3817                  * Here we don't want to do any tree balance, since it may
3818                  * cause a balance with corrupted brother leaf/node,
3819                  * so ins_len set to 0 here.
3820                  * Balance will be done after all corrupt node/leaf is deleted.
3821                  */
3822                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3823                 if (ret < 0)
3824                         goto out;
3825                 offset = btrfs_node_blockptr(path.nodes[level],
3826                                              path.slots[level]);
3827
3828                 /* Remove the ptr */
3829                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3830                 if (ret < 0)
3831                         goto out;
3832                 /*
3833                  * Remove the corresponding extent
3834                  * return value is not concerned.
3835                  */
3836                 btrfs_release_path(&path);
3837                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3838                                         0, root->root_key.objectid,
3839                                         level - 1, 0);
3840                 cache = next_cache_extent(cache);
3841         }
3842
3843         /* Balance the btree using btrfs_search_slot() */
3844         cache = first_cache_extent(corrupt_blocks);
3845         while (cache) {
3846                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3847                                        cache);
3848                 memcpy(&key, &corrupt->key, sizeof(key));
3849                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3850                 if (ret < 0)
3851                         goto out;
3852                 /* return will always >0 since it won't find the item */
3853                 ret = 0;
3854                 btrfs_release_path(&path);
3855                 cache = next_cache_extent(cache);
3856         }
3857 out:
3858         btrfs_commit_transaction(trans, root);
3859         btrfs_release_path(&path);
3860         return ret;
3861 }
3862
3863 static int check_fs_root(struct btrfs_root *root,
3864                          struct cache_tree *root_cache,
3865                          struct walk_control *wc)
3866 {
3867         int ret = 0;
3868         int err = 0;
3869         int wret;
3870         int level;
3871         struct btrfs_path path;
3872         struct shared_node root_node;
3873         struct root_record *rec;
3874         struct btrfs_root_item *root_item = &root->root_item;
3875         struct cache_tree corrupt_blocks;
3876         struct orphan_data_extent *orphan;
3877         struct orphan_data_extent *tmp;
3878         enum btrfs_tree_block_status status;
3879         struct node_refs nrefs;
3880
3881         /*
3882          * Reuse the corrupt_block cache tree to record corrupted tree block
3883          *
3884          * Unlike the usage in extent tree check, here we do it in a per
3885          * fs/subvol tree base.
3886          */
3887         cache_tree_init(&corrupt_blocks);
3888         root->fs_info->corrupt_blocks = &corrupt_blocks;
3889
3890         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3891                 rec = get_root_rec(root_cache, root->root_key.objectid);
3892                 BUG_ON(IS_ERR(rec));
3893                 if (btrfs_root_refs(root_item) > 0)
3894                         rec->found_root_item = 1;
3895         }
3896
3897         btrfs_init_path(&path);
3898         memset(&root_node, 0, sizeof(root_node));
3899         cache_tree_init(&root_node.root_cache);
3900         cache_tree_init(&root_node.inode_cache);
3901         memset(&nrefs, 0, sizeof(nrefs));
3902
3903         /* Move the orphan extent record to corresponding inode_record */
3904         list_for_each_entry_safe(orphan, tmp,
3905                                  &root->orphan_data_extents, list) {
3906                 struct inode_record *inode;
3907
3908                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3909                                       1);
3910                 BUG_ON(IS_ERR(inode));
3911                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3912                 list_move(&orphan->list, &inode->orphan_extents);
3913         }
3914
3915         level = btrfs_header_level(root->node);
3916         memset(wc->nodes, 0, sizeof(wc->nodes));
3917         wc->nodes[level] = &root_node;
3918         wc->active_node = level;
3919         wc->root_level = level;
3920
3921         /* We may not have checked the root block, lets do that now */
3922         if (btrfs_is_leaf(root->node))
3923                 status = btrfs_check_leaf(root, NULL, root->node);
3924         else
3925                 status = btrfs_check_node(root, NULL, root->node);
3926         if (status != BTRFS_TREE_BLOCK_CLEAN)
3927                 return -EIO;
3928
3929         if (btrfs_root_refs(root_item) > 0 ||
3930             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3931                 path.nodes[level] = root->node;
3932                 extent_buffer_get(root->node);
3933                 path.slots[level] = 0;
3934         } else {
3935                 struct btrfs_key key;
3936                 struct btrfs_disk_key found_key;
3937
3938                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3939                 level = root_item->drop_level;
3940                 path.lowest_level = level;
3941                 if (level > btrfs_header_level(root->node) ||
3942                     level >= BTRFS_MAX_LEVEL) {
3943                         error("ignoring invalid drop level: %u", level);
3944                         goto skip_walking;
3945                 }
3946                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3947                 if (wret < 0)
3948                         goto skip_walking;
3949                 btrfs_node_key(path.nodes[level], &found_key,
3950                                 path.slots[level]);
3951                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3952                                         sizeof(found_key)));
3953         }
3954
3955         while (1) {
3956                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3957                 if (wret < 0)
3958                         ret = wret;
3959                 if (wret != 0)
3960                         break;
3961
3962                 wret = walk_up_tree(root, &path, wc, &level);
3963                 if (wret < 0)
3964                         ret = wret;
3965                 if (wret != 0)
3966                         break;
3967         }
3968 skip_walking:
3969         btrfs_release_path(&path);
3970
3971         if (!cache_tree_empty(&corrupt_blocks)) {
3972                 struct cache_extent *cache;
3973                 struct btrfs_corrupt_block *corrupt;
3974
3975                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3976                        root->root_key.objectid);
3977                 cache = first_cache_extent(&corrupt_blocks);
3978                 while (cache) {
3979                         corrupt = container_of(cache,
3980                                                struct btrfs_corrupt_block,
3981                                                cache);
3982                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3983                                cache->start, corrupt->level,
3984                                corrupt->key.objectid, corrupt->key.type,
3985                                corrupt->key.offset);
3986                         cache = next_cache_extent(cache);
3987                 }
3988                 if (repair) {
3989                         printf("Try to repair the btree for root %llu\n",
3990                                root->root_key.objectid);
3991                         ret = repair_btree(root, &corrupt_blocks);
3992                         if (ret < 0)
3993                                 fprintf(stderr, "Failed to repair btree: %s\n",
3994                                         strerror(-ret));
3995                         if (!ret)
3996                                 printf("Btree for root %llu is fixed\n",
3997                                        root->root_key.objectid);
3998                 }
3999         }
4000
4001         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4002         if (err < 0)
4003                 ret = err;
4004
4005         if (root_node.current) {
4006                 root_node.current->checked = 1;
4007                 maybe_free_inode_rec(&root_node.inode_cache,
4008                                 root_node.current);
4009         }
4010
4011         err = check_inode_recs(root, &root_node.inode_cache);
4012         if (!ret)
4013                 ret = err;
4014
4015         free_corrupt_blocks_tree(&corrupt_blocks);
4016         root->fs_info->corrupt_blocks = NULL;
4017         free_orphan_data_extents(&root->orphan_data_extents);
4018         return ret;
4019 }
4020
4021 static int fs_root_objectid(u64 objectid)
4022 {
4023         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4024             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4025                 return 1;
4026         return is_fstree(objectid);
4027 }
4028
4029 static int check_fs_roots(struct btrfs_root *root,
4030                           struct cache_tree *root_cache)
4031 {
4032         struct btrfs_path path;
4033         struct btrfs_key key;
4034         struct walk_control wc;
4035         struct extent_buffer *leaf, *tree_node;
4036         struct btrfs_root *tmp_root;
4037         struct btrfs_root *tree_root = root->fs_info->tree_root;
4038         int ret;
4039         int err = 0;
4040
4041         if (ctx.progress_enabled) {
4042                 ctx.tp = TASK_FS_ROOTS;
4043                 task_start(ctx.info);
4044         }
4045
4046         /*
4047          * Just in case we made any changes to the extent tree that weren't
4048          * reflected into the free space cache yet.
4049          */
4050         if (repair)
4051                 reset_cached_block_groups(root->fs_info);
4052         memset(&wc, 0, sizeof(wc));
4053         cache_tree_init(&wc.shared);
4054         btrfs_init_path(&path);
4055
4056 again:
4057         key.offset = 0;
4058         key.objectid = 0;
4059         key.type = BTRFS_ROOT_ITEM_KEY;
4060         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4061         if (ret < 0) {
4062                 err = 1;
4063                 goto out;
4064         }
4065         tree_node = tree_root->node;
4066         while (1) {
4067                 if (tree_node != tree_root->node) {
4068                         free_root_recs_tree(root_cache);
4069                         btrfs_release_path(&path);
4070                         goto again;
4071                 }
4072                 leaf = path.nodes[0];
4073                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4074                         ret = btrfs_next_leaf(tree_root, &path);
4075                         if (ret) {
4076                                 if (ret < 0)
4077                                         err = 1;
4078                                 break;
4079                         }
4080                         leaf = path.nodes[0];
4081                 }
4082                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4083                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4084                     fs_root_objectid(key.objectid)) {
4085                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4086                                 tmp_root = btrfs_read_fs_root_no_cache(
4087                                                 root->fs_info, &key);
4088                         } else {
4089                                 key.offset = (u64)-1;
4090                                 tmp_root = btrfs_read_fs_root(
4091                                                 root->fs_info, &key);
4092                         }
4093                         if (IS_ERR(tmp_root)) {
4094                                 err = 1;
4095                                 goto next;
4096                         }
4097                         ret = check_fs_root(tmp_root, root_cache, &wc);
4098                         if (ret == -EAGAIN) {
4099                                 free_root_recs_tree(root_cache);
4100                                 btrfs_release_path(&path);
4101                                 goto again;
4102                         }
4103                         if (ret)
4104                                 err = 1;
4105                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4106                                 btrfs_free_fs_root(tmp_root);
4107                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4108                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4109                         process_root_ref(leaf, path.slots[0], &key,
4110                                          root_cache);
4111                 }
4112 next:
4113                 path.slots[0]++;
4114         }
4115 out:
4116         btrfs_release_path(&path);
4117         if (err)
4118                 free_extent_cache_tree(&wc.shared);
4119         if (!cache_tree_empty(&wc.shared))
4120                 fprintf(stderr, "warning line %d\n", __LINE__);
4121
4122         task_stop(ctx.info);
4123
4124         return err;
4125 }
4126
4127 /*
4128  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4129  * INODE_REF/INODE_EXTREF match.
4130  *
4131  * @root:       the root of the fs/file tree
4132  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4133  * @key:        the key of the DIR_ITEM/DIR_INDEX
4134  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4135  *              distinguish root_dir between normal dir/file
4136  * @name:       the name in the INODE_REF/INODE_EXTREF
4137  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4138  * @mode:       the st_mode of INODE_ITEM
4139  *
4140  * Return 0 if no error occurred.
4141  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4142  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4143  * dir/file.
4144  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4145  * not match for normal dir/file.
4146  */
4147 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4148                          struct btrfs_key *key, u64 index, char *name,
4149                          u32 namelen, u32 mode)
4150 {
4151         struct btrfs_path path;
4152         struct extent_buffer *node;
4153         struct btrfs_dir_item *di;
4154         struct btrfs_key location;
4155         char namebuf[BTRFS_NAME_LEN] = {0};
4156         u32 total;
4157         u32 cur = 0;
4158         u32 len;
4159         u32 name_len;
4160         u32 data_len;
4161         u8 filetype;
4162         int slot;
4163         int ret;
4164
4165         btrfs_init_path(&path);
4166         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4167         if (ret < 0) {
4168                 ret = DIR_ITEM_MISSING;
4169                 goto out;
4170         }
4171
4172         /* Process root dir and goto out*/
4173         if (index == 0) {
4174                 if (ret == 0) {
4175                         ret = ROOT_DIR_ERROR;
4176                         error(
4177                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4178                                 root->objectid,
4179                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4180                                         "REF" : "EXTREF",
4181                                 ref_key->objectid, ref_key->offset,
4182                                 key->type == BTRFS_DIR_ITEM_KEY ?
4183                                         "DIR_ITEM" : "DIR_INDEX");
4184                 } else {
4185                         ret = 0;
4186                 }
4187
4188                 goto out;
4189         }
4190
4191         /* Process normal file/dir */
4192         if (ret > 0) {
4193                 ret = DIR_ITEM_MISSING;
4194                 error(
4195                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4196                         root->objectid,
4197                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4198                         ref_key->objectid, ref_key->offset,
4199                         key->type == BTRFS_DIR_ITEM_KEY ?
4200                                 "DIR_ITEM" : "DIR_INDEX",
4201                         key->objectid, key->offset, namelen, name,
4202                         imode_to_type(mode));
4203                 goto out;
4204         }
4205
4206         /* Check whether inode_id/filetype/name match */
4207         node = path.nodes[0];
4208         slot = path.slots[0];
4209         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4210         total = btrfs_item_size_nr(node, slot);
4211         while (cur < total) {
4212                 ret = DIR_ITEM_MISMATCH;
4213                 name_len = btrfs_dir_name_len(node, di);
4214                 data_len = btrfs_dir_data_len(node, di);
4215
4216                 btrfs_dir_item_key_to_cpu(node, di, &location);
4217                 if (location.objectid != ref_key->objectid ||
4218                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4219                     location.offset != 0)
4220                         goto next;
4221
4222                 filetype = btrfs_dir_type(node, di);
4223                 if (imode_to_type(mode) != filetype)
4224                         goto next;
4225
4226                 if (name_len <= BTRFS_NAME_LEN) {
4227                         len = name_len;
4228                 } else {
4229                         len = BTRFS_NAME_LEN;
4230                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4231                         root->objectid,
4232                         key->type == BTRFS_DIR_ITEM_KEY ?
4233                         "DIR_ITEM" : "DIR_INDEX",
4234                         key->objectid, key->offset, name_len);
4235                 }
4236                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4237                 if (len != namelen || strncmp(namebuf, name, len))
4238                         goto next;
4239
4240                 ret = 0;
4241                 goto out;
4242 next:
4243                 len = sizeof(*di) + name_len + data_len;
4244                 di = (struct btrfs_dir_item *)((char *)di + len);
4245                 cur += len;
4246         }
4247         if (ret == DIR_ITEM_MISMATCH)
4248                 error(
4249                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4250                         root->objectid,
4251                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4252                         ref_key->objectid, ref_key->offset,
4253                         key->type == BTRFS_DIR_ITEM_KEY ?
4254                                 "DIR_ITEM" : "DIR_INDEX",
4255                         key->objectid, key->offset, namelen, name,
4256                         imode_to_type(mode));
4257 out:
4258         btrfs_release_path(&path);
4259         return ret;
4260 }
4261
4262 /*
4263  * Traverse the given INODE_REF and call find_dir_item() to find related
4264  * DIR_ITEM/DIR_INDEX.
4265  *
4266  * @root:       the root of the fs/file tree
4267  * @ref_key:    the key of the INODE_REF
4268  * @refs:       the count of INODE_REF
4269  * @mode:       the st_mode of INODE_ITEM
4270  *
4271  * Return 0 if no error occurred.
4272  */
4273 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4274                            struct extent_buffer *node, int slot, u64 *refs,
4275                            int mode)
4276 {
4277         struct btrfs_key key;
4278         struct btrfs_inode_ref *ref;
4279         char namebuf[BTRFS_NAME_LEN] = {0};
4280         u32 total;
4281         u32 cur = 0;
4282         u32 len;
4283         u32 name_len;
4284         u64 index;
4285         int ret, err = 0;
4286
4287         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4288         total = btrfs_item_size_nr(node, slot);
4289
4290 next:
4291         /* Update inode ref count */
4292         (*refs)++;
4293
4294         index = btrfs_inode_ref_index(node, ref);
4295         name_len = btrfs_inode_ref_name_len(node, ref);
4296         if (name_len <= BTRFS_NAME_LEN) {
4297                 len = name_len;
4298         } else {
4299                 len = BTRFS_NAME_LEN;
4300                 warning("root %llu INODE_REF[%llu %llu] name too long",
4301                         root->objectid, ref_key->objectid, ref_key->offset);
4302         }
4303
4304         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4305
4306         /* Check root dir ref name */
4307         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4308                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4309                       root->objectid, ref_key->objectid, ref_key->offset,
4310                       namebuf);
4311                 err |= ROOT_DIR_ERROR;
4312         }
4313
4314         /* Find related DIR_INDEX */
4315         key.objectid = ref_key->offset;
4316         key.type = BTRFS_DIR_INDEX_KEY;
4317         key.offset = index;
4318         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4319         err |= ret;
4320
4321         /* Find related dir_item */
4322         key.objectid = ref_key->offset;
4323         key.type = BTRFS_DIR_ITEM_KEY;
4324         key.offset = btrfs_name_hash(namebuf, len);
4325         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4326         err |= ret;
4327
4328         len = sizeof(*ref) + name_len;
4329         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4330         cur += len;
4331         if (cur < total)
4332                 goto next;
4333
4334         return err;
4335 }
4336
4337 /*
4338  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4339  * DIR_ITEM/DIR_INDEX.
4340  *
4341  * @root:       the root of the fs/file tree
4342  * @ref_key:    the key of the INODE_EXTREF
4343  * @refs:       the count of INODE_EXTREF
4344  * @mode:       the st_mode of INODE_ITEM
4345  *
4346  * Return 0 if no error occurred.
4347  */
4348 static int check_inode_extref(struct btrfs_root *root,
4349                               struct btrfs_key *ref_key,
4350                               struct extent_buffer *node, int slot, u64 *refs,
4351                               int mode)
4352 {
4353         struct btrfs_key key;
4354         struct btrfs_inode_extref *extref;
4355         char namebuf[BTRFS_NAME_LEN] = {0};
4356         u32 total;
4357         u32 cur = 0;
4358         u32 len;
4359         u32 name_len;
4360         u64 index;
4361         u64 parent;
4362         int ret;
4363         int err = 0;
4364
4365         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4366         total = btrfs_item_size_nr(node, slot);
4367
4368 next:
4369         /* update inode ref count */
4370         (*refs)++;
4371         name_len = btrfs_inode_extref_name_len(node, extref);
4372         index = btrfs_inode_extref_index(node, extref);
4373         parent = btrfs_inode_extref_parent(node, extref);
4374         if (name_len <= BTRFS_NAME_LEN) {
4375                 len = name_len;
4376         } else {
4377                 len = BTRFS_NAME_LEN;
4378                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4379                         root->objectid, ref_key->objectid, ref_key->offset);
4380         }
4381         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4382
4383         /* Check root dir ref name */
4384         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4385                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4386                       root->objectid, ref_key->objectid, ref_key->offset,
4387                       namebuf);
4388                 err |= ROOT_DIR_ERROR;
4389         }
4390
4391         /* find related dir_index */
4392         key.objectid = parent;
4393         key.type = BTRFS_DIR_INDEX_KEY;
4394         key.offset = index;
4395         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4396         err |= ret;
4397
4398         /* find related dir_item */
4399         key.objectid = parent;
4400         key.type = BTRFS_DIR_ITEM_KEY;
4401         key.offset = btrfs_name_hash(namebuf, len);
4402         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4403         err |= ret;
4404
4405         len = sizeof(*extref) + name_len;
4406         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4407         cur += len;
4408
4409         if (cur < total)
4410                 goto next;
4411
4412         return err;
4413 }
4414
4415 /*
4416  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4417  * DIR_ITEM/DIR_INDEX match.
4418  *
4419  * @root:       the root of the fs/file tree
4420  * @key:        the key of the INODE_REF/INODE_EXTREF
4421  * @name:       the name in the INODE_REF/INODE_EXTREF
4422  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4423  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4424  * to (u64)-1
4425  * @ext_ref:    the EXTENDED_IREF feature
4426  *
4427  * Return 0 if no error occurred.
4428  * Return >0 for error bitmap
4429  */
4430 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4431                           char *name, int namelen, u64 index,
4432                           unsigned int ext_ref)
4433 {
4434         struct btrfs_path path;
4435         struct btrfs_inode_ref *ref;
4436         struct btrfs_inode_extref *extref;
4437         struct extent_buffer *node;
4438         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4439         u32 total;
4440         u32 cur = 0;
4441         u32 len;
4442         u32 ref_namelen;
4443         u64 ref_index;
4444         u64 parent;
4445         u64 dir_id;
4446         int slot;
4447         int ret;
4448
4449         btrfs_init_path(&path);
4450         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4451         if (ret) {
4452                 ret = INODE_REF_MISSING;
4453                 goto extref;
4454         }
4455
4456         node = path.nodes[0];
4457         slot = path.slots[0];
4458
4459         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4460         total = btrfs_item_size_nr(node, slot);
4461
4462         /* Iterate all entry of INODE_REF */
4463         while (cur < total) {
4464                 ret = INODE_REF_MISSING;
4465
4466                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4467                 ref_index = btrfs_inode_ref_index(node, ref);
4468                 if (index != (u64)-1 && index != ref_index)
4469                         goto next_ref;
4470
4471                 if (ref_namelen <= BTRFS_NAME_LEN) {
4472                         len = ref_namelen;
4473                 } else {
4474                         len = BTRFS_NAME_LEN;
4475                         warning("root %llu INODE %s[%llu %llu] name too long",
4476                                 root->objectid,
4477                                 key->type == BTRFS_INODE_REF_KEY ?
4478                                         "REF" : "EXTREF",
4479                                 key->objectid, key->offset);
4480                 }
4481                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4482                                    len);
4483
4484                 if (len != namelen || strncmp(ref_namebuf, name, len))
4485                         goto next_ref;
4486
4487                 ret = 0;
4488                 goto out;
4489 next_ref:
4490                 len = sizeof(*ref) + ref_namelen;
4491                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4492                 cur += len;
4493         }
4494
4495 extref:
4496         /* Skip if not support EXTENDED_IREF feature */
4497         if (!ext_ref)
4498                 goto out;
4499
4500         btrfs_release_path(&path);
4501         btrfs_init_path(&path);
4502
4503         dir_id = key->offset;
4504         key->type = BTRFS_INODE_EXTREF_KEY;
4505         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4506
4507         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4508         if (ret) {
4509                 ret = INODE_REF_MISSING;
4510                 goto out;
4511         }
4512
4513         node = path.nodes[0];
4514         slot = path.slots[0];
4515
4516         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4517         cur = 0;
4518         total = btrfs_item_size_nr(node, slot);
4519
4520         /* Iterate all entry of INODE_EXTREF */
4521         while (cur < total) {
4522                 ret = INODE_REF_MISSING;
4523
4524                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4525                 ref_index = btrfs_inode_extref_index(node, extref);
4526                 parent = btrfs_inode_extref_parent(node, extref);
4527                 if (index != (u64)-1 && index != ref_index)
4528                         goto next_extref;
4529
4530                 if (parent != dir_id)
4531                         goto next_extref;
4532
4533                 if (ref_namelen <= BTRFS_NAME_LEN) {
4534                         len = ref_namelen;
4535                 } else {
4536                         len = BTRFS_NAME_LEN;
4537                         warning("root %llu INODE %s[%llu %llu] name too long",
4538                                 root->objectid,
4539                                 key->type == BTRFS_INODE_REF_KEY ?
4540                                         "REF" : "EXTREF",
4541                                 key->objectid, key->offset);
4542                 }
4543                 read_extent_buffer(node, ref_namebuf,
4544                                    (unsigned long)(extref + 1), len);
4545
4546                 if (len != namelen || strncmp(ref_namebuf, name, len))
4547                         goto next_extref;
4548
4549                 ret = 0;
4550                 goto out;
4551
4552 next_extref:
4553                 len = sizeof(*extref) + ref_namelen;
4554                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4555                 cur += len;
4556
4557         }
4558 out:
4559         btrfs_release_path(&path);
4560         return ret;
4561 }
4562
4563 /*
4564  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4565  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4566  *
4567  * @root:       the root of the fs/file tree
4568  * @key:        the key of the INODE_REF/INODE_EXTREF
4569  * @size:       the st_size of the INODE_ITEM
4570  * @ext_ref:    the EXTENDED_IREF feature
4571  *
4572  * Return 0 if no error occurred.
4573  */
4574 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4575                           struct extent_buffer *node, int slot, u64 *size,
4576                           unsigned int ext_ref)
4577 {
4578         struct btrfs_dir_item *di;
4579         struct btrfs_inode_item *ii;
4580         struct btrfs_path path;
4581         struct btrfs_key location;
4582         char namebuf[BTRFS_NAME_LEN] = {0};
4583         u32 total;
4584         u32 cur = 0;
4585         u32 len;
4586         u32 name_len;
4587         u32 data_len;
4588         u8 filetype;
4589         u32 mode;
4590         u64 index;
4591         int ret;
4592         int err = 0;
4593
4594         /*
4595          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4596          * ignore index check.
4597          */
4598         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4599
4600         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4601         total = btrfs_item_size_nr(node, slot);
4602
4603         while (cur < total) {
4604                 data_len = btrfs_dir_data_len(node, di);
4605                 if (data_len)
4606                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4607                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4608                               "DIR_ITEM" : "DIR_INDEX",
4609                               key->objectid, key->offset, data_len);
4610
4611                 name_len = btrfs_dir_name_len(node, di);
4612                 if (name_len <= BTRFS_NAME_LEN) {
4613                         len = name_len;
4614                 } else {
4615                         len = BTRFS_NAME_LEN;
4616                         warning("root %llu %s[%llu %llu] name too long",
4617                                 root->objectid,
4618                                 key->type == BTRFS_DIR_ITEM_KEY ?
4619                                 "DIR_ITEM" : "DIR_INDEX",
4620                                 key->objectid, key->offset);
4621                 }
4622                 (*size) += name_len;
4623
4624                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4625                 filetype = btrfs_dir_type(node, di);
4626
4627                 btrfs_init_path(&path);
4628                 btrfs_dir_item_key_to_cpu(node, di, &location);
4629
4630                 /* Ignore related ROOT_ITEM check */
4631                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4632                         goto next;
4633
4634                 /* Check relative INODE_ITEM(existence/filetype) */
4635                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4636                 if (ret) {
4637                         err |= INODE_ITEM_MISSING;
4638                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4639                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4640                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4641                               key->offset, location.objectid, name_len,
4642                               namebuf, filetype);
4643                         goto next;
4644                 }
4645
4646                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4647                                     struct btrfs_inode_item);
4648                 mode = btrfs_inode_mode(path.nodes[0], ii);
4649
4650                 if (imode_to_type(mode) != filetype) {
4651                         err |= INODE_ITEM_MISMATCH;
4652                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4653                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4654                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4655                               key->offset, name_len, namebuf, filetype);
4656                 }
4657
4658                 /* Check relative INODE_REF/INODE_EXTREF */
4659                 location.type = BTRFS_INODE_REF_KEY;
4660                 location.offset = key->objectid;
4661                 ret = find_inode_ref(root, &location, namebuf, len,
4662                                        index, ext_ref);
4663                 err |= ret;
4664                 if (ret & INODE_REF_MISSING)
4665                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4666                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4667                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4668                               key->offset, name_len, namebuf, filetype);
4669
4670 next:
4671                 btrfs_release_path(&path);
4672                 len = sizeof(*di) + name_len + data_len;
4673                 di = (struct btrfs_dir_item *)((char *)di + len);
4674                 cur += len;
4675
4676                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4677                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4678                               root->objectid, key->objectid, key->offset);
4679                         break;
4680                 }
4681         }
4682
4683         return err;
4684 }
4685
4686 /*
4687  * Check file extent datasum/hole, update the size of the file extents,
4688  * check and update the last offset of the file extent.
4689  *
4690  * @root:       the root of fs/file tree.
4691  * @fkey:       the key of the file extent.
4692  * @nodatasum:  INODE_NODATASUM feature.
4693  * @size:       the sum of all EXTENT_DATA items size for this inode.
4694  * @end:        the offset of the last extent.
4695  *
4696  * Return 0 if no error occurred.
4697  */
4698 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4699                              struct extent_buffer *node, int slot,
4700                              unsigned int nodatasum, u64 *size, u64 *end)
4701 {
4702         struct btrfs_file_extent_item *fi;
4703         u64 disk_bytenr;
4704         u64 disk_num_bytes;
4705         u64 extent_num_bytes;
4706         u64 found;
4707         unsigned int extent_type;
4708         unsigned int is_hole;
4709         int ret;
4710         int err = 0;
4711
4712         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4713
4714         extent_type = btrfs_file_extent_type(node, fi);
4715         /* Skip if file extent is inline */
4716         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717                 struct btrfs_item *e = btrfs_item_nr(slot);
4718                 u32 item_inline_len;
4719
4720                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722                 if (extent_num_bytes == 0 ||
4723                     extent_num_bytes != item_inline_len)
4724                         err |= FILE_EXTENT_ERROR;
4725                 *size += extent_num_bytes;
4726                 return err;
4727         }
4728
4729         /* Check extent type */
4730         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4731                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4732                 err |= FILE_EXTENT_ERROR;
4733                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4734                       root->objectid, fkey->objectid, fkey->offset);
4735                 return err;
4736         }
4737
4738         /* Check REG_EXTENT/PREALLOC_EXTENT */
4739         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4740         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4741         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4742         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4743
4744         /* Check EXTENT_DATA datasum */
4745         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4746         if (found > 0 && nodatasum) {
4747                 err |= ODD_CSUM_ITEM;
4748                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4749                       root->objectid, fkey->objectid, fkey->offset);
4750         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4751                    !is_hole &&
4752                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4753                 err |= CSUM_ITEM_MISSING;
4754                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4757                 err |= ODD_CSUM_ITEM;
4758                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4759                       root->objectid, fkey->objectid, fkey->offset);
4760         }
4761
4762         /* Check EXTENT_DATA hole */
4763         if (no_holes && is_hole) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         } else if (!no_holes && *end != fkey->offset) {
4768                 err |= FILE_EXTENT_ERROR;
4769                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4770                       root->objectid, fkey->objectid, fkey->offset);
4771         }
4772
4773         *end += extent_num_bytes;
4774         if (!is_hole)
4775                 *size += extent_num_bytes;
4776
4777         return err;
4778 }
4779
4780 /*
4781  * Check INODE_ITEM and related ITEMs (the same inode number)
4782  * 1. check link count
4783  * 2. check inode ref/extref
4784  * 3. check dir item/index
4785  *
4786  * @ext_ref:    the EXTENDED_IREF feature
4787  *
4788  * Return 0 if no error occurred.
4789  * Return >0 for error or hit the traversal is done(by error bitmap)
4790  */
4791 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4792                             unsigned int ext_ref)
4793 {
4794         struct extent_buffer *node;
4795         struct btrfs_inode_item *ii;
4796         struct btrfs_key key;
4797         u64 inode_id;
4798         u32 mode;
4799         u64 nlink;
4800         u64 nbytes;
4801         u64 isize;
4802         u64 size = 0;
4803         u64 refs = 0;
4804         u64 extent_end = 0;
4805         u64 extent_size = 0;
4806         unsigned int dir;
4807         unsigned int nodatasum;
4808         int slot;
4809         int ret;
4810         int err = 0;
4811
4812         node = path->nodes[0];
4813         slot = path->slots[0];
4814
4815         btrfs_item_key_to_cpu(node, &key, slot);
4816         inode_id = key.objectid;
4817
4818         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4819                 ret = btrfs_next_item(root, path);
4820                 if (ret > 0)
4821                         err |= LAST_ITEM;
4822                 return err;
4823         }
4824
4825         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4826         isize = btrfs_inode_size(node, ii);
4827         nbytes = btrfs_inode_nbytes(node, ii);
4828         mode = btrfs_inode_mode(node, ii);
4829         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4830         nlink = btrfs_inode_nlink(node, ii);
4831         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4832
4833         while (1) {
4834                 ret = btrfs_next_item(root, path);
4835                 if (ret < 0) {
4836                         /* out will fill 'err' rusing current statistics */
4837                         goto out;
4838                 } else if (ret > 0) {
4839                         err |= LAST_ITEM;
4840                         goto out;
4841                 }
4842
4843                 node = path->nodes[0];
4844                 slot = path->slots[0];
4845                 btrfs_item_key_to_cpu(node, &key, slot);
4846                 if (key.objectid != inode_id)
4847                         goto out;
4848
4849                 switch (key.type) {
4850                 case BTRFS_INODE_REF_KEY:
4851                         ret = check_inode_ref(root, &key, node, slot, &refs,
4852                                               mode);
4853                         err |= ret;
4854                         break;
4855                 case BTRFS_INODE_EXTREF_KEY:
4856                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4857                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4858                                         root->objectid, key.objectid,
4859                                         key.offset);
4860                         ret = check_inode_extref(root, &key, node, slot, &refs,
4861                                                  mode);
4862                         err |= ret;
4863                         break;
4864                 case BTRFS_DIR_ITEM_KEY:
4865                 case BTRFS_DIR_INDEX_KEY:
4866                         if (!dir) {
4867                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4868                                         root->objectid, inode_id,
4869                                         imode_to_type(mode), key.objectid,
4870                                         key.offset);
4871                         }
4872                         ret = check_dir_item(root, &key, node, slot, &size,
4873                                              ext_ref);
4874                         err |= ret;
4875                         break;
4876                 case BTRFS_EXTENT_DATA_KEY:
4877                         if (dir) {
4878                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4879                                         root->objectid, inode_id, key.objectid,
4880                                         key.offset);
4881                         }
4882                         ret = check_file_extent(root, &key, node, slot,
4883                                                 nodatasum, &extent_size,
4884                                                 &extent_end);
4885                         err |= ret;
4886                         break;
4887                 case BTRFS_XATTR_ITEM_KEY:
4888                         break;
4889                 default:
4890                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4891                               key.objectid, key.type, key.offset);
4892                 }
4893         }
4894
4895 out:
4896         /* verify INODE_ITEM nlink/isize/nbytes */
4897         if (dir) {
4898                 if (nlink != 1) {
4899                         err |= LINK_COUNT_ERROR;
4900                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4901                               root->objectid, inode_id, nlink);
4902                 }
4903
4904                 /*
4905                  * Just a warning, as dir inode nbytes is just an
4906                  * instructive value.
4907                  */
4908                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4909                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4910                                 root->objectid, inode_id, root->nodesize);
4911                 }
4912
4913                 if (isize != size) {
4914                         err |= ISIZE_ERROR;
4915                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4916                               root->objectid, inode_id, isize, size);
4917                 }
4918         } else {
4919                 if (nlink != refs) {
4920                         err |= LINK_COUNT_ERROR;
4921                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4922                               root->objectid, inode_id, nlink, refs);
4923                 } else if (!nlink) {
4924                         err |= ORPHAN_ITEM;
4925                 }
4926
4927                 if (!nbytes && !no_holes && extent_end < isize) {
4928                         err |= NBYTES_ERROR;
4929                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4930                               root->objectid, inode_id, isize);
4931                 }
4932
4933                 if (nbytes != extent_size) {
4934                         err |= NBYTES_ERROR;
4935                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4936                               root->objectid, inode_id, nbytes, extent_size);
4937                 }
4938         }
4939
4940         return err;
4941 }
4942
4943 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4944 {
4945         struct btrfs_path path;
4946         struct btrfs_key key;
4947         int err = 0;
4948         int ret;
4949
4950         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4951         key.type = BTRFS_INODE_ITEM_KEY;
4952         key.offset = 0;
4953
4954         /* For root being dropped, we don't need to check first inode */
4955         if (btrfs_root_refs(&root->root_item) == 0 &&
4956             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4957             key.objectid)
4958                 return 0;
4959
4960         btrfs_init_path(&path);
4961
4962         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4963         if (ret < 0)
4964                 goto out;
4965         if (ret > 0) {
4966                 ret = 0;
4967                 err |= INODE_ITEM_MISSING;
4968         }
4969
4970         err |= check_inode_item(root, &path, ext_ref);
4971         err &= ~LAST_ITEM;
4972         if (err && !ret)
4973                 ret = -EIO;
4974 out:
4975         btrfs_release_path(&path);
4976         return ret;
4977 }
4978
4979 /*
4980  * Iterate all item on the tree and call check_inode_item() to check.
4981  *
4982  * @root:       the root of the tree to be checked.
4983  * @ext_ref:    the EXTENDED_IREF feature
4984  *
4985  * Return 0 if no error found.
4986  * Return <0 for error.
4987  */
4988 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4989 {
4990         struct btrfs_path path;
4991         struct node_refs nrefs;
4992         struct btrfs_root_item *root_item = &root->root_item;
4993         int ret, wret;
4994         int level;
4995
4996         /*
4997          * We need to manually check the first inode item(256)
4998          * As the following traversal function will only start from
4999          * the first inode item in the leaf, if inode item(256) is missing
5000          * we will just skip it forever.
5001          */
5002         ret = check_fs_first_inode(root, ext_ref);
5003         if (ret < 0)
5004                 return ret;
5005
5006         memset(&nrefs, 0, sizeof(nrefs));
5007         level = btrfs_header_level(root->node);
5008         btrfs_init_path(&path);
5009
5010         if (btrfs_root_refs(root_item) > 0 ||
5011             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5012                 path.nodes[level] = root->node;
5013                 path.slots[level] = 0;
5014                 extent_buffer_get(root->node);
5015         } else {
5016                 struct btrfs_key key;
5017
5018                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5019                 level = root_item->drop_level;
5020                 path.lowest_level = level;
5021                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5022                 if (ret < 0)
5023                         goto out;
5024                 ret = 0;
5025         }
5026
5027         while (1) {
5028                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5029                 if (wret < 0)
5030                         ret = wret;
5031                 if (wret != 0)
5032                         break;
5033
5034                 wret = walk_up_tree_v2(root, &path, &level);
5035                 if (wret < 0)
5036                         ret = wret;
5037                 if (wret != 0)
5038                         break;
5039         }
5040
5041 out:
5042         btrfs_release_path(&path);
5043         return ret;
5044 }
5045
5046 /*
5047  * Find the relative ref for root_ref and root_backref.
5048  *
5049  * @root:       the root of the root tree.
5050  * @ref_key:    the key of the root ref.
5051  *
5052  * Return 0 if no error occurred.
5053  */
5054 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5055                           struct extent_buffer *node, int slot)
5056 {
5057         struct btrfs_path path;
5058         struct btrfs_key key;
5059         struct btrfs_root_ref *ref;
5060         struct btrfs_root_ref *backref;
5061         char ref_name[BTRFS_NAME_LEN] = {0};
5062         char backref_name[BTRFS_NAME_LEN] = {0};
5063         u64 ref_dirid;
5064         u64 ref_seq;
5065         u32 ref_namelen;
5066         u64 backref_dirid;
5067         u64 backref_seq;
5068         u32 backref_namelen;
5069         u32 len;
5070         int ret;
5071         int err = 0;
5072
5073         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5074         ref_dirid = btrfs_root_ref_dirid(node, ref);
5075         ref_seq = btrfs_root_ref_sequence(node, ref);
5076         ref_namelen = btrfs_root_ref_name_len(node, ref);
5077
5078         if (ref_namelen <= BTRFS_NAME_LEN) {
5079                 len = ref_namelen;
5080         } else {
5081                 len = BTRFS_NAME_LEN;
5082                 warning("%s[%llu %llu] ref_name too long",
5083                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5084                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5085                         ref_key->offset);
5086         }
5087         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5088
5089         /* Find relative root_ref */
5090         key.objectid = ref_key->offset;
5091         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5092         key.offset = ref_key->objectid;
5093
5094         btrfs_init_path(&path);
5095         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5096         if (ret) {
5097                 err |= ROOT_REF_MISSING;
5098                 error("%s[%llu %llu] couldn't find relative ref",
5099                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5100                       "ROOT_REF" : "ROOT_BACKREF",
5101                       ref_key->objectid, ref_key->offset);
5102                 goto out;
5103         }
5104
5105         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5106                                  struct btrfs_root_ref);
5107         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5108         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5109         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5110
5111         if (backref_namelen <= BTRFS_NAME_LEN) {
5112                 len = backref_namelen;
5113         } else {
5114                 len = BTRFS_NAME_LEN;
5115                 warning("%s[%llu %llu] ref_name too long",
5116                         key.type == BTRFS_ROOT_REF_KEY ?
5117                         "ROOT_REF" : "ROOT_BACKREF",
5118                         key.objectid, key.offset);
5119         }
5120         read_extent_buffer(path.nodes[0], backref_name,
5121                            (unsigned long)(backref + 1), len);
5122
5123         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5124             ref_namelen != backref_namelen ||
5125             strncmp(ref_name, backref_name, len)) {
5126                 err |= ROOT_REF_MISMATCH;
5127                 error("%s[%llu %llu] mismatch relative ref",
5128                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5129                       "ROOT_REF" : "ROOT_BACKREF",
5130                       ref_key->objectid, ref_key->offset);
5131         }
5132 out:
5133         btrfs_release_path(&path);
5134         return err;
5135 }
5136
5137 /*
5138  * Check all fs/file tree in low_memory mode.
5139  *
5140  * 1. for fs tree root item, call check_fs_root_v2()
5141  * 2. for fs tree root ref/backref, call check_root_ref()
5142  *
5143  * Return 0 if no error occurred.
5144  */
5145 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5146 {
5147         struct btrfs_root *tree_root = fs_info->tree_root;
5148         struct btrfs_root *cur_root = NULL;
5149         struct btrfs_path path;
5150         struct btrfs_key key;
5151         struct extent_buffer *node;
5152         unsigned int ext_ref;
5153         int slot;
5154         int ret;
5155         int err = 0;
5156
5157         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5158
5159         btrfs_init_path(&path);
5160         key.objectid = BTRFS_FS_TREE_OBJECTID;
5161         key.offset = 0;
5162         key.type = BTRFS_ROOT_ITEM_KEY;
5163
5164         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5165         if (ret < 0) {
5166                 err = ret;
5167                 goto out;
5168         } else if (ret > 0) {
5169                 err = -ENOENT;
5170                 goto out;
5171         }
5172
5173         while (1) {
5174                 node = path.nodes[0];
5175                 slot = path.slots[0];
5176                 btrfs_item_key_to_cpu(node, &key, slot);
5177                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5178                         goto out;
5179                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5180                     fs_root_objectid(key.objectid)) {
5181                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5182                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5183                                                                        &key);
5184                         } else {
5185                                 key.offset = (u64)-1;
5186                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5187                         }
5188
5189                         if (IS_ERR(cur_root)) {
5190                                 error("Fail to read fs/subvol tree: %lld",
5191                                       key.objectid);
5192                                 err = -EIO;
5193                                 goto next;
5194                         }
5195
5196                         ret = check_fs_root_v2(cur_root, ext_ref);
5197                         err |= ret;
5198
5199                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5200                                 btrfs_free_fs_root(cur_root);
5201                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5202                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5203                         ret = check_root_ref(tree_root, &key, node, slot);
5204                         err |= ret;
5205                 }
5206 next:
5207                 ret = btrfs_next_item(tree_root, &path);
5208                 if (ret > 0)
5209                         goto out;
5210                 if (ret < 0) {
5211                         err = ret;
5212                         goto out;
5213                 }
5214         }
5215
5216 out:
5217         btrfs_release_path(&path);
5218         return err;
5219 }
5220
5221 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5222 {
5223         struct list_head *cur = rec->backrefs.next;
5224         struct extent_backref *back;
5225         struct tree_backref *tback;
5226         struct data_backref *dback;
5227         u64 found = 0;
5228         int err = 0;
5229
5230         while(cur != &rec->backrefs) {
5231                 back = to_extent_backref(cur);
5232                 cur = cur->next;
5233                 if (!back->found_extent_tree) {
5234                         err = 1;
5235                         if (!print_errs)
5236                                 goto out;
5237                         if (back->is_data) {
5238                                 dback = to_data_backref(back);
5239                                 fprintf(stderr, "Backref %llu %s %llu"
5240                                         " owner %llu offset %llu num_refs %lu"
5241                                         " not found in extent tree\n",
5242                                         (unsigned long long)rec->start,
5243                                         back->full_backref ?
5244                                         "parent" : "root",
5245                                         back->full_backref ?
5246                                         (unsigned long long)dback->parent:
5247                                         (unsigned long long)dback->root,
5248                                         (unsigned long long)dback->owner,
5249                                         (unsigned long long)dback->offset,
5250                                         (unsigned long)dback->num_refs);
5251                         } else {
5252                                 tback = to_tree_backref(back);
5253                                 fprintf(stderr, "Backref %llu parent %llu"
5254                                         " root %llu not found in extent tree\n",
5255                                         (unsigned long long)rec->start,
5256                                         (unsigned long long)tback->parent,
5257                                         (unsigned long long)tback->root);
5258                         }
5259                 }
5260                 if (!back->is_data && !back->found_ref) {
5261                         err = 1;
5262                         if (!print_errs)
5263                                 goto out;
5264                         tback = to_tree_backref(back);
5265                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5266                                 (unsigned long long)rec->start,
5267                                 back->full_backref ? "parent" : "root",
5268                                 back->full_backref ?
5269                                 (unsigned long long)tback->parent :
5270                                 (unsigned long long)tback->root, back);
5271                 }
5272                 if (back->is_data) {
5273                         dback = to_data_backref(back);
5274                         if (dback->found_ref != dback->num_refs) {
5275                                 err = 1;
5276                                 if (!print_errs)
5277                                         goto out;
5278                                 fprintf(stderr, "Incorrect local backref count"
5279                                         " on %llu %s %llu owner %llu"
5280                                         " offset %llu found %u wanted %u back %p\n",
5281                                         (unsigned long long)rec->start,
5282                                         back->full_backref ?
5283                                         "parent" : "root",
5284                                         back->full_backref ?
5285                                         (unsigned long long)dback->parent:
5286                                         (unsigned long long)dback->root,
5287                                         (unsigned long long)dback->owner,
5288                                         (unsigned long long)dback->offset,
5289                                         dback->found_ref, dback->num_refs, back);
5290                         }
5291                         if (dback->disk_bytenr != rec->start) {
5292                                 err = 1;
5293                                 if (!print_errs)
5294                                         goto out;
5295                                 fprintf(stderr, "Backref disk bytenr does not"
5296                                         " match extent record, bytenr=%llu, "
5297                                         "ref bytenr=%llu\n",
5298                                         (unsigned long long)rec->start,
5299                                         (unsigned long long)dback->disk_bytenr);
5300                         }
5301
5302                         if (dback->bytes != rec->nr) {
5303                                 err = 1;
5304                                 if (!print_errs)
5305                                         goto out;
5306                                 fprintf(stderr, "Backref bytes do not match "
5307                                         "extent backref, bytenr=%llu, ref "
5308                                         "bytes=%llu, backref bytes=%llu\n",
5309                                         (unsigned long long)rec->start,
5310                                         (unsigned long long)rec->nr,
5311                                         (unsigned long long)dback->bytes);
5312                         }
5313                 }
5314                 if (!back->is_data) {
5315                         found += 1;
5316                 } else {
5317                         dback = to_data_backref(back);
5318                         found += dback->found_ref;
5319                 }
5320         }
5321         if (found != rec->refs) {
5322                 err = 1;
5323                 if (!print_errs)
5324                         goto out;
5325                 fprintf(stderr, "Incorrect global backref count "
5326                         "on %llu found %llu wanted %llu\n",
5327                         (unsigned long long)rec->start,
5328                         (unsigned long long)found,
5329                         (unsigned long long)rec->refs);
5330         }
5331 out:
5332         return err;
5333 }
5334
5335 static int free_all_extent_backrefs(struct extent_record *rec)
5336 {
5337         struct extent_backref *back;
5338         struct list_head *cur;
5339         while (!list_empty(&rec->backrefs)) {
5340                 cur = rec->backrefs.next;
5341                 back = to_extent_backref(cur);
5342                 list_del(cur);
5343                 free(back);
5344         }
5345         return 0;
5346 }
5347
5348 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5349                                      struct cache_tree *extent_cache)
5350 {
5351         struct cache_extent *cache;
5352         struct extent_record *rec;
5353
5354         while (1) {
5355                 cache = first_cache_extent(extent_cache);
5356                 if (!cache)
5357                         break;
5358                 rec = container_of(cache, struct extent_record, cache);
5359                 remove_cache_extent(extent_cache, cache);
5360                 free_all_extent_backrefs(rec);
5361                 free(rec);
5362         }
5363 }
5364
5365 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5366                                  struct extent_record *rec)
5367 {
5368         if (rec->content_checked && rec->owner_ref_checked &&
5369             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5370             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5371             !rec->bad_full_backref && !rec->crossing_stripes &&
5372             !rec->wrong_chunk_type) {
5373                 remove_cache_extent(extent_cache, &rec->cache);
5374                 free_all_extent_backrefs(rec);
5375                 list_del_init(&rec->list);
5376                 free(rec);
5377         }
5378         return 0;
5379 }
5380
5381 static int check_owner_ref(struct btrfs_root *root,
5382                             struct extent_record *rec,
5383                             struct extent_buffer *buf)
5384 {
5385         struct extent_backref *node;
5386         struct tree_backref *back;
5387         struct btrfs_root *ref_root;
5388         struct btrfs_key key;
5389         struct btrfs_path path;
5390         struct extent_buffer *parent;
5391         int level;
5392         int found = 0;
5393         int ret;
5394
5395         list_for_each_entry(node, &rec->backrefs, list) {
5396                 if (node->is_data)
5397                         continue;
5398                 if (!node->found_ref)
5399                         continue;
5400                 if (node->full_backref)
5401                         continue;
5402                 back = to_tree_backref(node);
5403                 if (btrfs_header_owner(buf) == back->root)
5404                         return 0;
5405         }
5406         BUG_ON(rec->is_root);
5407
5408         /* try to find the block by search corresponding fs tree */
5409         key.objectid = btrfs_header_owner(buf);
5410         key.type = BTRFS_ROOT_ITEM_KEY;
5411         key.offset = (u64)-1;
5412
5413         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5414         if (IS_ERR(ref_root))
5415                 return 1;
5416
5417         level = btrfs_header_level(buf);
5418         if (level == 0)
5419                 btrfs_item_key_to_cpu(buf, &key, 0);
5420         else
5421                 btrfs_node_key_to_cpu(buf, &key, 0);
5422
5423         btrfs_init_path(&path);
5424         path.lowest_level = level + 1;
5425         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5426         if (ret < 0)
5427                 return 0;
5428
5429         parent = path.nodes[level + 1];
5430         if (parent && buf->start == btrfs_node_blockptr(parent,
5431                                                         path.slots[level + 1]))
5432                 found = 1;
5433
5434         btrfs_release_path(&path);
5435         return found ? 0 : 1;
5436 }
5437
5438 static int is_extent_tree_record(struct extent_record *rec)
5439 {
5440         struct list_head *cur = rec->backrefs.next;
5441         struct extent_backref *node;
5442         struct tree_backref *back;
5443         int is_extent = 0;
5444
5445         while(cur != &rec->backrefs) {
5446                 node = to_extent_backref(cur);
5447                 cur = cur->next;
5448                 if (node->is_data)
5449                         return 0;
5450                 back = to_tree_backref(node);
5451                 if (node->full_backref)
5452                         return 0;
5453                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5454                         is_extent = 1;
5455         }
5456         return is_extent;
5457 }
5458
5459
5460 static int record_bad_block_io(struct btrfs_fs_info *info,
5461                                struct cache_tree *extent_cache,
5462                                u64 start, u64 len)
5463 {
5464         struct extent_record *rec;
5465         struct cache_extent *cache;
5466         struct btrfs_key key;
5467
5468         cache = lookup_cache_extent(extent_cache, start, len);
5469         if (!cache)
5470                 return 0;
5471
5472         rec = container_of(cache, struct extent_record, cache);
5473         if (!is_extent_tree_record(rec))
5474                 return 0;
5475
5476         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5477         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5478 }
5479
5480 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5481                        struct extent_buffer *buf, int slot)
5482 {
5483         if (btrfs_header_level(buf)) {
5484                 struct btrfs_key_ptr ptr1, ptr2;
5485
5486                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5487                                    sizeof(struct btrfs_key_ptr));
5488                 read_extent_buffer(buf, &ptr2,
5489                                    btrfs_node_key_ptr_offset(slot + 1),
5490                                    sizeof(struct btrfs_key_ptr));
5491                 write_extent_buffer(buf, &ptr1,
5492                                     btrfs_node_key_ptr_offset(slot + 1),
5493                                     sizeof(struct btrfs_key_ptr));
5494                 write_extent_buffer(buf, &ptr2,
5495                                     btrfs_node_key_ptr_offset(slot),
5496                                     sizeof(struct btrfs_key_ptr));
5497                 if (slot == 0) {
5498                         struct btrfs_disk_key key;
5499                         btrfs_node_key(buf, &key, 0);
5500                         btrfs_fixup_low_keys(root, path, &key,
5501                                              btrfs_header_level(buf) + 1);
5502                 }
5503         } else {
5504                 struct btrfs_item *item1, *item2;
5505                 struct btrfs_key k1, k2;
5506                 char *item1_data, *item2_data;
5507                 u32 item1_offset, item2_offset, item1_size, item2_size;
5508
5509                 item1 = btrfs_item_nr(slot);
5510                 item2 = btrfs_item_nr(slot + 1);
5511                 btrfs_item_key_to_cpu(buf, &k1, slot);
5512                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5513                 item1_offset = btrfs_item_offset(buf, item1);
5514                 item2_offset = btrfs_item_offset(buf, item2);
5515                 item1_size = btrfs_item_size(buf, item1);
5516                 item2_size = btrfs_item_size(buf, item2);
5517
5518                 item1_data = malloc(item1_size);
5519                 if (!item1_data)
5520                         return -ENOMEM;
5521                 item2_data = malloc(item2_size);
5522                 if (!item2_data) {
5523                         free(item1_data);
5524                         return -ENOMEM;
5525                 }
5526
5527                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5528                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5529
5530                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5531                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5532                 free(item1_data);
5533                 free(item2_data);
5534
5535                 btrfs_set_item_offset(buf, item1, item2_offset);
5536                 btrfs_set_item_offset(buf, item2, item1_offset);
5537                 btrfs_set_item_size(buf, item1, item2_size);
5538                 btrfs_set_item_size(buf, item2, item1_size);
5539
5540                 path->slots[0] = slot;
5541                 btrfs_set_item_key_unsafe(root, path, &k2);
5542                 path->slots[0] = slot + 1;
5543                 btrfs_set_item_key_unsafe(root, path, &k1);
5544         }
5545         return 0;
5546 }
5547
5548 static int fix_key_order(struct btrfs_trans_handle *trans,
5549                          struct btrfs_root *root,
5550                          struct btrfs_path *path)
5551 {
5552         struct extent_buffer *buf;
5553         struct btrfs_key k1, k2;
5554         int i;
5555         int level = path->lowest_level;
5556         int ret = -EIO;
5557
5558         buf = path->nodes[level];
5559         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5560                 if (level) {
5561                         btrfs_node_key_to_cpu(buf, &k1, i);
5562                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5563                 } else {
5564                         btrfs_item_key_to_cpu(buf, &k1, i);
5565                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5566                 }
5567                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5568                         continue;
5569                 ret = swap_values(root, path, buf, i);
5570                 if (ret)
5571                         break;
5572                 btrfs_mark_buffer_dirty(buf);
5573                 i = 0;
5574         }
5575         return ret;
5576 }
5577
5578 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5579                              struct btrfs_root *root,
5580                              struct btrfs_path *path,
5581                              struct extent_buffer *buf, int slot)
5582 {
5583         struct btrfs_key key;
5584         int nritems = btrfs_header_nritems(buf);
5585
5586         btrfs_item_key_to_cpu(buf, &key, slot);
5587
5588         /* These are all the keys we can deal with missing. */
5589         if (key.type != BTRFS_DIR_INDEX_KEY &&
5590             key.type != BTRFS_EXTENT_ITEM_KEY &&
5591             key.type != BTRFS_METADATA_ITEM_KEY &&
5592             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5593             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5594                 return -1;
5595
5596         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5597                (unsigned long long)key.objectid, key.type,
5598                (unsigned long long)key.offset, slot, buf->start);
5599         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5600                               btrfs_item_nr_offset(slot + 1),
5601                               sizeof(struct btrfs_item) *
5602                               (nritems - slot - 1));
5603         btrfs_set_header_nritems(buf, nritems - 1);
5604         if (slot == 0) {
5605                 struct btrfs_disk_key disk_key;
5606
5607                 btrfs_item_key(buf, &disk_key, 0);
5608                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5609         }
5610         btrfs_mark_buffer_dirty(buf);
5611         return 0;
5612 }
5613
5614 static int fix_item_offset(struct btrfs_trans_handle *trans,
5615                            struct btrfs_root *root,
5616                            struct btrfs_path *path)
5617 {
5618         struct extent_buffer *buf;
5619         int i;
5620         int ret = 0;
5621
5622         /* We should only get this for leaves */
5623         BUG_ON(path->lowest_level);
5624         buf = path->nodes[0];
5625 again:
5626         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5627                 unsigned int shift = 0, offset;
5628
5629                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5630                     BTRFS_LEAF_DATA_SIZE(root)) {
5631                         if (btrfs_item_end_nr(buf, i) >
5632                             BTRFS_LEAF_DATA_SIZE(root)) {
5633                                 ret = delete_bogus_item(trans, root, path,
5634                                                         buf, i);
5635                                 if (!ret)
5636                                         goto again;
5637                                 fprintf(stderr, "item is off the end of the "
5638                                         "leaf, can't fix\n");
5639                                 ret = -EIO;
5640                                 break;
5641                         }
5642                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5643                                 btrfs_item_end_nr(buf, i);
5644                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5645                            btrfs_item_offset_nr(buf, i - 1)) {
5646                         if (btrfs_item_end_nr(buf, i) >
5647                             btrfs_item_offset_nr(buf, i - 1)) {
5648                                 ret = delete_bogus_item(trans, root, path,
5649                                                         buf, i);
5650                                 if (!ret)
5651                                         goto again;
5652                                 fprintf(stderr, "items overlap, can't fix\n");
5653                                 ret = -EIO;
5654                                 break;
5655                         }
5656                         shift = btrfs_item_offset_nr(buf, i - 1) -
5657                                 btrfs_item_end_nr(buf, i);
5658                 }
5659                 if (!shift)
5660                         continue;
5661
5662                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5663                        i, shift, (unsigned long long)buf->start);
5664                 offset = btrfs_item_offset_nr(buf, i);
5665                 memmove_extent_buffer(buf,
5666                                       btrfs_leaf_data(buf) + offset + shift,
5667                                       btrfs_leaf_data(buf) + offset,
5668                                       btrfs_item_size_nr(buf, i));
5669                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5670                                       offset + shift);
5671                 btrfs_mark_buffer_dirty(buf);
5672         }
5673
5674         /*
5675          * We may have moved things, in which case we want to exit so we don't
5676          * write those changes out.  Once we have proper abort functionality in
5677          * progs this can be changed to something nicer.
5678          */
5679         BUG_ON(ret);
5680         return ret;
5681 }
5682
5683 /*
5684  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5685  * then just return -EIO.
5686  */
5687 static int try_to_fix_bad_block(struct btrfs_root *root,
5688                                 struct extent_buffer *buf,
5689                                 enum btrfs_tree_block_status status)
5690 {
5691         struct btrfs_trans_handle *trans;
5692         struct ulist *roots;
5693         struct ulist_node *node;
5694         struct btrfs_root *search_root;
5695         struct btrfs_path path;
5696         struct ulist_iterator iter;
5697         struct btrfs_key root_key, key;
5698         int ret;
5699
5700         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5701             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5702                 return -EIO;
5703
5704         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5705         if (ret)
5706                 return -EIO;
5707
5708         btrfs_init_path(&path);
5709         ULIST_ITER_INIT(&iter);
5710         while ((node = ulist_next(roots, &iter))) {
5711                 root_key.objectid = node->val;
5712                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5713                 root_key.offset = (u64)-1;
5714
5715                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5716                 if (IS_ERR(root)) {
5717                         ret = -EIO;
5718                         break;
5719                 }
5720
5721
5722                 trans = btrfs_start_transaction(search_root, 0);
5723                 if (IS_ERR(trans)) {
5724                         ret = PTR_ERR(trans);
5725                         break;
5726                 }
5727
5728                 path.lowest_level = btrfs_header_level(buf);
5729                 path.skip_check_block = 1;
5730                 if (path.lowest_level)
5731                         btrfs_node_key_to_cpu(buf, &key, 0);
5732                 else
5733                         btrfs_item_key_to_cpu(buf, &key, 0);
5734                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5735                 if (ret) {
5736                         ret = -EIO;
5737                         btrfs_commit_transaction(trans, search_root);
5738                         break;
5739                 }
5740                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5741                         ret = fix_key_order(trans, search_root, &path);
5742                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5743                         ret = fix_item_offset(trans, search_root, &path);
5744                 if (ret) {
5745                         btrfs_commit_transaction(trans, search_root);
5746                         break;
5747                 }
5748                 btrfs_release_path(&path);
5749                 btrfs_commit_transaction(trans, search_root);
5750         }
5751         ulist_free(roots);
5752         btrfs_release_path(&path);
5753         return ret;
5754 }
5755
5756 static int check_block(struct btrfs_root *root,
5757                        struct cache_tree *extent_cache,
5758                        struct extent_buffer *buf, u64 flags)
5759 {
5760         struct extent_record *rec;
5761         struct cache_extent *cache;
5762         struct btrfs_key key;
5763         enum btrfs_tree_block_status status;
5764         int ret = 0;
5765         int level;
5766
5767         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5768         if (!cache)
5769                 return 1;
5770         rec = container_of(cache, struct extent_record, cache);
5771         rec->generation = btrfs_header_generation(buf);
5772
5773         level = btrfs_header_level(buf);
5774         if (btrfs_header_nritems(buf) > 0) {
5775
5776                 if (level == 0)
5777                         btrfs_item_key_to_cpu(buf, &key, 0);
5778                 else
5779                         btrfs_node_key_to_cpu(buf, &key, 0);
5780
5781                 rec->info_objectid = key.objectid;
5782         }
5783         rec->info_level = level;
5784
5785         if (btrfs_is_leaf(buf))
5786                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5787         else
5788                 status = btrfs_check_node(root, &rec->parent_key, buf);
5789
5790         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5791                 if (repair)
5792                         status = try_to_fix_bad_block(root, buf, status);
5793                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5794                         ret = -EIO;
5795                         fprintf(stderr, "bad block %llu\n",
5796                                 (unsigned long long)buf->start);
5797                 } else {
5798                         /*
5799                          * Signal to callers we need to start the scan over
5800                          * again since we'll have cowed blocks.
5801                          */
5802                         ret = -EAGAIN;
5803                 }
5804         } else {
5805                 rec->content_checked = 1;
5806                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5807                         rec->owner_ref_checked = 1;
5808                 else {
5809                         ret = check_owner_ref(root, rec, buf);
5810                         if (!ret)
5811                                 rec->owner_ref_checked = 1;
5812                 }
5813         }
5814         if (!ret)
5815                 maybe_free_extent_rec(extent_cache, rec);
5816         return ret;
5817 }
5818
5819 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5820                                                 u64 parent, u64 root)
5821 {
5822         struct list_head *cur = rec->backrefs.next;
5823         struct extent_backref *node;
5824         struct tree_backref *back;
5825
5826         while(cur != &rec->backrefs) {
5827                 node = to_extent_backref(cur);
5828                 cur = cur->next;
5829                 if (node->is_data)
5830                         continue;
5831                 back = to_tree_backref(node);
5832                 if (parent > 0) {
5833                         if (!node->full_backref)
5834                                 continue;
5835                         if (parent == back->parent)
5836                                 return back;
5837                 } else {
5838                         if (node->full_backref)
5839                                 continue;
5840                         if (back->root == root)
5841                                 return back;
5842                 }
5843         }
5844         return NULL;
5845 }
5846
5847 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5848                                                 u64 parent, u64 root)
5849 {
5850         struct tree_backref *ref = malloc(sizeof(*ref));
5851
5852         if (!ref)
5853                 return NULL;
5854         memset(&ref->node, 0, sizeof(ref->node));
5855         if (parent > 0) {
5856                 ref->parent = parent;
5857                 ref->node.full_backref = 1;
5858         } else {
5859                 ref->root = root;
5860                 ref->node.full_backref = 0;
5861         }
5862         list_add_tail(&ref->node.list, &rec->backrefs);
5863
5864         return ref;
5865 }
5866
5867 static struct data_backref *find_data_backref(struct extent_record *rec,
5868                                                 u64 parent, u64 root,
5869                                                 u64 owner, u64 offset,
5870                                                 int found_ref,
5871                                                 u64 disk_bytenr, u64 bytes)
5872 {
5873         struct list_head *cur = rec->backrefs.next;
5874         struct extent_backref *node;
5875         struct data_backref *back;
5876
5877         while(cur != &rec->backrefs) {
5878                 node = to_extent_backref(cur);
5879                 cur = cur->next;
5880                 if (!node->is_data)
5881                         continue;
5882                 back = to_data_backref(node);
5883                 if (parent > 0) {
5884                         if (!node->full_backref)
5885                                 continue;
5886                         if (parent == back->parent)
5887                                 return back;
5888                 } else {
5889                         if (node->full_backref)
5890                                 continue;
5891                         if (back->root == root && back->owner == owner &&
5892                             back->offset == offset) {
5893                                 if (found_ref && node->found_ref &&
5894                                     (back->bytes != bytes ||
5895                                     back->disk_bytenr != disk_bytenr))
5896                                         continue;
5897                                 return back;
5898                         }
5899                 }
5900         }
5901         return NULL;
5902 }
5903
5904 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5905                                                 u64 parent, u64 root,
5906                                                 u64 owner, u64 offset,
5907                                                 u64 max_size)
5908 {
5909         struct data_backref *ref = malloc(sizeof(*ref));
5910
5911         if (!ref)
5912                 return NULL;
5913         memset(&ref->node, 0, sizeof(ref->node));
5914         ref->node.is_data = 1;
5915
5916         if (parent > 0) {
5917                 ref->parent = parent;
5918                 ref->owner = 0;
5919                 ref->offset = 0;
5920                 ref->node.full_backref = 1;
5921         } else {
5922                 ref->root = root;
5923                 ref->owner = owner;
5924                 ref->offset = offset;
5925                 ref->node.full_backref = 0;
5926         }
5927         ref->bytes = max_size;
5928         ref->found_ref = 0;
5929         ref->num_refs = 0;
5930         list_add_tail(&ref->node.list, &rec->backrefs);
5931         if (max_size > rec->max_size)
5932                 rec->max_size = max_size;
5933         return ref;
5934 }
5935
5936 /* Check if the type of extent matches with its chunk */
5937 static void check_extent_type(struct extent_record *rec)
5938 {
5939         struct btrfs_block_group_cache *bg_cache;
5940
5941         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5942         if (!bg_cache)
5943                 return;
5944
5945         /* data extent, check chunk directly*/
5946         if (!rec->metadata) {
5947                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5948                         rec->wrong_chunk_type = 1;
5949                 return;
5950         }
5951
5952         /* metadata extent, check the obvious case first */
5953         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5954                                  BTRFS_BLOCK_GROUP_METADATA))) {
5955                 rec->wrong_chunk_type = 1;
5956                 return;
5957         }
5958
5959         /*
5960          * Check SYSTEM extent, as it's also marked as metadata, we can only
5961          * make sure it's a SYSTEM extent by its backref
5962          */
5963         if (!list_empty(&rec->backrefs)) {
5964                 struct extent_backref *node;
5965                 struct tree_backref *tback;
5966                 u64 bg_type;
5967
5968                 node = to_extent_backref(rec->backrefs.next);
5969                 if (node->is_data) {
5970                         /* tree block shouldn't have data backref */
5971                         rec->wrong_chunk_type = 1;
5972                         return;
5973                 }
5974                 tback = container_of(node, struct tree_backref, node);
5975
5976                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5977                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5978                 else
5979                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5980                 if (!(bg_cache->flags & bg_type))
5981                         rec->wrong_chunk_type = 1;
5982         }
5983 }
5984
5985 /*
5986  * Allocate a new extent record, fill default values from @tmpl and insert int
5987  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5988  * the cache, otherwise it fails.
5989  */
5990 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5991                 struct extent_record *tmpl)
5992 {
5993         struct extent_record *rec;
5994         int ret = 0;
5995
5996         rec = malloc(sizeof(*rec));
5997         if (!rec)
5998                 return -ENOMEM;
5999         rec->start = tmpl->start;
6000         rec->max_size = tmpl->max_size;
6001         rec->nr = max(tmpl->nr, tmpl->max_size);
6002         rec->found_rec = tmpl->found_rec;
6003         rec->content_checked = tmpl->content_checked;
6004         rec->owner_ref_checked = tmpl->owner_ref_checked;
6005         rec->num_duplicates = 0;
6006         rec->metadata = tmpl->metadata;
6007         rec->flag_block_full_backref = FLAG_UNSET;
6008         rec->bad_full_backref = 0;
6009         rec->crossing_stripes = 0;
6010         rec->wrong_chunk_type = 0;
6011         rec->is_root = tmpl->is_root;
6012         rec->refs = tmpl->refs;
6013         rec->extent_item_refs = tmpl->extent_item_refs;
6014         rec->parent_generation = tmpl->parent_generation;
6015         INIT_LIST_HEAD(&rec->backrefs);
6016         INIT_LIST_HEAD(&rec->dups);
6017         INIT_LIST_HEAD(&rec->list);
6018         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6019         rec->cache.start = tmpl->start;
6020         rec->cache.size = tmpl->nr;
6021         ret = insert_cache_extent(extent_cache, &rec->cache);
6022         if (ret) {
6023                 free(rec);
6024                 return ret;
6025         }
6026         bytes_used += rec->nr;
6027
6028         if (tmpl->metadata)
6029                 rec->crossing_stripes = check_crossing_stripes(global_info,
6030                                 rec->start, global_info->tree_root->nodesize);
6031         check_extent_type(rec);
6032         return ret;
6033 }
6034
6035 /*
6036  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6037  * some are hints:
6038  * - refs              - if found, increase refs
6039  * - is_root           - if found, set
6040  * - content_checked   - if found, set
6041  * - owner_ref_checked - if found, set
6042  *
6043  * If not found, create a new one, initialize and insert.
6044  */
6045 static int add_extent_rec(struct cache_tree *extent_cache,
6046                 struct extent_record *tmpl)
6047 {
6048         struct extent_record *rec;
6049         struct cache_extent *cache;
6050         int ret = 0;
6051         int dup = 0;
6052
6053         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6054         if (cache) {
6055                 rec = container_of(cache, struct extent_record, cache);
6056                 if (tmpl->refs)
6057                         rec->refs++;
6058                 if (rec->nr == 1)
6059                         rec->nr = max(tmpl->nr, tmpl->max_size);
6060
6061                 /*
6062                  * We need to make sure to reset nr to whatever the extent
6063                  * record says was the real size, this way we can compare it to
6064                  * the backrefs.
6065                  */
6066                 if (tmpl->found_rec) {
6067                         if (tmpl->start != rec->start || rec->found_rec) {
6068                                 struct extent_record *tmp;
6069
6070                                 dup = 1;
6071                                 if (list_empty(&rec->list))
6072                                         list_add_tail(&rec->list,
6073                                                       &duplicate_extents);
6074
6075                                 /*
6076                                  * We have to do this song and dance in case we
6077                                  * find an extent record that falls inside of
6078                                  * our current extent record but does not have
6079                                  * the same objectid.
6080                                  */
6081                                 tmp = malloc(sizeof(*tmp));
6082                                 if (!tmp)
6083                                         return -ENOMEM;
6084                                 tmp->start = tmpl->start;
6085                                 tmp->max_size = tmpl->max_size;
6086                                 tmp->nr = tmpl->nr;
6087                                 tmp->found_rec = 1;
6088                                 tmp->metadata = tmpl->metadata;
6089                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6090                                 INIT_LIST_HEAD(&tmp->list);
6091                                 list_add_tail(&tmp->list, &rec->dups);
6092                                 rec->num_duplicates++;
6093                         } else {
6094                                 rec->nr = tmpl->nr;
6095                                 rec->found_rec = 1;
6096                         }
6097                 }
6098
6099                 if (tmpl->extent_item_refs && !dup) {
6100                         if (rec->extent_item_refs) {
6101                                 fprintf(stderr, "block %llu rec "
6102                                         "extent_item_refs %llu, passed %llu\n",
6103                                         (unsigned long long)tmpl->start,
6104                                         (unsigned long long)
6105                                                         rec->extent_item_refs,
6106                                         (unsigned long long)tmpl->extent_item_refs);
6107                         }
6108                         rec->extent_item_refs = tmpl->extent_item_refs;
6109                 }
6110                 if (tmpl->is_root)
6111                         rec->is_root = 1;
6112                 if (tmpl->content_checked)
6113                         rec->content_checked = 1;
6114                 if (tmpl->owner_ref_checked)
6115                         rec->owner_ref_checked = 1;
6116                 memcpy(&rec->parent_key, &tmpl->parent_key,
6117                                 sizeof(tmpl->parent_key));
6118                 if (tmpl->parent_generation)
6119                         rec->parent_generation = tmpl->parent_generation;
6120                 if (rec->max_size < tmpl->max_size)
6121                         rec->max_size = tmpl->max_size;
6122
6123                 /*
6124                  * A metadata extent can't cross stripe_len boundary, otherwise
6125                  * kernel scrub won't be able to handle it.
6126                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6127                  * it.
6128                  */
6129                 if (tmpl->metadata)
6130                         rec->crossing_stripes = check_crossing_stripes(
6131                                         global_info, rec->start,
6132                                         global_info->tree_root->nodesize);
6133                 check_extent_type(rec);
6134                 maybe_free_extent_rec(extent_cache, rec);
6135                 return ret;
6136         }
6137
6138         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6139
6140         return ret;
6141 }
6142
6143 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6144                             u64 parent, u64 root, int found_ref)
6145 {
6146         struct extent_record *rec;
6147         struct tree_backref *back;
6148         struct cache_extent *cache;
6149         int ret;
6150
6151         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6152         if (!cache) {
6153                 struct extent_record tmpl;
6154
6155                 memset(&tmpl, 0, sizeof(tmpl));
6156                 tmpl.start = bytenr;
6157                 tmpl.nr = 1;
6158                 tmpl.metadata = 1;
6159
6160                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6161                 if (ret)
6162                         return ret;
6163
6164                 /* really a bug in cache_extent implement now */
6165                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6166                 if (!cache)
6167                         return -ENOENT;
6168         }
6169
6170         rec = container_of(cache, struct extent_record, cache);
6171         if (rec->start != bytenr) {
6172                 /*
6173                  * Several cause, from unaligned bytenr to over lapping extents
6174                  */
6175                 return -EEXIST;
6176         }
6177
6178         back = find_tree_backref(rec, parent, root);
6179         if (!back) {
6180                 back = alloc_tree_backref(rec, parent, root);
6181                 if (!back)
6182                         return -ENOMEM;
6183         }
6184
6185         if (found_ref) {
6186                 if (back->node.found_ref) {
6187                         fprintf(stderr, "Extent back ref already exists "
6188                                 "for %llu parent %llu root %llu \n",
6189                                 (unsigned long long)bytenr,
6190                                 (unsigned long long)parent,
6191                                 (unsigned long long)root);
6192                 }
6193                 back->node.found_ref = 1;
6194         } else {
6195                 if (back->node.found_extent_tree) {
6196                         fprintf(stderr, "Extent back ref already exists "
6197                                 "for %llu parent %llu root %llu \n",
6198                                 (unsigned long long)bytenr,
6199                                 (unsigned long long)parent,
6200                                 (unsigned long long)root);
6201                 }
6202                 back->node.found_extent_tree = 1;
6203         }
6204         check_extent_type(rec);
6205         maybe_free_extent_rec(extent_cache, rec);
6206         return 0;
6207 }
6208
6209 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6210                             u64 parent, u64 root, u64 owner, u64 offset,
6211                             u32 num_refs, int found_ref, u64 max_size)
6212 {
6213         struct extent_record *rec;
6214         struct data_backref *back;
6215         struct cache_extent *cache;
6216         int ret;
6217
6218         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6219         if (!cache) {
6220                 struct extent_record tmpl;
6221
6222                 memset(&tmpl, 0, sizeof(tmpl));
6223                 tmpl.start = bytenr;
6224                 tmpl.nr = 1;
6225                 tmpl.max_size = max_size;
6226
6227                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6228                 if (ret)
6229                         return ret;
6230
6231                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6232                 if (!cache)
6233                         abort();
6234         }
6235
6236         rec = container_of(cache, struct extent_record, cache);
6237         if (rec->max_size < max_size)
6238                 rec->max_size = max_size;
6239
6240         /*
6241          * If found_ref is set then max_size is the real size and must match the
6242          * existing refs.  So if we have already found a ref then we need to
6243          * make sure that this ref matches the existing one, otherwise we need
6244          * to add a new backref so we can notice that the backrefs don't match
6245          * and we need to figure out who is telling the truth.  This is to
6246          * account for that awful fsync bug I introduced where we'd end up with
6247          * a btrfs_file_extent_item that would have its length include multiple
6248          * prealloc extents or point inside of a prealloc extent.
6249          */
6250         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6251                                  bytenr, max_size);
6252         if (!back) {
6253                 back = alloc_data_backref(rec, parent, root, owner, offset,
6254                                           max_size);
6255                 BUG_ON(!back);
6256         }
6257
6258         if (found_ref) {
6259                 BUG_ON(num_refs != 1);
6260                 if (back->node.found_ref)
6261                         BUG_ON(back->bytes != max_size);
6262                 back->node.found_ref = 1;
6263                 back->found_ref += 1;
6264                 back->bytes = max_size;
6265                 back->disk_bytenr = bytenr;
6266                 rec->refs += 1;
6267                 rec->content_checked = 1;
6268                 rec->owner_ref_checked = 1;
6269         } else {
6270                 if (back->node.found_extent_tree) {
6271                         fprintf(stderr, "Extent back ref already exists "
6272                                 "for %llu parent %llu root %llu "
6273                                 "owner %llu offset %llu num_refs %lu\n",
6274                                 (unsigned long long)bytenr,
6275                                 (unsigned long long)parent,
6276                                 (unsigned long long)root,
6277                                 (unsigned long long)owner,
6278                                 (unsigned long long)offset,
6279                                 (unsigned long)num_refs);
6280                 }
6281                 back->num_refs = num_refs;
6282                 back->node.found_extent_tree = 1;
6283         }
6284         maybe_free_extent_rec(extent_cache, rec);
6285         return 0;
6286 }
6287
6288 static int add_pending(struct cache_tree *pending,
6289                        struct cache_tree *seen, u64 bytenr, u32 size)
6290 {
6291         int ret;
6292         ret = add_cache_extent(seen, bytenr, size);
6293         if (ret)
6294                 return ret;
6295         add_cache_extent(pending, bytenr, size);
6296         return 0;
6297 }
6298
6299 static int pick_next_pending(struct cache_tree *pending,
6300                         struct cache_tree *reada,
6301                         struct cache_tree *nodes,
6302                         u64 last, struct block_info *bits, int bits_nr,
6303                         int *reada_bits)
6304 {
6305         unsigned long node_start = last;
6306         struct cache_extent *cache;
6307         int ret;
6308
6309         cache = search_cache_extent(reada, 0);
6310         if (cache) {
6311                 bits[0].start = cache->start;
6312                 bits[0].size = cache->size;
6313                 *reada_bits = 1;
6314                 return 1;
6315         }
6316         *reada_bits = 0;
6317         if (node_start > 32768)
6318                 node_start -= 32768;
6319
6320         cache = search_cache_extent(nodes, node_start);
6321         if (!cache)
6322                 cache = search_cache_extent(nodes, 0);
6323
6324         if (!cache) {
6325                  cache = search_cache_extent(pending, 0);
6326                  if (!cache)
6327                          return 0;
6328                  ret = 0;
6329                  do {
6330                          bits[ret].start = cache->start;
6331                          bits[ret].size = cache->size;
6332                          cache = next_cache_extent(cache);
6333                          ret++;
6334                  } while (cache && ret < bits_nr);
6335                  return ret;
6336         }
6337
6338         ret = 0;
6339         do {
6340                 bits[ret].start = cache->start;
6341                 bits[ret].size = cache->size;
6342                 cache = next_cache_extent(cache);
6343                 ret++;
6344         } while (cache && ret < bits_nr);
6345
6346         if (bits_nr - ret > 8) {
6347                 u64 lookup = bits[0].start + bits[0].size;
6348                 struct cache_extent *next;
6349                 next = search_cache_extent(pending, lookup);
6350                 while(next) {
6351                         if (next->start - lookup > 32768)
6352                                 break;
6353                         bits[ret].start = next->start;
6354                         bits[ret].size = next->size;
6355                         lookup = next->start + next->size;
6356                         ret++;
6357                         if (ret == bits_nr)
6358                                 break;
6359                         next = next_cache_extent(next);
6360                         if (!next)
6361                                 break;
6362                 }
6363         }
6364         return ret;
6365 }
6366
6367 static void free_chunk_record(struct cache_extent *cache)
6368 {
6369         struct chunk_record *rec;
6370
6371         rec = container_of(cache, struct chunk_record, cache);
6372         list_del_init(&rec->list);
6373         list_del_init(&rec->dextents);
6374         free(rec);
6375 }
6376
6377 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6378 {
6379         cache_tree_free_extents(chunk_cache, free_chunk_record);
6380 }
6381
6382 static void free_device_record(struct rb_node *node)
6383 {
6384         struct device_record *rec;
6385
6386         rec = container_of(node, struct device_record, node);
6387         free(rec);
6388 }
6389
6390 FREE_RB_BASED_TREE(device_cache, free_device_record);
6391
6392 int insert_block_group_record(struct block_group_tree *tree,
6393                               struct block_group_record *bg_rec)
6394 {
6395         int ret;
6396
6397         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6398         if (ret)
6399                 return ret;
6400
6401         list_add_tail(&bg_rec->list, &tree->block_groups);
6402         return 0;
6403 }
6404
6405 static void free_block_group_record(struct cache_extent *cache)
6406 {
6407         struct block_group_record *rec;
6408
6409         rec = container_of(cache, struct block_group_record, cache);
6410         list_del_init(&rec->list);
6411         free(rec);
6412 }
6413
6414 void free_block_group_tree(struct block_group_tree *tree)
6415 {
6416         cache_tree_free_extents(&tree->tree, free_block_group_record);
6417 }
6418
6419 int insert_device_extent_record(struct device_extent_tree *tree,
6420                                 struct device_extent_record *de_rec)
6421 {
6422         int ret;
6423
6424         /*
6425          * Device extent is a bit different from the other extents, because
6426          * the extents which belong to the different devices may have the
6427          * same start and size, so we need use the special extent cache
6428          * search/insert functions.
6429          */
6430         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6431         if (ret)
6432                 return ret;
6433
6434         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6435         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6436         return 0;
6437 }
6438
6439 static void free_device_extent_record(struct cache_extent *cache)
6440 {
6441         struct device_extent_record *rec;
6442
6443         rec = container_of(cache, struct device_extent_record, cache);
6444         if (!list_empty(&rec->chunk_list))
6445                 list_del_init(&rec->chunk_list);
6446         if (!list_empty(&rec->device_list))
6447                 list_del_init(&rec->device_list);
6448         free(rec);
6449 }
6450
6451 void free_device_extent_tree(struct device_extent_tree *tree)
6452 {
6453         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6454 }
6455
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent reference stored at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A reference whose objectid is below BTRFS_FIRST_FREE_OBJECTID
 * is recorded as a tree backref, anything else as a data backref carrying
 * the v0 reference count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6476
6477 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6478                                             struct btrfs_key *key,
6479                                             int slot)
6480 {
6481         struct btrfs_chunk *ptr;
6482         struct chunk_record *rec;
6483         int num_stripes, i;
6484
6485         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6486         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6487
6488         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6489         if (!rec) {
6490                 fprintf(stderr, "memory allocation failed\n");
6491                 exit(-1);
6492         }
6493
6494         INIT_LIST_HEAD(&rec->list);
6495         INIT_LIST_HEAD(&rec->dextents);
6496         rec->bg_rec = NULL;
6497
6498         rec->cache.start = key->offset;
6499         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6500
6501         rec->generation = btrfs_header_generation(leaf);
6502
6503         rec->objectid = key->objectid;
6504         rec->type = key->type;
6505         rec->offset = key->offset;
6506
6507         rec->length = rec->cache.size;
6508         rec->owner = btrfs_chunk_owner(leaf, ptr);
6509         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6510         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6511         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6512         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6513         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6514         rec->num_stripes = num_stripes;
6515         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6516
6517         for (i = 0; i < rec->num_stripes; ++i) {
6518                 rec->stripes[i].devid =
6519                         btrfs_stripe_devid_nr(leaf, ptr, i);
6520                 rec->stripes[i].offset =
6521                         btrfs_stripe_offset_nr(leaf, ptr, i);
6522                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6523                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6524                                 BTRFS_UUID_SIZE);
6525         }
6526
6527         return rec;
6528 }
6529
6530 static int process_chunk_item(struct cache_tree *chunk_cache,
6531                               struct btrfs_key *key, struct extent_buffer *eb,
6532                               int slot)
6533 {
6534         struct chunk_record *rec;
6535         struct btrfs_chunk *chunk;
6536         int ret = 0;
6537
6538         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6539         /*
6540          * Do extra check for this chunk item,
6541          *
6542          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6543          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6544          * and owner<->key_type check.
6545          */
6546         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6547                                       key->offset);
6548         if (ret < 0) {
6549                 error("chunk(%llu, %llu) is not valid, ignore it",
6550                       key->offset, btrfs_chunk_length(eb, chunk));
6551                 return 0;
6552         }
6553         rec = btrfs_new_chunk_record(eb, key, slot);
6554         ret = insert_cache_extent(chunk_cache, &rec->cache);
6555         if (ret) {
6556                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6557                         rec->offset, rec->length);
6558                 free(rec);
6559         }
6560
6561         return ret;
6562 }
6563
6564 static int process_device_item(struct rb_root *dev_cache,
6565                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6566 {
6567         struct btrfs_dev_item *ptr;
6568         struct device_record *rec;
6569         int ret = 0;
6570
6571         ptr = btrfs_item_ptr(eb,
6572                 slot, struct btrfs_dev_item);
6573
6574         rec = malloc(sizeof(*rec));
6575         if (!rec) {
6576                 fprintf(stderr, "memory allocation failed\n");
6577                 return -ENOMEM;
6578         }
6579
6580         rec->devid = key->offset;
6581         rec->generation = btrfs_header_generation(eb);
6582
6583         rec->objectid = key->objectid;
6584         rec->type = key->type;
6585         rec->offset = key->offset;
6586
6587         rec->devid = btrfs_device_id(eb, ptr);
6588         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6589         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6590
6591         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6592         if (ret) {
6593                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6594                 free(rec);
6595         }
6596
6597         return ret;
6598 }
6599
6600 struct block_group_record *
6601 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6602                              int slot)
6603 {
6604         struct btrfs_block_group_item *ptr;
6605         struct block_group_record *rec;
6606
6607         rec = calloc(1, sizeof(*rec));
6608         if (!rec) {
6609                 fprintf(stderr, "memory allocation failed\n");
6610                 exit(-1);
6611         }
6612
6613         rec->cache.start = key->objectid;
6614         rec->cache.size = key->offset;
6615
6616         rec->generation = btrfs_header_generation(leaf);
6617
6618         rec->objectid = key->objectid;
6619         rec->type = key->type;
6620         rec->offset = key->offset;
6621
6622         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6623         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6624
6625         INIT_LIST_HEAD(&rec->list);
6626
6627         return rec;
6628 }
6629
6630 static int process_block_group_item(struct block_group_tree *block_group_cache,
6631                                     struct btrfs_key *key,
6632                                     struct extent_buffer *eb, int slot)
6633 {
6634         struct block_group_record *rec;
6635         int ret = 0;
6636
6637         rec = btrfs_new_block_group_record(eb, key, slot);
6638         ret = insert_block_group_record(block_group_cache, rec);
6639         if (ret) {
6640                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6641                         rec->objectid, rec->offset);
6642                 free(rec);
6643         }
6644
6645         return ret;
6646 }
6647
6648 struct device_extent_record *
6649 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6650                                struct btrfs_key *key, int slot)
6651 {
6652         struct device_extent_record *rec;
6653         struct btrfs_dev_extent *ptr;
6654
6655         rec = calloc(1, sizeof(*rec));
6656         if (!rec) {
6657                 fprintf(stderr, "memory allocation failed\n");
6658                 exit(-1);
6659         }
6660
6661         rec->cache.objectid = key->objectid;
6662         rec->cache.start = key->offset;
6663
6664         rec->generation = btrfs_header_generation(leaf);
6665
6666         rec->objectid = key->objectid;
6667         rec->type = key->type;
6668         rec->offset = key->offset;
6669
6670         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6671         rec->chunk_objecteid =
6672                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6673         rec->chunk_offset =
6674                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6675         rec->length = btrfs_dev_extent_length(leaf, ptr);
6676         rec->cache.size = rec->length;
6677
6678         INIT_LIST_HEAD(&rec->chunk_list);
6679         INIT_LIST_HEAD(&rec->device_list);
6680
6681         return rec;
6682 }
6683
6684 static int
6685 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6686                            struct btrfs_key *key, struct extent_buffer *eb,
6687                            int slot)
6688 {
6689         struct device_extent_record *rec;
6690         int ret;
6691
6692         rec = btrfs_new_device_extent_record(eb, key, slot);
6693         ret = insert_device_extent_record(dev_extent_cache, rec);
6694         if (ret) {
6695                 fprintf(stderr,
6696                         "Device extent[%llu, %llu, %llu] existed.\n",
6697                         rec->objectid, rec->offset, rec->length);
6698                 free(rec);
6699         }
6700
6701         return ret;
6702 }
6703
6704 static int process_extent_item(struct btrfs_root *root,
6705                                struct cache_tree *extent_cache,
6706                                struct extent_buffer *eb, int slot)
6707 {
6708         struct btrfs_extent_item *ei;
6709         struct btrfs_extent_inline_ref *iref;
6710         struct btrfs_extent_data_ref *dref;
6711         struct btrfs_shared_data_ref *sref;
6712         struct btrfs_key key;
6713         struct extent_record tmpl;
6714         unsigned long end;
6715         unsigned long ptr;
6716         int ret;
6717         int type;
6718         u32 item_size = btrfs_item_size_nr(eb, slot);
6719         u64 refs = 0;
6720         u64 offset;
6721         u64 num_bytes;
6722         int metadata = 0;
6723
6724         btrfs_item_key_to_cpu(eb, &key, slot);
6725
6726         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6727                 metadata = 1;
6728                 num_bytes = root->nodesize;
6729         } else {
6730                 num_bytes = key.offset;
6731         }
6732
6733         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6734                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6735                       key.objectid, root->sectorsize);
6736                 return -EIO;
6737         }
6738         if (item_size < sizeof(*ei)) {
6739 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6740                 struct btrfs_extent_item_v0 *ei0;
6741                 BUG_ON(item_size != sizeof(*ei0));
6742                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6743                 refs = btrfs_extent_refs_v0(eb, ei0);
6744 #else
6745                 BUG();
6746 #endif
6747                 memset(&tmpl, 0, sizeof(tmpl));
6748                 tmpl.start = key.objectid;
6749                 tmpl.nr = num_bytes;
6750                 tmpl.extent_item_refs = refs;
6751                 tmpl.metadata = metadata;
6752                 tmpl.found_rec = 1;
6753                 tmpl.max_size = num_bytes;
6754
6755                 return add_extent_rec(extent_cache, &tmpl);
6756         }
6757
6758         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6759         refs = btrfs_extent_refs(eb, ei);
6760         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6761                 metadata = 1;
6762         else
6763                 metadata = 0;
6764         if (metadata && num_bytes != root->nodesize) {
6765                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6766                       num_bytes, root->nodesize);
6767                 return -EIO;
6768         }
6769         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6770                 error("ignore invalid data extent, length %llu is not aligned to %u",
6771                       num_bytes, root->sectorsize);
6772                 return -EIO;
6773         }
6774
6775         memset(&tmpl, 0, sizeof(tmpl));
6776         tmpl.start = key.objectid;
6777         tmpl.nr = num_bytes;
6778         tmpl.extent_item_refs = refs;
6779         tmpl.metadata = metadata;
6780         tmpl.found_rec = 1;
6781         tmpl.max_size = num_bytes;
6782         add_extent_rec(extent_cache, &tmpl);
6783
6784         ptr = (unsigned long)(ei + 1);
6785         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6786             key.type == BTRFS_EXTENT_ITEM_KEY)
6787                 ptr += sizeof(struct btrfs_tree_block_info);
6788
6789         end = (unsigned long)ei + item_size;
6790         while (ptr < end) {
6791                 iref = (struct btrfs_extent_inline_ref *)ptr;
6792                 type = btrfs_extent_inline_ref_type(eb, iref);
6793                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6794                 switch (type) {
6795                 case BTRFS_TREE_BLOCK_REF_KEY:
6796                         ret = add_tree_backref(extent_cache, key.objectid,
6797                                         0, offset, 0);
6798                         if (ret < 0)
6799                                 error("add_tree_backref failed: %s",
6800                                       strerror(-ret));
6801                         break;
6802                 case BTRFS_SHARED_BLOCK_REF_KEY:
6803                         ret = add_tree_backref(extent_cache, key.objectid,
6804                                         offset, 0, 0);
6805                         if (ret < 0)
6806                                 error("add_tree_backref failed: %s",
6807                                       strerror(-ret));
6808                         break;
6809                 case BTRFS_EXTENT_DATA_REF_KEY:
6810                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6811                         add_data_backref(extent_cache, key.objectid, 0,
6812                                         btrfs_extent_data_ref_root(eb, dref),
6813                                         btrfs_extent_data_ref_objectid(eb,
6814                                                                        dref),
6815                                         btrfs_extent_data_ref_offset(eb, dref),
6816                                         btrfs_extent_data_ref_count(eb, dref),
6817                                         0, num_bytes);
6818                         break;
6819                 case BTRFS_SHARED_DATA_REF_KEY:
6820                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6821                         add_data_backref(extent_cache, key.objectid, offset,
6822                                         0, 0, 0,
6823                                         btrfs_shared_data_ref_count(eb, sref),
6824                                         0, num_bytes);
6825                         break;
6826                 default:
6827                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6828                                 key.objectid, key.type, num_bytes);
6829                         goto out;
6830                 }
6831                 ptr += btrfs_extent_inline_ref_size(type);
6832         }
6833         WARN_ON(ptr > end);
6834 out:
6835         return 0;
6836 }
6837
6838 static int check_cache_range(struct btrfs_root *root,
6839                              struct btrfs_block_group_cache *cache,
6840                              u64 offset, u64 bytes)
6841 {
6842         struct btrfs_free_space *entry;
6843         u64 *logical;
6844         u64 bytenr;
6845         int stripe_len;
6846         int i, nr, ret;
6847
6848         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6849                 bytenr = btrfs_sb_offset(i);
6850                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6851                                        cache->key.objectid, bytenr, 0,
6852                                        &logical, &nr, &stripe_len);
6853                 if (ret)
6854                         return ret;
6855
6856                 while (nr--) {
6857                         if (logical[nr] + stripe_len <= offset)
6858                                 continue;
6859                         if (offset + bytes <= logical[nr])
6860                                 continue;
6861                         if (logical[nr] == offset) {
6862                                 if (stripe_len >= bytes) {
6863                                         free(logical);
6864                                         return 0;
6865                                 }
6866                                 bytes -= stripe_len;
6867                                 offset += stripe_len;
6868                         } else if (logical[nr] < offset) {
6869                                 if (logical[nr] + stripe_len >=
6870                                     offset + bytes) {
6871                                         free(logical);
6872                                         return 0;
6873                                 }
6874                                 bytes = (offset + bytes) -
6875                                         (logical[nr] + stripe_len);
6876                                 offset = logical[nr] + stripe_len;
6877                         } else {
6878                                 /*
6879                                  * Could be tricky, the super may land in the
6880                                  * middle of the area we're checking.  First
6881                                  * check the easiest case, it's at the end.
6882                                  */
6883                                 if (logical[nr] + stripe_len >=
6884                                     bytes + offset) {
6885                                         bytes = logical[nr] - offset;
6886                                         continue;
6887                                 }
6888
6889                                 /* Check the left side */
6890                                 ret = check_cache_range(root, cache,
6891                                                         offset,
6892                                                         logical[nr] - offset);
6893                                 if (ret) {
6894                                         free(logical);
6895                                         return ret;
6896                                 }
6897
6898                                 /* Now we continue with the right side */
6899                                 bytes = (offset + bytes) -
6900                                         (logical[nr] + stripe_len);
6901                                 offset = logical[nr] + stripe_len;
6902                         }
6903                 }
6904
6905                 free(logical);
6906         }
6907
6908         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6909         if (!entry) {
6910                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6911                         offset, offset+bytes);
6912                 return -EINVAL;
6913         }
6914
6915         if (entry->offset != offset) {
6916                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6917                         entry->offset);
6918                 return -EINVAL;
6919         }
6920
6921         if (entry->bytes != bytes) {
6922                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6923                         bytes, entry->bytes, offset);
6924                 return -EINVAL;
6925         }
6926
6927         unlink_free_space(cache->free_space_ctl, entry);
6928         free(entry);
6929         return 0;
6930 }
6931
6932 static int verify_space_cache(struct btrfs_root *root,
6933                               struct btrfs_block_group_cache *cache)
6934 {
6935         struct btrfs_path path;
6936         struct extent_buffer *leaf;
6937         struct btrfs_key key;
6938         u64 last;
6939         int ret = 0;
6940
6941         root = root->fs_info->extent_root;
6942
6943         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6944
6945         btrfs_init_path(&path);
6946         key.objectid = last;
6947         key.offset = 0;
6948         key.type = BTRFS_EXTENT_ITEM_KEY;
6949         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6950         if (ret < 0)
6951                 goto out;
6952         ret = 0;
6953         while (1) {
6954                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6955                         ret = btrfs_next_leaf(root, &path);
6956                         if (ret < 0)
6957                                 goto out;
6958                         if (ret > 0) {
6959                                 ret = 0;
6960                                 break;
6961                         }
6962                 }
6963                 leaf = path.nodes[0];
6964                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6965                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6966                         break;
6967                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6968                     key.type != BTRFS_METADATA_ITEM_KEY) {
6969                         path.slots[0]++;
6970                         continue;
6971                 }
6972
6973                 if (last == key.objectid) {
6974                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6975                                 last = key.objectid + key.offset;
6976                         else
6977                                 last = key.objectid + root->nodesize;
6978                         path.slots[0]++;
6979                         continue;
6980                 }
6981
6982                 ret = check_cache_range(root, cache, last,
6983                                         key.objectid - last);
6984                 if (ret)
6985                         break;
6986                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6987                         last = key.objectid + key.offset;
6988                 else
6989                         last = key.objectid + root->nodesize;
6990                 path.slots[0]++;
6991         }
6992
6993         if (last < cache->key.objectid + cache->key.offset)
6994                 ret = check_cache_range(root, cache, last,
6995                                         cache->key.objectid +
6996                                         cache->key.offset - last);
6997
6998 out:
6999         btrfs_release_path(&path);
7000
7001         if (!ret &&
7002             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7003                 fprintf(stderr, "There are still entries left in the space "
7004                         "cache\n");
7005                 ret = -EINVAL;
7006         }
7007
7008         return ret;
7009 }
7010
7011 static int check_space_cache(struct btrfs_root *root)
7012 {
7013         struct btrfs_block_group_cache *cache;
7014         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7015         int ret;
7016         int error = 0;
7017
7018         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7019             btrfs_super_generation(root->fs_info->super_copy) !=
7020             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7021                 printf("cache and super generation don't match, space cache "
7022                        "will be invalidated\n");
7023                 return 0;
7024         }
7025
7026         if (ctx.progress_enabled) {
7027                 ctx.tp = TASK_FREE_SPACE;
7028                 task_start(ctx.info);
7029         }
7030
7031         while (1) {
7032                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7033                 if (!cache)
7034                         break;
7035
7036                 start = cache->key.objectid + cache->key.offset;
7037                 if (!cache->free_space_ctl) {
7038                         if (btrfs_init_free_space_ctl(cache,
7039                                                       root->sectorsize)) {
7040                                 ret = -ENOMEM;
7041                                 break;
7042                         }
7043                 } else {
7044                         btrfs_remove_free_space_cache(cache);
7045                 }
7046
7047                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7048                         ret = exclude_super_stripes(root, cache);
7049                         if (ret) {
7050                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7051                                         strerror(-ret));
7052                                 error++;
7053                                 continue;
7054                         }
7055                         ret = load_free_space_tree(root->fs_info, cache);
7056                         free_excluded_extents(root, cache);
7057                         if (ret < 0) {
7058                                 fprintf(stderr, "could not load free space tree: %s\n",
7059                                         strerror(-ret));
7060                                 error++;
7061                                 continue;
7062                         }
7063                         error += ret;
7064                 } else {
7065                         ret = load_free_space_cache(root->fs_info, cache);
7066                         if (!ret)
7067                                 continue;
7068                 }
7069
7070                 ret = verify_space_cache(root, cache);
7071                 if (ret) {
7072                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7073                                 cache->key.objectid);
7074                         error++;
7075                 }
7076         }
7077
7078         task_stop(ctx.info);
7079
7080         return error ? -EINVAL : 0;
7081 }
7082
7083 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7084                         u64 num_bytes, unsigned long leaf_offset,
7085                         struct extent_buffer *eb) {
7086
7087         u64 offset = 0;
7088         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7089         char *data;
7090         unsigned long csum_offset;
7091         u32 csum;
7092         u32 csum_expected;
7093         u64 read_len;
7094         u64 data_checked = 0;
7095         u64 tmp;
7096         int ret = 0;
7097         int mirror;
7098         int num_copies;
7099
7100         if (num_bytes % root->sectorsize)
7101                 return -EINVAL;
7102
7103         data = malloc(num_bytes);
7104         if (!data)
7105                 return -ENOMEM;
7106
7107         while (offset < num_bytes) {
7108                 mirror = 0;
7109 again:
7110                 read_len = num_bytes - offset;
7111                 /* read as much space once a time */
7112                 ret = read_extent_data(root, data + offset,
7113                                 bytenr + offset, &read_len, mirror);
7114                 if (ret)
7115                         goto out;
7116                 data_checked = 0;
7117                 /* verify every 4k data's checksum */
7118                 while (data_checked < read_len) {
7119                         csum = ~(u32)0;
7120                         tmp = offset + data_checked;
7121
7122                         csum = btrfs_csum_data((char *)data + tmp,
7123                                                csum, root->sectorsize);
7124                         btrfs_csum_final(csum, (u8 *)&csum);
7125
7126                         csum_offset = leaf_offset +
7127                                  tmp / root->sectorsize * csum_size;
7128                         read_extent_buffer(eb, (char *)&csum_expected,
7129                                            csum_offset, csum_size);
7130                         /* try another mirror */
7131                         if (csum != csum_expected) {
7132                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7133                                                 mirror, bytenr + tmp,
7134                                                 csum, csum_expected);
7135                                 num_copies = btrfs_num_copies(
7136                                                 &root->fs_info->mapping_tree,
7137                                                 bytenr, num_bytes);
7138                                 if (mirror < num_copies - 1) {
7139                                         mirror += 1;
7140                                         goto again;
7141                                 }
7142                         }
7143                         data_checked += root->sectorsize;
7144                 }
7145                 offset += read_len;
7146         }
7147 out:
7148         free(data);
7149         return ret;
7150 }
7151
7152 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7153                                u64 num_bytes)
7154 {
7155         struct btrfs_path path;
7156         struct extent_buffer *leaf;
7157         struct btrfs_key key;
7158         int ret;
7159
7160         btrfs_init_path(&path);
7161         key.objectid = bytenr;
7162         key.type = BTRFS_EXTENT_ITEM_KEY;
7163         key.offset = (u64)-1;
7164
7165 again:
7166         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7167                                 0, 0);
7168         if (ret < 0) {
7169                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7170                 btrfs_release_path(&path);
7171                 return ret;
7172         } else if (ret) {
7173                 if (path.slots[0] > 0) {
7174                         path.slots[0]--;
7175                 } else {
7176                         ret = btrfs_prev_leaf(root, &path);
7177                         if (ret < 0) {
7178                                 goto out;
7179                         } else if (ret > 0) {
7180                                 ret = 0;
7181                                 goto out;
7182                         }
7183                 }
7184         }
7185
7186         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7187
7188         /*
7189          * Block group items come before extent items if they have the same
7190          * bytenr, so walk back one more just in case.  Dear future traveller,
7191          * first congrats on mastering time travel.  Now if it's not too much
7192          * trouble could you go back to 2006 and tell Chris to make the
7193          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7194          * EXTENT_ITEM_KEY please?
7195          */
7196         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7197                 if (path.slots[0] > 0) {
7198                         path.slots[0]--;
7199                 } else {
7200                         ret = btrfs_prev_leaf(root, &path);
7201                         if (ret < 0) {
7202                                 goto out;
7203                         } else if (ret > 0) {
7204                                 ret = 0;
7205                                 goto out;
7206                         }
7207                 }
7208                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7209         }
7210
7211         while (num_bytes) {
7212                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7213                         ret = btrfs_next_leaf(root, &path);
7214                         if (ret < 0) {
7215                                 fprintf(stderr, "Error going to next leaf "
7216                                         "%d\n", ret);
7217                                 btrfs_release_path(&path);
7218                                 return ret;
7219                         } else if (ret) {
7220                                 break;
7221                         }
7222                 }
7223                 leaf = path.nodes[0];
7224                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7225                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7226                         path.slots[0]++;
7227                         continue;
7228                 }
7229                 if (key.objectid + key.offset < bytenr) {
7230                         path.slots[0]++;
7231                         continue;
7232                 }
7233                 if (key.objectid > bytenr + num_bytes)
7234                         break;
7235
7236                 if (key.objectid == bytenr) {
7237                         if (key.offset >= num_bytes) {
7238                                 num_bytes = 0;
7239                                 break;
7240                         }
7241                         num_bytes -= key.offset;
7242                         bytenr += key.offset;
7243                 } else if (key.objectid < bytenr) {
7244                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7245                                 num_bytes = 0;
7246                                 break;
7247                         }
7248                         num_bytes = (bytenr + num_bytes) -
7249                                 (key.objectid + key.offset);
7250                         bytenr = key.objectid + key.offset;
7251                 } else {
7252                         if (key.objectid + key.offset < bytenr + num_bytes) {
7253                                 u64 new_start = key.objectid + key.offset;
7254                                 u64 new_bytes = bytenr + num_bytes - new_start;
7255
7256                                 /*
7257                                  * Weird case, the extent is in the middle of
7258                                  * our range, we'll have to search one side
7259                                  * and then the other.  Not sure if this happens
7260                                  * in real life, but no harm in coding it up
7261                                  * anyway just in case.
7262                                  */
7263                                 btrfs_release_path(&path);
7264                                 ret = check_extent_exists(root, new_start,
7265                                                           new_bytes);
7266                                 if (ret) {
7267                                         fprintf(stderr, "Right section didn't "
7268                                                 "have a record\n");
7269                                         break;
7270                                 }
7271                                 num_bytes = key.objectid - bytenr;
7272                                 goto again;
7273                         }
7274                         num_bytes = key.objectid - bytenr;
7275                 }
7276                 path.slots[0]++;
7277         }
7278         ret = 0;
7279
7280 out:
7281         if (num_bytes && !ret) {
7282                 fprintf(stderr, "There are no extents for csum range "
7283                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7284                 ret = 1;
7285         }
7286
7287         btrfs_release_path(&path);
7288         return ret;
7289 }
7290
7291 static int check_csums(struct btrfs_root *root)
7292 {
7293         struct btrfs_path path;
7294         struct extent_buffer *leaf;
7295         struct btrfs_key key;
7296         u64 offset = 0, num_bytes = 0;
7297         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7298         int errors = 0;
7299         int ret;
7300         u64 data_len;
7301         unsigned long leaf_offset;
7302
7303         root = root->fs_info->csum_root;
7304         if (!extent_buffer_uptodate(root->node)) {
7305                 fprintf(stderr, "No valid csum tree found\n");
7306                 return -ENOENT;
7307         }
7308
7309         btrfs_init_path(&path);
7310         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7311         key.type = BTRFS_EXTENT_CSUM_KEY;
7312         key.offset = 0;
7313         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7314         if (ret < 0) {
7315                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7316                 btrfs_release_path(&path);
7317                 return ret;
7318         }
7319
7320         if (ret > 0 && path.slots[0])
7321                 path.slots[0]--;
7322         ret = 0;
7323
7324         while (1) {
7325                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7326                         ret = btrfs_next_leaf(root, &path);
7327                         if (ret < 0) {
7328                                 fprintf(stderr, "Error going to next leaf "
7329                                         "%d\n", ret);
7330                                 break;
7331                         }
7332                         if (ret)
7333                                 break;
7334                 }
7335                 leaf = path.nodes[0];
7336
7337                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7338                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7339                         path.slots[0]++;
7340                         continue;
7341                 }
7342
7343                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7344                               csum_size) * root->sectorsize;
7345                 if (!check_data_csum)
7346                         goto skip_csum_check;
7347                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7348                 ret = check_extent_csums(root, key.offset, data_len,
7349                                          leaf_offset, leaf);
7350                 if (ret)
7351                         break;
7352 skip_csum_check:
7353                 if (!num_bytes) {
7354                         offset = key.offset;
7355                 } else if (key.offset != offset + num_bytes) {
7356                         ret = check_extent_exists(root, offset, num_bytes);
7357                         if (ret) {
7358                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7359                                         "there is no extent record\n",
7360                                         offset, offset+num_bytes);
7361                                 errors++;
7362                         }
7363                         offset = key.offset;
7364                         num_bytes = 0;
7365                 }
7366                 num_bytes += data_len;
7367                 path.slots[0]++;
7368         }
7369
7370         btrfs_release_path(&path);
7371         return errors;
7372 }
7373
7374 static int is_dropped_key(struct btrfs_key *key,
7375                           struct btrfs_key *drop_key) {
7376         if (key->objectid < drop_key->objectid)
7377                 return 1;
7378         else if (key->objectid == drop_key->objectid) {
7379                 if (key->type < drop_key->type)
7380                         return 1;
7381                 else if (key->type == drop_key->type) {
7382                         if (key->offset < drop_key->offset)
7383                                 return 1;
7384                 }
7385         }
7386         return 0;
7387 }
7388
7389 /*
7390  * Here are the rules for FULL_BACKREF.
7391  *
7392  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7393  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7394  *      FULL_BACKREF set.
7395  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7396  *    if it happened after the relocation occurred since we'll have dropped the
7397  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7398  *    have no real way to know for sure.
7399  *
7400  * We process the blocks one root at a time, and we start from the lowest root
7401  * objectid and go to the highest.  So we can just lookup the owner backref for
7402  * the record and if we don't find it then we know it doesn't exist and we have
7403  * a FULL BACKREF.
7404  *
7405  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7406  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7407  * be set or not and then we can check later once we've gathered all the refs.
7408  */
7409 static int calc_extent_flag(struct btrfs_root *root,
7410                            struct cache_tree *extent_cache,
7411                            struct extent_buffer *buf,
7412                            struct root_item_record *ri,
7413                            u64 *flags)
7414 {
7415         struct extent_record *rec;
7416         struct cache_extent *cache;
7417         struct tree_backref *tback;
7418         u64 owner = 0;
7419
7420         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7421         /* we have added this extent before */
7422         if (!cache)
7423                 return -ENOENT;
7424
7425         rec = container_of(cache, struct extent_record, cache);
7426
7427         /*
7428          * Except file/reloc tree, we can not have
7429          * FULL BACKREF MODE
7430          */
7431         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7432                 goto normal;
7433         /*
7434          * root node
7435          */
7436         if (buf->start == ri->bytenr)
7437                 goto normal;
7438
7439         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7440                 goto full_backref;
7441
7442         owner = btrfs_header_owner(buf);
7443         if (owner == ri->objectid)
7444                 goto normal;
7445
7446         tback = find_tree_backref(rec, 0, owner);
7447         if (!tback)
7448                 goto full_backref;
7449 normal:
7450         *flags = 0;
7451         if (rec->flag_block_full_backref != FLAG_UNSET &&
7452             rec->flag_block_full_backref != 0)
7453                 rec->bad_full_backref = 1;
7454         return 0;
7455 full_backref:
7456         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7457         if (rec->flag_block_full_backref != FLAG_UNSET &&
7458             rec->flag_block_full_backref != 1)
7459                 rec->bad_full_backref = 1;
7460         return 0;
7461 }
7462
7463 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7464 {
7465         fprintf(stderr, "Invalid key type(");
7466         print_key_type(stderr, 0, key_type);
7467         fprintf(stderr, ") found in root(");
7468         print_objectid(stderr, rootid, 0);
7469         fprintf(stderr, ")\n");
7470 }
7471
7472 /*
7473  * Check if the key is valid with its extent buffer.
7474  *
7475  * This is a early check in case invalid key exists in a extent buffer
7476  * This is not comprehensive yet, but should prevent wrong key/item passed
7477  * further
7478  */
7479 static int check_type_with_root(u64 rootid, u8 key_type)
7480 {
7481         switch (key_type) {
7482         /* Only valid in chunk tree */
7483         case BTRFS_DEV_ITEM_KEY:
7484         case BTRFS_CHUNK_ITEM_KEY:
7485                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7486                         goto err;
7487                 break;
7488         /* valid in csum and log tree */
7489         case BTRFS_CSUM_TREE_OBJECTID:
7490                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7491                       is_fstree(rootid)))
7492                         goto err;
7493                 break;
7494         case BTRFS_EXTENT_ITEM_KEY:
7495         case BTRFS_METADATA_ITEM_KEY:
7496         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7497                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7498                         goto err;
7499                 break;
7500         case BTRFS_ROOT_ITEM_KEY:
7501                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7502                         goto err;
7503                 break;
7504         case BTRFS_DEV_EXTENT_KEY:
7505                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7506                         goto err;
7507                 break;
7508         }
7509         return 0;
7510 err:
7511         report_mismatch_key_root(key_type, rootid);
7512         return -EINVAL;
7513 }
7514
7515 static int run_next_block(struct btrfs_root *root,
7516                           struct block_info *bits,
7517                           int bits_nr,
7518                           u64 *last,
7519                           struct cache_tree *pending,
7520                           struct cache_tree *seen,
7521                           struct cache_tree *reada,
7522                           struct cache_tree *nodes,
7523                           struct cache_tree *extent_cache,
7524                           struct cache_tree *chunk_cache,
7525                           struct rb_root *dev_cache,
7526                           struct block_group_tree *block_group_cache,
7527                           struct device_extent_tree *dev_extent_cache,
7528                           struct root_item_record *ri)
7529 {
7530         struct extent_buffer *buf;
7531         struct extent_record *rec = NULL;
7532         u64 bytenr;
7533         u32 size;
7534         u64 parent;
7535         u64 owner;
7536         u64 flags;
7537         u64 ptr;
7538         u64 gen = 0;
7539         int ret = 0;
7540         int i;
7541         int nritems;
7542         struct btrfs_key key;
7543         struct cache_extent *cache;
7544         int reada_bits;
7545
7546         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7547                                     bits_nr, &reada_bits);
7548         if (nritems == 0)
7549                 return 1;
7550
7551         if (!reada_bits) {
7552                 for(i = 0; i < nritems; i++) {
7553                         ret = add_cache_extent(reada, bits[i].start,
7554                                                bits[i].size);
7555                         if (ret == -EEXIST)
7556                                 continue;
7557
7558                         /* fixme, get the parent transid */
7559                         readahead_tree_block(root, bits[i].start,
7560                                              bits[i].size, 0);
7561                 }
7562         }
7563         *last = bits[0].start;
7564         bytenr = bits[0].start;
7565         size = bits[0].size;
7566
7567         cache = lookup_cache_extent(pending, bytenr, size);
7568         if (cache) {
7569                 remove_cache_extent(pending, cache);
7570                 free(cache);
7571         }
7572         cache = lookup_cache_extent(reada, bytenr, size);
7573         if (cache) {
7574                 remove_cache_extent(reada, cache);
7575                 free(cache);
7576         }
7577         cache = lookup_cache_extent(nodes, bytenr, size);
7578         if (cache) {
7579                 remove_cache_extent(nodes, cache);
7580                 free(cache);
7581         }
7582         cache = lookup_cache_extent(extent_cache, bytenr, size);
7583         if (cache) {
7584                 rec = container_of(cache, struct extent_record, cache);
7585                 gen = rec->parent_generation;
7586         }
7587
7588         /* fixme, get the real parent transid */
7589         buf = read_tree_block(root, bytenr, size, gen);
7590         if (!extent_buffer_uptodate(buf)) {
7591                 record_bad_block_io(root->fs_info,
7592                                     extent_cache, bytenr, size);
7593                 goto out;
7594         }
7595
7596         nritems = btrfs_header_nritems(buf);
7597
7598         flags = 0;
7599         if (!init_extent_tree) {
7600                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7601                                        btrfs_header_level(buf), 1, NULL,
7602                                        &flags);
7603                 if (ret < 0) {
7604                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7605                         if (ret < 0) {
7606                                 fprintf(stderr, "Couldn't calc extent flags\n");
7607                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7608                         }
7609                 }
7610         } else {
7611                 flags = 0;
7612                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7613                 if (ret < 0) {
7614                         fprintf(stderr, "Couldn't calc extent flags\n");
7615                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7616                 }
7617         }
7618
7619         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7620                 if (ri != NULL &&
7621                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7622                     ri->objectid == btrfs_header_owner(buf)) {
7623                         /*
7624                          * Ok we got to this block from it's original owner and
7625                          * we have FULL_BACKREF set.  Relocation can leave
7626                          * converted blocks over so this is altogether possible,
7627                          * however it's not possible if the generation > the
7628                          * last snapshot, so check for this case.
7629                          */
7630                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7631                             btrfs_header_generation(buf) > ri->last_snapshot) {
7632                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7633                                 rec->bad_full_backref = 1;
7634                         }
7635                 }
7636         } else {
7637                 if (ri != NULL &&
7638                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7639                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7640                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7641                         rec->bad_full_backref = 1;
7642                 }
7643         }
7644
7645         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7646                 rec->flag_block_full_backref = 1;
7647                 parent = bytenr;
7648                 owner = 0;
7649         } else {
7650                 rec->flag_block_full_backref = 0;
7651                 parent = 0;
7652                 owner = btrfs_header_owner(buf);
7653         }
7654
7655         ret = check_block(root, extent_cache, buf, flags);
7656         if (ret)
7657                 goto out;
7658
7659         if (btrfs_is_leaf(buf)) {
7660                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7661                 for (i = 0; i < nritems; i++) {
7662                         struct btrfs_file_extent_item *fi;
7663                         btrfs_item_key_to_cpu(buf, &key, i);
7664                         /*
7665                          * Check key type against the leaf owner.
7666                          * Could filter quite a lot of early error if
7667                          * owner is correct
7668                          */
7669                         if (check_type_with_root(btrfs_header_owner(buf),
7670                                                  key.type)) {
7671                                 fprintf(stderr, "ignoring invalid key\n");
7672                                 continue;
7673                         }
7674                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7675                                 process_extent_item(root, extent_cache, buf,
7676                                                     i);
7677                                 continue;
7678                         }
7679                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7680                                 process_extent_item(root, extent_cache, buf,
7681                                                     i);
7682                                 continue;
7683                         }
7684                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7685                                 total_csum_bytes +=
7686                                         btrfs_item_size_nr(buf, i);
7687                                 continue;
7688                         }
7689                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7690                                 process_chunk_item(chunk_cache, &key, buf, i);
7691                                 continue;
7692                         }
7693                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7694                                 process_device_item(dev_cache, &key, buf, i);
7695                                 continue;
7696                         }
7697                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7698                                 process_block_group_item(block_group_cache,
7699                                         &key, buf, i);
7700                                 continue;
7701                         }
7702                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7703                                 process_device_extent_item(dev_extent_cache,
7704                                         &key, buf, i);
7705                                 continue;
7706
7707                         }
7708                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7709 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7710                                 process_extent_ref_v0(extent_cache, buf, i);
7711 #else
7712                                 BUG();
7713 #endif
7714                                 continue;
7715                         }
7716
7717                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7718                                 ret = add_tree_backref(extent_cache,
7719                                                 key.objectid, 0, key.offset, 0);
7720                                 if (ret < 0)
7721                                         error("add_tree_backref failed: %s",
7722                                               strerror(-ret));
7723                                 continue;
7724                         }
7725                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7726                                 ret = add_tree_backref(extent_cache,
7727                                                 key.objectid, key.offset, 0, 0);
7728                                 if (ret < 0)
7729                                         error("add_tree_backref failed: %s",
7730                                               strerror(-ret));
7731                                 continue;
7732                         }
7733                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7734                                 struct btrfs_extent_data_ref *ref;
7735                                 ref = btrfs_item_ptr(buf, i,
7736                                                 struct btrfs_extent_data_ref);
7737                                 add_data_backref(extent_cache,
7738                                         key.objectid, 0,
7739                                         btrfs_extent_data_ref_root(buf, ref),
7740                                         btrfs_extent_data_ref_objectid(buf,
7741                                                                        ref),
7742                                         btrfs_extent_data_ref_offset(buf, ref),
7743                                         btrfs_extent_data_ref_count(buf, ref),
7744                                         0, root->sectorsize);
7745                                 continue;
7746                         }
7747                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7748                                 struct btrfs_shared_data_ref *ref;
7749                                 ref = btrfs_item_ptr(buf, i,
7750                                                 struct btrfs_shared_data_ref);
7751                                 add_data_backref(extent_cache,
7752                                         key.objectid, key.offset, 0, 0, 0,
7753                                         btrfs_shared_data_ref_count(buf, ref),
7754                                         0, root->sectorsize);
7755                                 continue;
7756                         }
7757                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7758                                 struct bad_item *bad;
7759
7760                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7761                                         continue;
7762                                 if (!owner)
7763                                         continue;
7764                                 bad = malloc(sizeof(struct bad_item));
7765                                 if (!bad)
7766                                         continue;
7767                                 INIT_LIST_HEAD(&bad->list);
7768                                 memcpy(&bad->key, &key,
7769                                        sizeof(struct btrfs_key));
7770                                 bad->root_id = owner;
7771                                 list_add_tail(&bad->list, &delete_items);
7772                                 continue;
7773                         }
7774                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7775                                 continue;
7776                         fi = btrfs_item_ptr(buf, i,
7777                                             struct btrfs_file_extent_item);
7778                         if (btrfs_file_extent_type(buf, fi) ==
7779                             BTRFS_FILE_EXTENT_INLINE)
7780                                 continue;
7781                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7782                                 continue;
7783
7784                         data_bytes_allocated +=
7785                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7786                         if (data_bytes_allocated < root->sectorsize) {
7787                                 abort();
7788                         }
7789                         data_bytes_referenced +=
7790                                 btrfs_file_extent_num_bytes(buf, fi);
7791                         add_data_backref(extent_cache,
7792                                 btrfs_file_extent_disk_bytenr(buf, fi),
7793                                 parent, owner, key.objectid, key.offset -
7794                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7795                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7796                 }
7797         } else {
7798                 int level;
7799                 struct btrfs_key first_key;
7800
7801                 first_key.objectid = 0;
7802
7803                 if (nritems > 0)
7804                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7805                 level = btrfs_header_level(buf);
7806                 for (i = 0; i < nritems; i++) {
7807                         struct extent_record tmpl;
7808
7809                         ptr = btrfs_node_blockptr(buf, i);
7810                         size = root->nodesize;
7811                         btrfs_node_key_to_cpu(buf, &key, i);
7812                         if (ri != NULL) {
7813                                 if ((level == ri->drop_level)
7814                                     && is_dropped_key(&key, &ri->drop_key)) {
7815                                         continue;
7816                                 }
7817                         }
7818
7819                         memset(&tmpl, 0, sizeof(tmpl));
7820                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7821                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7822                         tmpl.start = ptr;
7823                         tmpl.nr = size;
7824                         tmpl.refs = 1;
7825                         tmpl.metadata = 1;
7826                         tmpl.max_size = size;
7827                         ret = add_extent_rec(extent_cache, &tmpl);
7828                         if (ret < 0)
7829                                 goto out;
7830
7831                         ret = add_tree_backref(extent_cache, ptr, parent,
7832                                         owner, 1);
7833                         if (ret < 0) {
7834                                 error("add_tree_backref failed: %s",
7835                                       strerror(-ret));
7836                                 continue;
7837                         }
7838
7839                         if (level > 1) {
7840                                 add_pending(nodes, seen, ptr, size);
7841                         } else {
7842                                 add_pending(pending, seen, ptr, size);
7843                         }
7844                 }
7845                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7846                                       nritems) * sizeof(struct btrfs_key_ptr);
7847         }
7848         total_btree_bytes += buf->len;
7849         if (fs_root_objectid(btrfs_header_owner(buf)))
7850                 total_fs_tree_bytes += buf->len;
7851         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7852                 total_extent_tree_bytes += buf->len;
7853         if (!found_old_backref &&
7854             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7855             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7856             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7857                 found_old_backref = 1;
7858 out:
7859         free_extent_buffer(buf);
7860         return ret;
7861 }
7862
7863 static int add_root_to_pending(struct extent_buffer *buf,
7864                                struct cache_tree *extent_cache,
7865                                struct cache_tree *pending,
7866                                struct cache_tree *seen,
7867                                struct cache_tree *nodes,
7868                                u64 objectid)
7869 {
7870         struct extent_record tmpl;
7871         int ret;
7872
7873         if (btrfs_header_level(buf) > 0)
7874                 add_pending(nodes, seen, buf->start, buf->len);
7875         else
7876                 add_pending(pending, seen, buf->start, buf->len);
7877
7878         memset(&tmpl, 0, sizeof(tmpl));
7879         tmpl.start = buf->start;
7880         tmpl.nr = buf->len;
7881         tmpl.is_root = 1;
7882         tmpl.refs = 1;
7883         tmpl.metadata = 1;
7884         tmpl.max_size = buf->len;
7885         add_extent_rec(extent_cache, &tmpl);
7886
7887         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7888             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7889                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7890                                 0, 1);
7891         else
7892                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7893                                 1);
7894         return ret;
7895 }
7896
7897 /* as we fix the tree, we might be deleting blocks that
7898  * we're tracking for repair.  This hook makes sure we
7899  * remove any backrefs for blocks as we are fixing them.
7900  */
7901 static int free_extent_hook(struct btrfs_trans_handle *trans,
7902                             struct btrfs_root *root,
7903                             u64 bytenr, u64 num_bytes, u64 parent,
7904                             u64 root_objectid, u64 owner, u64 offset,
7905                             int refs_to_drop)
7906 {
7907         struct extent_record *rec;
7908         struct cache_extent *cache;
7909         int is_data;
7910         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7911
7912         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7913         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7914         if (!cache)
7915                 return 0;
7916
7917         rec = container_of(cache, struct extent_record, cache);
7918         if (is_data) {
7919                 struct data_backref *back;
7920                 back = find_data_backref(rec, parent, root_objectid, owner,
7921                                          offset, 1, bytenr, num_bytes);
7922                 if (!back)
7923                         goto out;
7924                 if (back->node.found_ref) {
7925                         back->found_ref -= refs_to_drop;
7926                         if (rec->refs)
7927                                 rec->refs -= refs_to_drop;
7928                 }
7929                 if (back->node.found_extent_tree) {
7930                         back->num_refs -= refs_to_drop;
7931                         if (rec->extent_item_refs)
7932                                 rec->extent_item_refs -= refs_to_drop;
7933                 }
7934                 if (back->found_ref == 0)
7935                         back->node.found_ref = 0;
7936                 if (back->num_refs == 0)
7937                         back->node.found_extent_tree = 0;
7938
7939                 if (!back->node.found_extent_tree && back->node.found_ref) {
7940                         list_del(&back->node.list);
7941                         free(back);
7942                 }
7943         } else {
7944                 struct tree_backref *back;
7945                 back = find_tree_backref(rec, parent, root_objectid);
7946                 if (!back)
7947                         goto out;
7948                 if (back->node.found_ref) {
7949                         if (rec->refs)
7950                                 rec->refs--;
7951                         back->node.found_ref = 0;
7952                 }
7953                 if (back->node.found_extent_tree) {
7954                         if (rec->extent_item_refs)
7955                                 rec->extent_item_refs--;
7956                         back->node.found_extent_tree = 0;
7957                 }
7958                 if (!back->node.found_extent_tree && back->node.found_ref) {
7959                         list_del(&back->node.list);
7960                         free(back);
7961                 }
7962         }
7963         maybe_free_extent_rec(extent_cache, rec);
7964 out:
7965         return 0;
7966 }
7967
7968 static int delete_extent_records(struct btrfs_trans_handle *trans,
7969                                  struct btrfs_root *root,
7970                                  struct btrfs_path *path,
7971                                  u64 bytenr)
7972 {
7973         struct btrfs_key key;
7974         struct btrfs_key found_key;
7975         struct extent_buffer *leaf;
7976         int ret;
7977         int slot;
7978
7979
7980         key.objectid = bytenr;
7981         key.type = (u8)-1;
7982         key.offset = (u64)-1;
7983
7984         while(1) {
7985                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7986                                         &key, path, 0, 1);
7987                 if (ret < 0)
7988                         break;
7989
7990                 if (ret > 0) {
7991                         ret = 0;
7992                         if (path->slots[0] == 0)
7993                                 break;
7994                         path->slots[0]--;
7995                 }
7996                 ret = 0;
7997
7998                 leaf = path->nodes[0];
7999                 slot = path->slots[0];
8000
8001                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8002                 if (found_key.objectid != bytenr)
8003                         break;
8004
8005                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8006                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8007                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8008                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8009                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8010                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8011                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8012                         btrfs_release_path(path);
8013                         if (found_key.type == 0) {
8014                                 if (found_key.offset == 0)
8015                                         break;
8016                                 key.offset = found_key.offset - 1;
8017                                 key.type = found_key.type;
8018                         }
8019                         key.type = found_key.type - 1;
8020                         key.offset = (u64)-1;
8021                         continue;
8022                 }
8023
8024                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8025                         found_key.objectid, found_key.type, found_key.offset);
8026
8027                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8028                 if (ret)
8029                         break;
8030                 btrfs_release_path(path);
8031
8032                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8033                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8034                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8035                                 found_key.offset : root->nodesize;
8036
8037                         ret = btrfs_update_block_group(trans, root, bytenr,
8038                                                        bytes, 0, 0);
8039                         if (ret)
8040                                 break;
8041                 }
8042         }
8043
8044         btrfs_release_path(path);
8045         return ret;
8046 }
8047
8048 /*
8049  * for a single backref, this will allocate a new extent
8050  * and add the backref to it.
8051  */
8052 static int record_extent(struct btrfs_trans_handle *trans,
8053                          struct btrfs_fs_info *info,
8054                          struct btrfs_path *path,
8055                          struct extent_record *rec,
8056                          struct extent_backref *back,
8057                          int allocated, u64 flags)
8058 {
8059         int ret = 0;
8060         struct btrfs_root *extent_root = info->extent_root;
8061         struct extent_buffer *leaf;
8062         struct btrfs_key ins_key;
8063         struct btrfs_extent_item *ei;
8064         struct data_backref *dback;
8065         struct btrfs_tree_block_info *bi;
8066
8067         if (!back->is_data)
8068                 rec->max_size = max_t(u64, rec->max_size,
8069                                     info->extent_root->nodesize);
8070
8071         if (!allocated) {
8072                 u32 item_size = sizeof(*ei);
8073
8074                 if (!back->is_data)
8075                         item_size += sizeof(*bi);
8076
8077                 ins_key.objectid = rec->start;
8078                 ins_key.offset = rec->max_size;
8079                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8080
8081                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8082                                         &ins_key, item_size);
8083                 if (ret)
8084                         goto fail;
8085
8086                 leaf = path->nodes[0];
8087                 ei = btrfs_item_ptr(leaf, path->slots[0],
8088                                     struct btrfs_extent_item);
8089
8090                 btrfs_set_extent_refs(leaf, ei, 0);
8091                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8092
8093                 if (back->is_data) {
8094                         btrfs_set_extent_flags(leaf, ei,
8095                                                BTRFS_EXTENT_FLAG_DATA);
8096                 } else {
8097                         struct btrfs_disk_key copy_key;;
8098
8099                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8100                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8101                                              sizeof(*bi));
8102
8103                         btrfs_set_disk_key_objectid(&copy_key,
8104                                                     rec->info_objectid);
8105                         btrfs_set_disk_key_type(&copy_key, 0);
8106                         btrfs_set_disk_key_offset(&copy_key, 0);
8107
8108                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8109                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8110
8111                         btrfs_set_extent_flags(leaf, ei,
8112                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8113                 }
8114
8115                 btrfs_mark_buffer_dirty(leaf);
8116                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8117                                                rec->max_size, 1, 0);
8118                 if (ret)
8119                         goto fail;
8120                 btrfs_release_path(path);
8121         }
8122
8123         if (back->is_data) {
8124                 u64 parent;
8125                 int i;
8126
8127                 dback = to_data_backref(back);
8128                 if (back->full_backref)
8129                         parent = dback->parent;
8130                 else
8131                         parent = 0;
8132
8133                 for (i = 0; i < dback->found_ref; i++) {
8134                         /* if parent != 0, we're doing a full backref
8135                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8136                          * just makes the backref allocator create a data
8137                          * backref
8138                          */
8139                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8140                                                    rec->start, rec->max_size,
8141                                                    parent,
8142                                                    dback->root,
8143                                                    parent ?
8144                                                    BTRFS_FIRST_FREE_OBJECTID :
8145                                                    dback->owner,
8146                                                    dback->offset);
8147                         if (ret)
8148                                 break;
8149                 }
8150                 fprintf(stderr, "adding new data backref"
8151                                 " on %llu %s %llu owner %llu"
8152                                 " offset %llu found %d\n",
8153                                 (unsigned long long)rec->start,
8154                                 back->full_backref ?
8155                                 "parent" : "root",
8156                                 back->full_backref ?
8157                                 (unsigned long long)parent :
8158                                 (unsigned long long)dback->root,
8159                                 (unsigned long long)dback->owner,
8160                                 (unsigned long long)dback->offset,
8161                                 dback->found_ref);
8162         } else {
8163                 u64 parent;
8164                 struct tree_backref *tback;
8165
8166                 tback = to_tree_backref(back);
8167                 if (back->full_backref)
8168                         parent = tback->parent;
8169                 else
8170                         parent = 0;
8171
8172                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8173                                            rec->start, rec->max_size,
8174                                            parent, tback->root, 0, 0);
8175                 fprintf(stderr, "adding new tree backref on "
8176                         "start %llu len %llu parent %llu root %llu\n",
8177                         rec->start, rec->max_size, parent, tback->root);
8178         }
8179 fail:
8180         btrfs_release_path(path);
8181         return ret;
8182 }
8183
8184 static struct extent_entry *find_entry(struct list_head *entries,
8185                                        u64 bytenr, u64 bytes)
8186 {
8187         struct extent_entry *entry = NULL;
8188
8189         list_for_each_entry(entry, entries, list) {
8190                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8191                         return entry;
8192         }
8193
8194         return NULL;
8195 }
8196
8197 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8198 {
8199         struct extent_entry *entry, *best = NULL, *prev = NULL;
8200
8201         list_for_each_entry(entry, entries, list) {
8202                 /*
8203                  * If there are as many broken entries as entries then we know
8204                  * not to trust this particular entry.
8205                  */
8206                 if (entry->broken == entry->count)
8207                         continue;
8208
8209                 /*
8210                  * Special case, when there are only two entries and 'best' is
8211                  * the first one
8212                  */
8213                 if (!prev) {
8214                         best = entry;
8215                         prev = entry;
8216                         continue;
8217                 }
8218
8219                 /*
8220                  * If our current entry == best then we can't be sure our best
8221                  * is really the best, so we need to keep searching.
8222                  */
8223                 if (best && best->count == entry->count) {
8224                         prev = entry;
8225                         best = NULL;
8226                         continue;
8227                 }
8228
8229                 /* Prev == entry, not good enough, have to keep searching */
8230                 if (!prev->broken && prev->count == entry->count)
8231                         continue;
8232
8233                 if (!best)
8234                         best = (prev->count > entry->count) ? prev : entry;
8235                 else if (best->count < entry->count)
8236                         best = entry;
8237                 prev = entry;
8238         }
8239
8240         return best;
8241 }
8242
8243 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8244                       struct data_backref *dback, struct extent_entry *entry)
8245 {
8246         struct btrfs_trans_handle *trans;
8247         struct btrfs_root *root;
8248         struct btrfs_file_extent_item *fi;
8249         struct extent_buffer *leaf;
8250         struct btrfs_key key;
8251         u64 bytenr, bytes;
8252         int ret, err;
8253
8254         key.objectid = dback->root;
8255         key.type = BTRFS_ROOT_ITEM_KEY;
8256         key.offset = (u64)-1;
8257         root = btrfs_read_fs_root(info, &key);
8258         if (IS_ERR(root)) {
8259                 fprintf(stderr, "Couldn't find root for our ref\n");
8260                 return -EINVAL;
8261         }
8262
8263         /*
8264          * The backref points to the original offset of the extent if it was
8265          * split, so we need to search down to the offset we have and then walk
8266          * forward until we find the backref we're looking for.
8267          */
8268         key.objectid = dback->owner;
8269         key.type = BTRFS_EXTENT_DATA_KEY;
8270         key.offset = dback->offset;
8271         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8272         if (ret < 0) {
8273                 fprintf(stderr, "Error looking up ref %d\n", ret);
8274                 return ret;
8275         }
8276
8277         while (1) {
8278                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8279                         ret = btrfs_next_leaf(root, path);
8280                         if (ret) {
8281                                 fprintf(stderr, "Couldn't find our ref, next\n");
8282                                 return -EINVAL;
8283                         }
8284                 }
8285                 leaf = path->nodes[0];
8286                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8287                 if (key.objectid != dback->owner ||
8288                     key.type != BTRFS_EXTENT_DATA_KEY) {
8289                         fprintf(stderr, "Couldn't find our ref, search\n");
8290                         return -EINVAL;
8291                 }
8292                 fi = btrfs_item_ptr(leaf, path->slots[0],
8293                                     struct btrfs_file_extent_item);
8294                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8295                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8296
8297                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8298                         break;
8299                 path->slots[0]++;
8300         }
8301
8302         btrfs_release_path(path);
8303
8304         trans = btrfs_start_transaction(root, 1);
8305         if (IS_ERR(trans))
8306                 return PTR_ERR(trans);
8307
8308         /*
8309          * Ok we have the key of the file extent we want to fix, now we can cow
8310          * down to the thing and fix it.
8311          */
8312         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8313         if (ret < 0) {
8314                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8315                         key.objectid, key.type, key.offset, ret);
8316                 goto out;
8317         }
8318         if (ret > 0) {
8319                 fprintf(stderr, "Well that's odd, we just found this key "
8320                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8321                         key.offset);
8322                 ret = -EINVAL;
8323                 goto out;
8324         }
8325         leaf = path->nodes[0];
8326         fi = btrfs_item_ptr(leaf, path->slots[0],
8327                             struct btrfs_file_extent_item);
8328
8329         if (btrfs_file_extent_compression(leaf, fi) &&
8330             dback->disk_bytenr != entry->bytenr) {
8331                 fprintf(stderr, "Ref doesn't match the record start and is "
8332                         "compressed, please take a btrfs-image of this file "
8333                         "system and send it to a btrfs developer so they can "
8334                         "complete this functionality for bytenr %Lu\n",
8335                         dback->disk_bytenr);
8336                 ret = -EINVAL;
8337                 goto out;
8338         }
8339
8340         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8341                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8342         } else if (dback->disk_bytenr > entry->bytenr) {
8343                 u64 off_diff, offset;
8344
8345                 off_diff = dback->disk_bytenr - entry->bytenr;
8346                 offset = btrfs_file_extent_offset(leaf, fi);
8347                 if (dback->disk_bytenr + offset +
8348                     btrfs_file_extent_num_bytes(leaf, fi) >
8349                     entry->bytenr + entry->bytes) {
8350                         fprintf(stderr, "Ref is past the entry end, please "
8351                                 "take a btrfs-image of this file system and "
8352                                 "send it to a btrfs developer, ref %Lu\n",
8353                                 dback->disk_bytenr);
8354                         ret = -EINVAL;
8355                         goto out;
8356                 }
8357                 offset += off_diff;
8358                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8359                 btrfs_set_file_extent_offset(leaf, fi, offset);
8360         } else if (dback->disk_bytenr < entry->bytenr) {
8361                 u64 offset;
8362
8363                 offset = btrfs_file_extent_offset(leaf, fi);
8364                 if (dback->disk_bytenr + offset < entry->bytenr) {
8365                         fprintf(stderr, "Ref is before the entry start, please"
8366                                 " take a btrfs-image of this file system and "
8367                                 "send it to a btrfs developer, ref %Lu\n",
8368                                 dback->disk_bytenr);
8369                         ret = -EINVAL;
8370                         goto out;
8371                 }
8372
8373                 offset += dback->disk_bytenr;
8374                 offset -= entry->bytenr;
8375                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8376                 btrfs_set_file_extent_offset(leaf, fi, offset);
8377         }
8378
8379         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8380
8381         /*
8382          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8383          * only do this if we aren't using compression, otherwise it's a
8384          * trickier case.
8385          */
8386         if (!btrfs_file_extent_compression(leaf, fi))
8387                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8388         else
8389                 printf("ram bytes may be wrong?\n");
8390         btrfs_mark_buffer_dirty(leaf);
8391 out:
8392         err = btrfs_commit_transaction(trans, root);
8393         btrfs_release_path(path);
8394         return ret ? ret : err;
8395 }
8396
8397 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8398                            struct extent_record *rec)
8399 {
8400         struct extent_backref *back;
8401         struct data_backref *dback;
8402         struct extent_entry *entry, *best = NULL;
8403         LIST_HEAD(entries);
8404         int nr_entries = 0;
8405         int broken_entries = 0;
8406         int ret = 0;
8407         short mismatch = 0;
8408
8409         /*
8410          * Metadata is easy and the backrefs should always agree on bytenr and
8411          * size, if not we've got bigger issues.
8412          */
8413         if (rec->metadata)
8414                 return 0;
8415
8416         list_for_each_entry(back, &rec->backrefs, list) {
8417                 if (back->full_backref || !back->is_data)
8418                         continue;
8419
8420                 dback = to_data_backref(back);
8421
8422                 /*
8423                  * We only pay attention to backrefs that we found a real
8424                  * backref for.
8425                  */
8426                 if (dback->found_ref == 0)
8427                         continue;
8428
8429                 /*
8430                  * For now we only catch when the bytes don't match, not the
8431                  * bytenr.  We can easily do this at the same time, but I want
8432                  * to have a fs image to test on before we just add repair
8433                  * functionality willy-nilly so we know we won't screw up the
8434                  * repair.
8435                  */
8436
8437                 entry = find_entry(&entries, dback->disk_bytenr,
8438                                    dback->bytes);
8439                 if (!entry) {
8440                         entry = malloc(sizeof(struct extent_entry));
8441                         if (!entry) {
8442                                 ret = -ENOMEM;
8443                                 goto out;
8444                         }
8445                         memset(entry, 0, sizeof(*entry));
8446                         entry->bytenr = dback->disk_bytenr;
8447                         entry->bytes = dback->bytes;
8448                         list_add_tail(&entry->list, &entries);
8449                         nr_entries++;
8450                 }
8451
8452                 /*
8453                  * If we only have on entry we may think the entries agree when
8454                  * in reality they don't so we have to do some extra checking.
8455                  */
8456                 if (dback->disk_bytenr != rec->start ||
8457                     dback->bytes != rec->nr || back->broken)
8458                         mismatch = 1;
8459
8460                 if (back->broken) {
8461                         entry->broken++;
8462                         broken_entries++;
8463                 }
8464
8465                 entry->count++;
8466         }
8467
8468         /* Yay all the backrefs agree, carry on good sir */
8469         if (nr_entries <= 1 && !mismatch)
8470                 goto out;
8471
8472         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8473                 "%Lu\n", rec->start);
8474
8475         /*
8476          * First we want to see if the backrefs can agree amongst themselves who
8477          * is right, so figure out which one of the entries has the highest
8478          * count.
8479          */
8480         best = find_most_right_entry(&entries);
8481
8482         /*
8483          * Ok so we may have an even split between what the backrefs think, so
8484          * this is where we use the extent ref to see what it thinks.
8485          */
8486         if (!best) {
8487                 entry = find_entry(&entries, rec->start, rec->nr);
8488                 if (!entry && (!broken_entries || !rec->found_rec)) {
8489                         fprintf(stderr, "Backrefs don't agree with each other "
8490                                 "and extent record doesn't agree with anybody,"
8491                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8492                                 rec->start, rec->nr);
8493                         ret = -EINVAL;
8494                         goto out;
8495                 } else if (!entry) {
8496                         /*
8497                          * Ok our backrefs were broken, we'll assume this is the
8498                          * correct value and add an entry for this range.
8499                          */
8500                         entry = malloc(sizeof(struct extent_entry));
8501                         if (!entry) {
8502                                 ret = -ENOMEM;
8503                                 goto out;
8504                         }
8505                         memset(entry, 0, sizeof(*entry));
8506                         entry->bytenr = rec->start;
8507                         entry->bytes = rec->nr;
8508                         list_add_tail(&entry->list, &entries);
8509                         nr_entries++;
8510                 }
8511                 entry->count++;
8512                 best = find_most_right_entry(&entries);
8513                 if (!best) {
8514                         fprintf(stderr, "Backrefs and extent record evenly "
8515                                 "split on who is right, this is going to "
8516                                 "require user input to fix bytenr %Lu bytes "
8517                                 "%Lu\n", rec->start, rec->nr);
8518                         ret = -EINVAL;
8519                         goto out;
8520                 }
8521         }
8522
8523         /*
8524          * I don't think this can happen currently as we'll abort() if we catch
8525          * this case higher up, but in case somebody removes that we still can't
8526          * deal with it properly here yet, so just bail out of that's the case.
8527          */
8528         if (best->bytenr != rec->start) {
8529                 fprintf(stderr, "Extent start and backref starts don't match, "
8530                         "please use btrfs-image on this file system and send "
8531                         "it to a btrfs developer so they can make fsck fix "
8532                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8533                         rec->start, rec->nr);
8534                 ret = -EINVAL;
8535                 goto out;
8536         }
8537
8538         /*
8539          * Ok great we all agreed on an extent record, let's go find the real
8540          * references and fix up the ones that don't match.
8541          */
8542         list_for_each_entry(back, &rec->backrefs, list) {
8543                 if (back->full_backref || !back->is_data)
8544                         continue;
8545
8546                 dback = to_data_backref(back);
8547
8548                 /*
8549                  * Still ignoring backrefs that don't have a real ref attached
8550                  * to them.
8551                  */
8552                 if (dback->found_ref == 0)
8553                         continue;
8554
8555                 if (dback->bytes == best->bytes &&
8556                     dback->disk_bytenr == best->bytenr)
8557                         continue;
8558
8559                 ret = repair_ref(info, path, dback, best);
8560                 if (ret)
8561                         goto out;
8562         }
8563
8564         /*
8565          * Ok we messed with the actual refs, which means we need to drop our
8566          * entire cache and go back and rescan.  I know this is a huge pain and
8567          * adds a lot of extra work, but it's the only way to be safe.  Once all
8568          * the backrefs agree we may not need to do anything to the extent
8569          * record itself.
8570          */
8571         ret = -EAGAIN;
8572 out:
8573         while (!list_empty(&entries)) {
8574                 entry = list_entry(entries.next, struct extent_entry, list);
8575                 list_del_init(&entry->list);
8576                 free(entry);
8577         }
8578         return ret;
8579 }
8580
8581 static int process_duplicates(struct btrfs_root *root,
8582                               struct cache_tree *extent_cache,
8583                               struct extent_record *rec)
8584 {
8585         struct extent_record *good, *tmp;
8586         struct cache_extent *cache;
8587         int ret;
8588
8589         /*
8590          * If we found a extent record for this extent then return, or if we
8591          * have more than one duplicate we are likely going to need to delete
8592          * something.
8593          */
8594         if (rec->found_rec || rec->num_duplicates > 1)
8595                 return 0;
8596
8597         /* Shouldn't happen but just in case */
8598         BUG_ON(!rec->num_duplicates);
8599
8600         /*
8601          * So this happens if we end up with a backref that doesn't match the
8602          * actual extent entry.  So either the backref is bad or the extent
8603          * entry is bad.  Either way we want to have the extent_record actually
8604          * reflect what we found in the extent_tree, so we need to take the
8605          * duplicate out and use that as the extent_record since the only way we
8606          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8607          */
8608         remove_cache_extent(extent_cache, &rec->cache);
8609
8610         good = to_extent_record(rec->dups.next);
8611         list_del_init(&good->list);
8612         INIT_LIST_HEAD(&good->backrefs);
8613         INIT_LIST_HEAD(&good->dups);
8614         good->cache.start = good->start;
8615         good->cache.size = good->nr;
8616         good->content_checked = 0;
8617         good->owner_ref_checked = 0;
8618         good->num_duplicates = 0;
8619         good->refs = rec->refs;
8620         list_splice_init(&rec->backrefs, &good->backrefs);
8621         while (1) {
8622                 cache = lookup_cache_extent(extent_cache, good->start,
8623                                             good->nr);
8624                 if (!cache)
8625                         break;
8626                 tmp = container_of(cache, struct extent_record, cache);
8627
8628                 /*
8629                  * If we find another overlapping extent and it's found_rec is
8630                  * set then it's a duplicate and we need to try and delete
8631                  * something.
8632                  */
8633                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8634                         if (list_empty(&good->list))
8635                                 list_add_tail(&good->list,
8636                                               &duplicate_extents);
8637                         good->num_duplicates += tmp->num_duplicates + 1;
8638                         list_splice_init(&tmp->dups, &good->dups);
8639                         list_del_init(&tmp->list);
8640                         list_add_tail(&tmp->list, &good->dups);
8641                         remove_cache_extent(extent_cache, &tmp->cache);
8642                         continue;
8643                 }
8644
8645                 /*
8646                  * Ok we have another non extent item backed extent rec, so lets
8647                  * just add it to this extent and carry on like we did above.
8648                  */
8649                 good->refs += tmp->refs;
8650                 list_splice_init(&tmp->backrefs, &good->backrefs);
8651                 remove_cache_extent(extent_cache, &tmp->cache);
8652                 free(tmp);
8653         }
8654         ret = insert_cache_extent(extent_cache, &good->cache);
8655         BUG_ON(ret);
8656         free(rec);
8657         return good->num_duplicates ? 0 : 1;
8658 }
8659
8660 static int delete_duplicate_records(struct btrfs_root *root,
8661                                     struct extent_record *rec)
8662 {
8663         struct btrfs_trans_handle *trans;
8664         LIST_HEAD(delete_list);
8665         struct btrfs_path path;
8666         struct extent_record *tmp, *good, *n;
8667         int nr_del = 0;
8668         int ret = 0, err;
8669         struct btrfs_key key;
8670
8671         btrfs_init_path(&path);
8672
8673         good = rec;
8674         /* Find the record that covers all of the duplicates. */
8675         list_for_each_entry(tmp, &rec->dups, list) {
8676                 if (good->start < tmp->start)
8677                         continue;
8678                 if (good->nr > tmp->nr)
8679                         continue;
8680
8681                 if (tmp->start + tmp->nr < good->start + good->nr) {
8682                         fprintf(stderr, "Ok we have overlapping extents that "
8683                                 "aren't completely covered by each other, this "
8684                                 "is going to require more careful thought.  "
8685                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8686                                 tmp->start, tmp->nr, good->start, good->nr);
8687                         abort();
8688                 }
8689                 good = tmp;
8690         }
8691
8692         if (good != rec)
8693                 list_add_tail(&rec->list, &delete_list);
8694
8695         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8696                 if (tmp == good)
8697                         continue;
8698                 list_move_tail(&tmp->list, &delete_list);
8699         }
8700
8701         root = root->fs_info->extent_root;
8702         trans = btrfs_start_transaction(root, 1);
8703         if (IS_ERR(trans)) {
8704                 ret = PTR_ERR(trans);
8705                 goto out;
8706         }
8707
8708         list_for_each_entry(tmp, &delete_list, list) {
8709                 if (tmp->found_rec == 0)
8710                         continue;
8711                 key.objectid = tmp->start;
8712                 key.type = BTRFS_EXTENT_ITEM_KEY;
8713                 key.offset = tmp->nr;
8714
8715                 /* Shouldn't happen but just in case */
8716                 if (tmp->metadata) {
8717                         fprintf(stderr, "Well this shouldn't happen, extent "
8718                                 "record overlaps but is metadata? "
8719                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8720                         abort();
8721                 }
8722
8723                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8724                 if (ret) {
8725                         if (ret > 0)
8726                                 ret = -EINVAL;
8727                         break;
8728                 }
8729                 ret = btrfs_del_item(trans, root, &path);
8730                 if (ret)
8731                         break;
8732                 btrfs_release_path(&path);
8733                 nr_del++;
8734         }
8735         err = btrfs_commit_transaction(trans, root);
8736         if (err && !ret)
8737                 ret = err;
8738 out:
8739         while (!list_empty(&delete_list)) {
8740                 tmp = to_extent_record(delete_list.next);
8741                 list_del_init(&tmp->list);
8742                 if (tmp == rec)
8743                         continue;
8744                 free(tmp);
8745         }
8746
8747         while (!list_empty(&rec->dups)) {
8748                 tmp = to_extent_record(rec->dups.next);
8749                 list_del_init(&tmp->list);
8750                 free(tmp);
8751         }
8752
8753         btrfs_release_path(&path);
8754
8755         if (!ret && !nr_del)
8756                 rec->num_duplicates = 0;
8757
8758         return ret ? ret : nr_del;
8759 }
8760
8761 static int find_possible_backrefs(struct btrfs_fs_info *info,
8762                                   struct btrfs_path *path,
8763                                   struct cache_tree *extent_cache,
8764                                   struct extent_record *rec)
8765 {
8766         struct btrfs_root *root;
8767         struct extent_backref *back;
8768         struct data_backref *dback;
8769         struct cache_extent *cache;
8770         struct btrfs_file_extent_item *fi;
8771         struct btrfs_key key;
8772         u64 bytenr, bytes;
8773         int ret;
8774
8775         list_for_each_entry(back, &rec->backrefs, list) {
8776                 /* Don't care about full backrefs (poor unloved backrefs) */
8777                 if (back->full_backref || !back->is_data)
8778                         continue;
8779
8780                 dback = to_data_backref(back);
8781
8782                 /* We found this one, we don't need to do a lookup */
8783                 if (dback->found_ref)
8784                         continue;
8785
8786                 key.objectid = dback->root;
8787                 key.type = BTRFS_ROOT_ITEM_KEY;
8788                 key.offset = (u64)-1;
8789
8790                 root = btrfs_read_fs_root(info, &key);
8791
8792                 /* No root, definitely a bad ref, skip */
8793                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8794                         continue;
8795                 /* Other err, exit */
8796                 if (IS_ERR(root))
8797                         return PTR_ERR(root);
8798
8799                 key.objectid = dback->owner;
8800                 key.type = BTRFS_EXTENT_DATA_KEY;
8801                 key.offset = dback->offset;
8802                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8803                 if (ret) {
8804                         btrfs_release_path(path);
8805                         if (ret < 0)
8806                                 return ret;
8807                         /* Didn't find it, we can carry on */
8808                         ret = 0;
8809                         continue;
8810                 }
8811
8812                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8813                                     struct btrfs_file_extent_item);
8814                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8815                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8816                 btrfs_release_path(path);
8817                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8818                 if (cache) {
8819                         struct extent_record *tmp;
8820                         tmp = container_of(cache, struct extent_record, cache);
8821
8822                         /*
8823                          * If we found an extent record for the bytenr for this
8824                          * particular backref then we can't add it to our
8825                          * current extent record.  We only want to add backrefs
8826                          * that don't have a corresponding extent item in the
8827                          * extent tree since they likely belong to this record
8828                          * and we need to fix it if it doesn't match bytenrs.
8829                          */
8830                         if  (tmp->found_rec)
8831                                 continue;
8832                 }
8833
8834                 dback->found_ref += 1;
8835                 dback->disk_bytenr = bytenr;
8836                 dback->bytes = bytes;
8837
8838                 /*
8839                  * Set this so the verify backref code knows not to trust the
8840                  * values in this backref.
8841                  */
8842                 back->broken = 1;
8843         }
8844
8845         return 0;
8846 }
8847
8848 /*
8849  * Record orphan data ref into corresponding root.
8850  *
8851  * Return 0 if the extent item contains data ref and recorded.
8852  * Return 1 if the extent item contains no useful data ref
8853  *   On that case, it may contains only shared_dataref or metadata backref
8854  *   or the file extent exists(this should be handled by the extent bytenr
8855  *   recovery routine)
8856  * Return <0 if something goes wrong.
8857  */
8858 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8859                                       struct extent_record *rec)
8860 {
8861         struct btrfs_key key;
8862         struct btrfs_root *dest_root;
8863         struct extent_backref *back;
8864         struct data_backref *dback;
8865         struct orphan_data_extent *orphan;
8866         struct btrfs_path path;
8867         int recorded_data_ref = 0;
8868         int ret = 0;
8869
8870         if (rec->metadata)
8871                 return 1;
8872         btrfs_init_path(&path);
8873         list_for_each_entry(back, &rec->backrefs, list) {
8874                 if (back->full_backref || !back->is_data ||
8875                     !back->found_extent_tree)
8876                         continue;
8877                 dback = to_data_backref(back);
8878                 if (dback->found_ref)
8879                         continue;
8880                 key.objectid = dback->root;
8881                 key.type = BTRFS_ROOT_ITEM_KEY;
8882                 key.offset = (u64)-1;
8883
8884                 dest_root = btrfs_read_fs_root(fs_info, &key);
8885
8886                 /* For non-exist root we just skip it */
8887                 if (IS_ERR(dest_root) || !dest_root)
8888                         continue;
8889
8890                 key.objectid = dback->owner;
8891                 key.type = BTRFS_EXTENT_DATA_KEY;
8892                 key.offset = dback->offset;
8893
8894                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8895                 btrfs_release_path(&path);
8896                 /*
8897                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8898                  * we need to record it for inode/file extent rebuild.
8899                  * For ret > 0, we record it only for file extent rebuild.
8900                  * For ret == 0, the file extent exists but only bytenr
8901                  * mismatch, let the original bytenr fix routine to handle,
8902                  * don't record it.
8903                  */
8904                 if (ret == 0)
8905                         continue;
8906                 ret = 0;
8907                 orphan = malloc(sizeof(*orphan));
8908                 if (!orphan) {
8909                         ret = -ENOMEM;
8910                         goto out;
8911                 }
8912                 INIT_LIST_HEAD(&orphan->list);
8913                 orphan->root = dback->root;
8914                 orphan->objectid = dback->owner;
8915                 orphan->offset = dback->offset;
8916                 orphan->disk_bytenr = rec->cache.start;
8917                 orphan->disk_len = rec->cache.size;
8918                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8919                 recorded_data_ref = 1;
8920         }
8921 out:
8922         btrfs_release_path(&path);
8923         if (!ret)
8924                 return !recorded_data_ref;
8925         else
8926                 return ret;
8927 }
8928
8929 /*
8930  * when an incorrect extent item is found, this will delete
8931  * all of the existing entries for it and recreate them
8932  * based on what the tree scan found.
8933  */
8934 static int fixup_extent_refs(struct btrfs_fs_info *info,
8935                              struct cache_tree *extent_cache,
8936                              struct extent_record *rec)
8937 {
8938         struct btrfs_trans_handle *trans = NULL;
8939         int ret;
8940         struct btrfs_path path;
8941         struct list_head *cur = rec->backrefs.next;
8942         struct cache_extent *cache;
8943         struct extent_backref *back;
8944         int allocated = 0;
8945         u64 flags = 0;
8946
8947         if (rec->flag_block_full_backref)
8948                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8949
8950         btrfs_init_path(&path);
8951         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8952                 /*
8953                  * Sometimes the backrefs themselves are so broken they don't
8954                  * get attached to any meaningful rec, so first go back and
8955                  * check any of our backrefs that we couldn't find and throw
8956                  * them into the list if we find the backref so that
8957                  * verify_backrefs can figure out what to do.
8958                  */
8959                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8960                 if (ret < 0)
8961                         goto out;
8962         }
8963
8964         /* step one, make sure all of the backrefs agree */
8965         ret = verify_backrefs(info, &path, rec);
8966         if (ret < 0)
8967                 goto out;
8968
8969         trans = btrfs_start_transaction(info->extent_root, 1);
8970         if (IS_ERR(trans)) {
8971                 ret = PTR_ERR(trans);
8972                 goto out;
8973         }
8974
8975         /* step two, delete all the existing records */
8976         ret = delete_extent_records(trans, info->extent_root, &path,
8977                                     rec->start);
8978
8979         if (ret < 0)
8980                 goto out;
8981
8982         /* was this block corrupt?  If so, don't add references to it */
8983         cache = lookup_cache_extent(info->corrupt_blocks,
8984                                     rec->start, rec->max_size);
8985         if (cache) {
8986                 ret = 0;
8987                 goto out;
8988         }
8989
8990         /* step three, recreate all the refs we did find */
8991         while(cur != &rec->backrefs) {
8992                 back = to_extent_backref(cur);
8993                 cur = cur->next;
8994
8995                 /*
8996                  * if we didn't find any references, don't create a
8997                  * new extent record
8998                  */
8999                 if (!back->found_ref)
9000                         continue;
9001
9002                 rec->bad_full_backref = 0;
9003                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9004                 allocated = 1;
9005
9006                 if (ret)
9007                         goto out;
9008         }
9009 out:
9010         if (trans) {
9011                 int err = btrfs_commit_transaction(trans, info->extent_root);
9012                 if (!ret)
9013                         ret = err;
9014         }
9015
9016         if (!ret)
9017                 fprintf(stderr, "Repaired extent references for %llu\n",
9018                                 (unsigned long long)rec->start);
9019
9020         btrfs_release_path(&path);
9021         return ret;
9022 }
9023
9024 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9025                               struct extent_record *rec)
9026 {
9027         struct btrfs_trans_handle *trans;
9028         struct btrfs_root *root = fs_info->extent_root;
9029         struct btrfs_path path;
9030         struct btrfs_extent_item *ei;
9031         struct btrfs_key key;
9032         u64 flags;
9033         int ret = 0;
9034
9035         key.objectid = rec->start;
9036         if (rec->metadata) {
9037                 key.type = BTRFS_METADATA_ITEM_KEY;
9038                 key.offset = rec->info_level;
9039         } else {
9040                 key.type = BTRFS_EXTENT_ITEM_KEY;
9041                 key.offset = rec->max_size;
9042         }
9043
9044         trans = btrfs_start_transaction(root, 0);
9045         if (IS_ERR(trans))
9046                 return PTR_ERR(trans);
9047
9048         btrfs_init_path(&path);
9049         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9050         if (ret < 0) {
9051                 btrfs_release_path(&path);
9052                 btrfs_commit_transaction(trans, root);
9053                 return ret;
9054         } else if (ret) {
9055                 fprintf(stderr, "Didn't find extent for %llu\n",
9056                         (unsigned long long)rec->start);
9057                 btrfs_release_path(&path);
9058                 btrfs_commit_transaction(trans, root);
9059                 return -ENOENT;
9060         }
9061
9062         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9063                             struct btrfs_extent_item);
9064         flags = btrfs_extent_flags(path.nodes[0], ei);
9065         if (rec->flag_block_full_backref) {
9066                 fprintf(stderr, "setting full backref on %llu\n",
9067                         (unsigned long long)key.objectid);
9068                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9069         } else {
9070                 fprintf(stderr, "clearing full backref on %llu\n",
9071                         (unsigned long long)key.objectid);
9072                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9073         }
9074         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9075         btrfs_mark_buffer_dirty(path.nodes[0]);
9076         btrfs_release_path(&path);
9077         ret = btrfs_commit_transaction(trans, root);
9078         if (!ret)
9079                 fprintf(stderr, "Repaired extent flags for %llu\n",
9080                                 (unsigned long long)rec->start);
9081
9082         return ret;
9083 }
9084
9085 /* right now we only prune from the extent allocation tree */
9086 static int prune_one_block(struct btrfs_trans_handle *trans,
9087                            struct btrfs_fs_info *info,
9088                            struct btrfs_corrupt_block *corrupt)
9089 {
9090         int ret;
9091         struct btrfs_path path;
9092         struct extent_buffer *eb;
9093         u64 found;
9094         int slot;
9095         int nritems;
9096         int level = corrupt->level + 1;
9097
9098         btrfs_init_path(&path);
9099 again:
9100         /* we want to stop at the parent to our busted block */
9101         path.lowest_level = level;
9102
9103         ret = btrfs_search_slot(trans, info->extent_root,
9104                                 &corrupt->key, &path, -1, 1);
9105
9106         if (ret < 0)
9107                 goto out;
9108
9109         eb = path.nodes[level];
9110         if (!eb) {
9111                 ret = -ENOENT;
9112                 goto out;
9113         }
9114
9115         /*
9116          * hopefully the search gave us the block we want to prune,
9117          * lets try that first
9118          */
9119         slot = path.slots[level];
9120         found =  btrfs_node_blockptr(eb, slot);
9121         if (found == corrupt->cache.start)
9122                 goto del_ptr;
9123
9124         nritems = btrfs_header_nritems(eb);
9125
9126         /* the search failed, lets scan this node and hope we find it */
9127         for (slot = 0; slot < nritems; slot++) {
9128                 found =  btrfs_node_blockptr(eb, slot);
9129                 if (found == corrupt->cache.start)
9130                         goto del_ptr;
9131         }
9132         /*
9133          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9134          * to this block
9135          */
9136         if (eb == info->extent_root->node) {
9137                 ret = -ENOENT;
9138                 goto out;
9139         } else {
9140                 level++;
9141                 btrfs_release_path(&path);
9142                 goto again;
9143         }
9144
9145 del_ptr:
9146         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9147         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9148
9149 out:
9150         btrfs_release_path(&path);
9151         return ret;
9152 }
9153
9154 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9155 {
9156         struct btrfs_trans_handle *trans = NULL;
9157         struct cache_extent *cache;
9158         struct btrfs_corrupt_block *corrupt;
9159
9160         while (1) {
9161                 cache = search_cache_extent(info->corrupt_blocks, 0);
9162                 if (!cache)
9163                         break;
9164                 if (!trans) {
9165                         trans = btrfs_start_transaction(info->extent_root, 1);
9166                         if (IS_ERR(trans))
9167                                 return PTR_ERR(trans);
9168                 }
9169                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9170                 prune_one_block(trans, info, corrupt);
9171                 remove_cache_extent(info->corrupt_blocks, cache);
9172         }
9173         if (trans)
9174                 return btrfs_commit_transaction(trans, info->extent_root);
9175         return 0;
9176 }
9177
9178 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9179 {
9180         struct btrfs_block_group_cache *cache;
9181         u64 start, end;
9182         int ret;
9183
9184         while (1) {
9185                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9186                                             &start, &end, EXTENT_DIRTY);
9187                 if (ret)
9188                         break;
9189                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9190                                    GFP_NOFS);
9191         }
9192
9193         start = 0;
9194         while (1) {
9195                 cache = btrfs_lookup_first_block_group(fs_info, start);
9196                 if (!cache)
9197                         break;
9198                 if (cache->cached)
9199                         cache->cached = 0;
9200                 start = cache->key.objectid + cache->key.offset;
9201         }
9202 }
9203
9204 static int check_extent_refs(struct btrfs_root *root,
9205                              struct cache_tree *extent_cache)
9206 {
9207         struct extent_record *rec;
9208         struct cache_extent *cache;
9209         int ret = 0;
9210         int had_dups = 0;
9211
9212         if (repair) {
9213                 /*
9214                  * if we're doing a repair, we have to make sure
9215                  * we don't allocate from the problem extents.
9216                  * In the worst case, this will be all the
9217                  * extents in the FS
9218                  */
9219                 cache = search_cache_extent(extent_cache, 0);
9220                 while(cache) {
9221                         rec = container_of(cache, struct extent_record, cache);
9222                         set_extent_dirty(root->fs_info->excluded_extents,
9223                                          rec->start,
9224                                          rec->start + rec->max_size - 1);
9225                         cache = next_cache_extent(cache);
9226                 }
9227
9228                 /* pin down all the corrupted blocks too */
9229                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9230                 while(cache) {
9231                         set_extent_dirty(root->fs_info->excluded_extents,
9232                                          cache->start,
9233                                          cache->start + cache->size - 1);
9234                         cache = next_cache_extent(cache);
9235                 }
9236                 prune_corrupt_blocks(root->fs_info);
9237                 reset_cached_block_groups(root->fs_info);
9238         }
9239
9240         reset_cached_block_groups(root->fs_info);
9241
9242         /*
9243          * We need to delete any duplicate entries we find first otherwise we
9244          * could mess up the extent tree when we have backrefs that actually
9245          * belong to a different extent item and not the weird duplicate one.
9246          */
9247         while (repair && !list_empty(&duplicate_extents)) {
9248                 rec = to_extent_record(duplicate_extents.next);
9249                 list_del_init(&rec->list);
9250
9251                 /* Sometimes we can find a backref before we find an actual
9252                  * extent, so we need to process it a little bit to see if there
9253                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9254                  * if this is a backref screwup.  If we need to delete stuff
9255                  * process_duplicates() will return 0, otherwise it will return
9256                  * 1 and we
9257                  */
9258                 if (process_duplicates(root, extent_cache, rec))
9259                         continue;
9260                 ret = delete_duplicate_records(root, rec);
9261                 if (ret < 0)
9262                         return ret;
9263                 /*
9264                  * delete_duplicate_records will return the number of entries
9265                  * deleted, so if it's greater than 0 then we know we actually
9266                  * did something and we need to remove.
9267                  */
9268                 if (ret)
9269                         had_dups = 1;
9270         }
9271
9272         if (had_dups)
9273                 return -EAGAIN;
9274
9275         while(1) {
9276                 int cur_err = 0;
9277                 int fix = 0;
9278
9279                 cache = search_cache_extent(extent_cache, 0);
9280                 if (!cache)
9281                         break;
9282                 rec = container_of(cache, struct extent_record, cache);
9283                 if (rec->num_duplicates) {
9284                         fprintf(stderr, "extent item %llu has multiple extent "
9285                                 "items\n", (unsigned long long)rec->start);
9286                         cur_err = 1;
9287                 }
9288
9289                 if (rec->refs != rec->extent_item_refs) {
9290                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9291                                 (unsigned long long)rec->start,
9292                                 (unsigned long long)rec->nr);
9293                         fprintf(stderr, "extent item %llu, found %llu\n",
9294                                 (unsigned long long)rec->extent_item_refs,
9295                                 (unsigned long long)rec->refs);
9296                         ret = record_orphan_data_extents(root->fs_info, rec);
9297                         if (ret < 0)
9298                                 goto repair_abort;
9299                         fix = ret;
9300                         cur_err = 1;
9301                 }
9302                 if (all_backpointers_checked(rec, 1)) {
9303                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9304                                 (unsigned long long)rec->start,
9305                                 (unsigned long long)rec->nr);
9306                         fix = 1;
9307                         cur_err = 1;
9308                 }
9309                 if (!rec->owner_ref_checked) {
9310                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9311                                 (unsigned long long)rec->start,
9312                                 (unsigned long long)rec->nr);
9313                         fix = 1;
9314                         cur_err = 1;
9315                 }
9316
9317                 if (repair && fix) {
9318                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9319                         if (ret)
9320                                 goto repair_abort;
9321                 }
9322
9323
9324                 if (rec->bad_full_backref) {
9325                         fprintf(stderr, "bad full backref, on [%llu]\n",
9326                                 (unsigned long long)rec->start);
9327                         if (repair) {
9328                                 ret = fixup_extent_flags(root->fs_info, rec);
9329                                 if (ret)
9330                                         goto repair_abort;
9331                                 fix = 1;
9332                         }
9333                         cur_err = 1;
9334                 }
9335                 /*
9336                  * Although it's not a extent ref's problem, we reuse this
9337                  * routine for error reporting.
9338                  * No repair function yet.
9339                  */
9340                 if (rec->crossing_stripes) {
9341                         fprintf(stderr,
9342                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9343                                 rec->start, rec->start + rec->max_size);
9344                         cur_err = 1;
9345                 }
9346
9347                 if (rec->wrong_chunk_type) {
9348                         fprintf(stderr,
9349                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9350                                 rec->start, rec->start + rec->max_size);
9351                         cur_err = 1;
9352                 }
9353
9354                 remove_cache_extent(extent_cache, cache);
9355                 free_all_extent_backrefs(rec);
9356                 if (!init_extent_tree && repair && (!cur_err || fix))
9357                         clear_extent_dirty(root->fs_info->excluded_extents,
9358                                            rec->start,
9359                                            rec->start + rec->max_size - 1,
9360                                            GFP_NOFS);
9361                 free(rec);
9362         }
9363 repair_abort:
9364         if (repair) {
9365                 if (ret && ret != -EAGAIN) {
9366                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9367                         exit(1);
9368                 } else if (!ret) {
9369                         struct btrfs_trans_handle *trans;
9370
9371                         root = root->fs_info->extent_root;
9372                         trans = btrfs_start_transaction(root, 1);
9373                         if (IS_ERR(trans)) {
9374                                 ret = PTR_ERR(trans);
9375                                 goto repair_abort;
9376                         }
9377
9378                         btrfs_fix_block_accounting(trans, root);
9379                         ret = btrfs_commit_transaction(trans, root);
9380                         if (ret)
9381                                 goto repair_abort;
9382                 }
9383                 return ret;
9384         }
9385         return 0;
9386 }
9387
9388 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9389 {
9390         u64 stripe_size;
9391
9392         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9393                 stripe_size = length;
9394                 stripe_size /= num_stripes;
9395         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9396                 stripe_size = length * 2;
9397                 stripe_size /= num_stripes;
9398         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9399                 stripe_size = length;
9400                 stripe_size /= (num_stripes - 1);
9401         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9402                 stripe_size = length;
9403                 stripe_size /= (num_stripes - 2);
9404         } else {
9405                 stripe_size = length;
9406         }
9407         return stripe_size;
9408 }
9409
9410 /*
9411  * Check the chunk with its block group/dev list ref:
9412  * Return 0 if all refs seems valid.
9413  * Return 1 if part of refs seems valid, need later check for rebuild ref
9414  * like missing block group and needs to search extent tree to rebuild them.
9415  * Return -1 if essential refs are missing and unable to rebuild.
9416  */
9417 static int check_chunk_refs(struct chunk_record *chunk_rec,
9418                             struct block_group_tree *block_group_cache,
9419                             struct device_extent_tree *dev_extent_cache,
9420                             int silent)
9421 {
9422         struct cache_extent *block_group_item;
9423         struct block_group_record *block_group_rec;
9424         struct cache_extent *dev_extent_item;
9425         struct device_extent_record *dev_extent_rec;
9426         u64 devid;
9427         u64 offset;
9428         u64 length;
9429         int metadump_v2 = 0;
9430         int i;
9431         int ret = 0;
9432
9433         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9434                                                chunk_rec->offset,
9435                                                chunk_rec->length);
9436         if (block_group_item) {
9437                 block_group_rec = container_of(block_group_item,
9438                                                struct block_group_record,
9439                                                cache);
9440                 if (chunk_rec->length != block_group_rec->offset ||
9441                     chunk_rec->offset != block_group_rec->objectid ||
9442                     (!metadump_v2 &&
9443                      chunk_rec->type_flags != block_group_rec->flags)) {
9444                         if (!silent)
9445                                 fprintf(stderr,
9446                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9447                                         chunk_rec->objectid,
9448                                         chunk_rec->type,
9449                                         chunk_rec->offset,
9450                                         chunk_rec->length,
9451                                         chunk_rec->offset,
9452                                         chunk_rec->type_flags,
9453                                         block_group_rec->objectid,
9454                                         block_group_rec->type,
9455                                         block_group_rec->offset,
9456                                         block_group_rec->offset,
9457                                         block_group_rec->objectid,
9458                                         block_group_rec->flags);
9459                         ret = -1;
9460                 } else {
9461                         list_del_init(&block_group_rec->list);
9462                         chunk_rec->bg_rec = block_group_rec;
9463                 }
9464         } else {
9465                 if (!silent)
9466                         fprintf(stderr,
9467                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9468                                 chunk_rec->objectid,
9469                                 chunk_rec->type,
9470                                 chunk_rec->offset,
9471                                 chunk_rec->length,
9472                                 chunk_rec->offset,
9473                                 chunk_rec->type_flags);
9474                 ret = 1;
9475         }
9476
9477         if (metadump_v2)
9478                 return ret;
9479
9480         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9481                                     chunk_rec->num_stripes);
9482         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9483                 devid = chunk_rec->stripes[i].devid;
9484                 offset = chunk_rec->stripes[i].offset;
9485                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9486                                                        devid, offset, length);
9487                 if (dev_extent_item) {
9488                         dev_extent_rec = container_of(dev_extent_item,
9489                                                 struct device_extent_record,
9490                                                 cache);
9491                         if (dev_extent_rec->objectid != devid ||
9492                             dev_extent_rec->offset != offset ||
9493                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9494                             dev_extent_rec->length != length) {
9495                                 if (!silent)
9496                                         fprintf(stderr,
9497                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9498                                                 chunk_rec->objectid,
9499                                                 chunk_rec->type,
9500                                                 chunk_rec->offset,
9501                                                 chunk_rec->stripes[i].devid,
9502                                                 chunk_rec->stripes[i].offset,
9503                                                 dev_extent_rec->objectid,
9504                                                 dev_extent_rec->offset,
9505                                                 dev_extent_rec->length);
9506                                 ret = -1;
9507                         } else {
9508                                 list_move(&dev_extent_rec->chunk_list,
9509                                           &chunk_rec->dextents);
9510                         }
9511                 } else {
9512                         if (!silent)
9513                                 fprintf(stderr,
9514                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9515                                         chunk_rec->objectid,
9516                                         chunk_rec->type,
9517                                         chunk_rec->offset,
9518                                         chunk_rec->stripes[i].devid,
9519                                         chunk_rec->stripes[i].offset);
9520                         ret = -1;
9521                 }
9522         }
9523         return ret;
9524 }
9525
9526 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9527 int check_chunks(struct cache_tree *chunk_cache,
9528                  struct block_group_tree *block_group_cache,
9529                  struct device_extent_tree *dev_extent_cache,
9530                  struct list_head *good, struct list_head *bad,
9531                  struct list_head *rebuild, int silent)
9532 {
9533         struct cache_extent *chunk_item;
9534         struct chunk_record *chunk_rec;
9535         struct block_group_record *bg_rec;
9536         struct device_extent_record *dext_rec;
9537         int err;
9538         int ret = 0;
9539
9540         chunk_item = first_cache_extent(chunk_cache);
9541         while (chunk_item) {
9542                 chunk_rec = container_of(chunk_item, struct chunk_record,
9543                                          cache);
9544                 err = check_chunk_refs(chunk_rec, block_group_cache,
9545                                        dev_extent_cache, silent);
9546                 if (err < 0)
9547                         ret = err;
9548                 if (err == 0 && good)
9549                         list_add_tail(&chunk_rec->list, good);
9550                 if (err > 0 && rebuild)
9551                         list_add_tail(&chunk_rec->list, rebuild);
9552                 if (err < 0 && bad)
9553                         list_add_tail(&chunk_rec->list, bad);
9554                 chunk_item = next_cache_extent(chunk_item);
9555         }
9556
9557         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9558                 if (!silent)
9559                         fprintf(stderr,
9560                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9561                                 bg_rec->objectid,
9562                                 bg_rec->offset,
9563                                 bg_rec->flags);
9564                 if (!ret)
9565                         ret = 1;
9566         }
9567
9568         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9569                             chunk_list) {
9570                 if (!silent)
9571                         fprintf(stderr,
9572                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9573                                 dext_rec->objectid,
9574                                 dext_rec->offset,
9575                                 dext_rec->length);
9576                 if (!ret)
9577                         ret = 1;
9578         }
9579         return ret;
9580 }
9581
9582
9583 static int check_device_used(struct device_record *dev_rec,
9584                              struct device_extent_tree *dext_cache)
9585 {
9586         struct cache_extent *cache;
9587         struct device_extent_record *dev_extent_rec;
9588         u64 total_byte = 0;
9589
9590         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9591         while (cache) {
9592                 dev_extent_rec = container_of(cache,
9593                                               struct device_extent_record,
9594                                               cache);
9595                 if (dev_extent_rec->objectid != dev_rec->devid)
9596                         break;
9597
9598                 list_del_init(&dev_extent_rec->device_list);
9599                 total_byte += dev_extent_rec->length;
9600                 cache = next_cache_extent(cache);
9601         }
9602
9603         if (total_byte != dev_rec->byte_used) {
9604                 fprintf(stderr,
9605                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9606                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9607                         dev_rec->type, dev_rec->offset);
9608                 return -1;
9609         } else {
9610                 return 0;
9611         }
9612 }
9613
9614 /* check btrfs_dev_item -> btrfs_dev_extent */
9615 static int check_devices(struct rb_root *dev_cache,
9616                          struct device_extent_tree *dev_extent_cache)
9617 {
9618         struct rb_node *dev_node;
9619         struct device_record *dev_rec;
9620         struct device_extent_record *dext_rec;
9621         int err;
9622         int ret = 0;
9623
9624         dev_node = rb_first(dev_cache);
9625         while (dev_node) {
9626                 dev_rec = container_of(dev_node, struct device_record, node);
9627                 err = check_device_used(dev_rec, dev_extent_cache);
9628                 if (err)
9629                         ret = err;
9630
9631                 dev_node = rb_next(dev_node);
9632         }
9633         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9634                             device_list) {
9635                 fprintf(stderr,
9636                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9637                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9638                 if (!ret)
9639                         ret = 1;
9640         }
9641         return ret;
9642 }
9643
9644 static int add_root_item_to_list(struct list_head *head,
9645                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9646                                   u8 level, u8 drop_level,
9647                                   int level_size, struct btrfs_key *drop_key)
9648 {
9649
9650         struct root_item_record *ri_rec;
9651         ri_rec = malloc(sizeof(*ri_rec));
9652         if (!ri_rec)
9653                 return -ENOMEM;
9654         ri_rec->bytenr = bytenr;
9655         ri_rec->objectid = objectid;
9656         ri_rec->level = level;
9657         ri_rec->level_size = level_size;
9658         ri_rec->drop_level = drop_level;
9659         ri_rec->last_snapshot = last_snapshot;
9660         if (drop_key)
9661                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9662         list_add_tail(&ri_rec->list, head);
9663
9664         return 0;
9665 }
9666
9667 static void free_root_item_list(struct list_head *list)
9668 {
9669         struct root_item_record *ri_rec;
9670
9671         while (!list_empty(list)) {
9672                 ri_rec = list_first_entry(list, struct root_item_record,
9673                                           list);
9674                 list_del_init(&ri_rec->list);
9675                 free(ri_rec);
9676         }
9677 }
9678
9679 static int deal_root_from_list(struct list_head *list,
9680                                struct btrfs_root *root,
9681                                struct block_info *bits,
9682                                int bits_nr,
9683                                struct cache_tree *pending,
9684                                struct cache_tree *seen,
9685                                struct cache_tree *reada,
9686                                struct cache_tree *nodes,
9687                                struct cache_tree *extent_cache,
9688                                struct cache_tree *chunk_cache,
9689                                struct rb_root *dev_cache,
9690                                struct block_group_tree *block_group_cache,
9691                                struct device_extent_tree *dev_extent_cache)
9692 {
9693         int ret = 0;
9694         u64 last;
9695
9696         while (!list_empty(list)) {
9697                 struct root_item_record *rec;
9698                 struct extent_buffer *buf;
9699                 rec = list_entry(list->next,
9700                                  struct root_item_record, list);
9701                 last = 0;
9702                 buf = read_tree_block(root->fs_info->tree_root,
9703                                       rec->bytenr, rec->level_size, 0);
9704                 if (!extent_buffer_uptodate(buf)) {
9705                         free_extent_buffer(buf);
9706                         ret = -EIO;
9707                         break;
9708                 }
9709                 ret = add_root_to_pending(buf, extent_cache, pending,
9710                                     seen, nodes, rec->objectid);
9711                 if (ret < 0)
9712                         break;
9713                 /*
9714                  * To rebuild extent tree, we need deal with snapshot
9715                  * one by one, otherwise we deal with node firstly which
9716                  * can maximize readahead.
9717                  */
9718                 while (1) {
9719                         ret = run_next_block(root, bits, bits_nr, &last,
9720                                              pending, seen, reada, nodes,
9721                                              extent_cache, chunk_cache,
9722                                              dev_cache, block_group_cache,
9723                                              dev_extent_cache, rec);
9724                         if (ret != 0)
9725                                 break;
9726                 }
9727                 free_extent_buffer(buf);
9728                 list_del(&rec->list);
9729                 free(rec);
9730                 if (ret < 0)
9731                         break;
9732         }
9733         while (ret >= 0) {
9734                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9735                                      reada, nodes, extent_cache, chunk_cache,
9736                                      dev_cache, block_group_cache,
9737                                      dev_extent_cache, NULL);
9738                 if (ret != 0) {
9739                         if (ret > 0)
9740                                 ret = 0;
9741                         break;
9742                 }
9743         }
9744         return ret;
9745 }
9746
9747 static int check_chunks_and_extents(struct btrfs_root *root)
9748 {
9749         struct rb_root dev_cache;
9750         struct cache_tree chunk_cache;
9751         struct block_group_tree block_group_cache;
9752         struct device_extent_tree dev_extent_cache;
9753         struct cache_tree extent_cache;
9754         struct cache_tree seen;
9755         struct cache_tree pending;
9756         struct cache_tree reada;
9757         struct cache_tree nodes;
9758         struct extent_io_tree excluded_extents;
9759         struct cache_tree corrupt_blocks;
9760         struct btrfs_path path;
9761         struct btrfs_key key;
9762         struct btrfs_key found_key;
9763         int ret, err = 0;
9764         struct block_info *bits;
9765         int bits_nr;
9766         struct extent_buffer *leaf;
9767         int slot;
9768         struct btrfs_root_item ri;
9769         struct list_head dropping_trees;
9770         struct list_head normal_trees;
9771         struct btrfs_root *root1;
9772         u64 objectid;
9773         u32 level_size;
9774         u8 level;
9775
9776         dev_cache = RB_ROOT;
9777         cache_tree_init(&chunk_cache);
9778         block_group_tree_init(&block_group_cache);
9779         device_extent_tree_init(&dev_extent_cache);
9780
9781         cache_tree_init(&extent_cache);
9782         cache_tree_init(&seen);
9783         cache_tree_init(&pending);
9784         cache_tree_init(&nodes);
9785         cache_tree_init(&reada);
9786         cache_tree_init(&corrupt_blocks);
9787         extent_io_tree_init(&excluded_extents);
9788         INIT_LIST_HEAD(&dropping_trees);
9789         INIT_LIST_HEAD(&normal_trees);
9790
9791         if (repair) {
9792                 root->fs_info->excluded_extents = &excluded_extents;
9793                 root->fs_info->fsck_extent_cache = &extent_cache;
9794                 root->fs_info->free_extent_hook = free_extent_hook;
9795                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9796         }
9797
9798         bits_nr = 1024;
9799         bits = malloc(bits_nr * sizeof(struct block_info));
9800         if (!bits) {
9801                 perror("malloc");
9802                 exit(1);
9803         }
9804
9805         if (ctx.progress_enabled) {
9806                 ctx.tp = TASK_EXTENTS;
9807                 task_start(ctx.info);
9808         }
9809
9810 again:
9811         root1 = root->fs_info->tree_root;
9812         level = btrfs_header_level(root1->node);
9813         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9814                                     root1->node->start, 0, level, 0,
9815                                     root1->nodesize, NULL);
9816         if (ret < 0)
9817                 goto out;
9818         root1 = root->fs_info->chunk_root;
9819         level = btrfs_header_level(root1->node);
9820         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9821                                     root1->node->start, 0, level, 0,
9822                                     root1->nodesize, NULL);
9823         if (ret < 0)
9824                 goto out;
9825         btrfs_init_path(&path);
9826         key.offset = 0;
9827         key.objectid = 0;
9828         key.type = BTRFS_ROOT_ITEM_KEY;
9829         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9830                                         &key, &path, 0, 0);
9831         if (ret < 0)
9832                 goto out;
9833         while(1) {
9834                 leaf = path.nodes[0];
9835                 slot = path.slots[0];
9836                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9837                         ret = btrfs_next_leaf(root, &path);
9838                         if (ret != 0)
9839                                 break;
9840                         leaf = path.nodes[0];
9841                         slot = path.slots[0];
9842                 }
9843                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9844                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9845                         unsigned long offset;
9846                         u64 last_snapshot;
9847
9848                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9849                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9850                         last_snapshot = btrfs_root_last_snapshot(&ri);
9851                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9852                                 level = btrfs_root_level(&ri);
9853                                 level_size = root->nodesize;
9854                                 ret = add_root_item_to_list(&normal_trees,
9855                                                 found_key.objectid,
9856                                                 btrfs_root_bytenr(&ri),
9857                                                 last_snapshot, level,
9858                                                 0, level_size, NULL);
9859                                 if (ret < 0)
9860                                         goto out;
9861                         } else {
9862                                 level = btrfs_root_level(&ri);
9863                                 level_size = root->nodesize;
9864                                 objectid = found_key.objectid;
9865                                 btrfs_disk_key_to_cpu(&found_key,
9866                                                       &ri.drop_progress);
9867                                 ret = add_root_item_to_list(&dropping_trees,
9868                                                 objectid,
9869                                                 btrfs_root_bytenr(&ri),
9870                                                 last_snapshot, level,
9871                                                 ri.drop_level,
9872                                                 level_size, &found_key);
9873                                 if (ret < 0)
9874                                         goto out;
9875                         }
9876                 }
9877                 path.slots[0]++;
9878         }
9879         btrfs_release_path(&path);
9880
9881         /*
9882          * check_block can return -EAGAIN if it fixes something, please keep
9883          * this in mind when dealing with return values from these functions, if
9884          * we get -EAGAIN we want to fall through and restart the loop.
9885          */
9886         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9887                                   &seen, &reada, &nodes, &extent_cache,
9888                                   &chunk_cache, &dev_cache, &block_group_cache,
9889                                   &dev_extent_cache);
9890         if (ret < 0) {
9891                 if (ret == -EAGAIN)
9892                         goto loop;
9893                 goto out;
9894         }
9895         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9896                                   &pending, &seen, &reada, &nodes,
9897                                   &extent_cache, &chunk_cache, &dev_cache,
9898                                   &block_group_cache, &dev_extent_cache);
9899         if (ret < 0) {
9900                 if (ret == -EAGAIN)
9901                         goto loop;
9902                 goto out;
9903         }
9904
9905         ret = check_chunks(&chunk_cache, &block_group_cache,
9906                            &dev_extent_cache, NULL, NULL, NULL, 0);
9907         if (ret) {
9908                 if (ret == -EAGAIN)
9909                         goto loop;
9910                 err = ret;
9911         }
9912
9913         ret = check_extent_refs(root, &extent_cache);
9914         if (ret < 0) {
9915                 if (ret == -EAGAIN)
9916                         goto loop;
9917                 goto out;
9918         }
9919
9920         ret = check_devices(&dev_cache, &dev_extent_cache);
9921         if (ret && err)
9922                 ret = err;
9923
9924 out:
9925         task_stop(ctx.info);
9926         if (repair) {
9927                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9928                 extent_io_tree_cleanup(&excluded_extents);
9929                 root->fs_info->fsck_extent_cache = NULL;
9930                 root->fs_info->free_extent_hook = NULL;
9931                 root->fs_info->corrupt_blocks = NULL;
9932                 root->fs_info->excluded_extents = NULL;
9933         }
9934         free(bits);
9935         free_chunk_cache_tree(&chunk_cache);
9936         free_device_cache_tree(&dev_cache);
9937         free_block_group_tree(&block_group_cache);
9938         free_device_extent_tree(&dev_extent_cache);
9939         free_extent_cache_tree(&seen);
9940         free_extent_cache_tree(&pending);
9941         free_extent_cache_tree(&reada);
9942         free_extent_cache_tree(&nodes);
9943         return ret;
9944 loop:
9945         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9946         free_extent_cache_tree(&seen);
9947         free_extent_cache_tree(&pending);
9948         free_extent_cache_tree(&reada);
9949         free_extent_cache_tree(&nodes);
9950         free_chunk_cache_tree(&chunk_cache);
9951         free_block_group_tree(&block_group_cache);
9952         free_device_cache_tree(&dev_cache);
9953         free_device_extent_tree(&dev_extent_cache);
9954         free_extent_record_cache(root->fs_info, &extent_cache);
9955         free_root_item_list(&normal_trees);
9956         free_root_item_list(&dropping_trees);
9957         extent_io_tree_cleanup(&excluded_extents);
9958         goto again;
9959 }
9960
9961 /*
9962  * Check backrefs of a tree block given by @bytenr or @eb.
9963  *
9964  * @root:       the root containing the @bytenr or @eb
9965  * @eb:         tree block extent buffer, can be NULL
9966  * @bytenr:     bytenr of the tree block to search
9967  * @level:      tree level of the tree block
9968  * @owner:      owner of the tree block
9969  *
9970  * Return >0 for any error found and output error message
9971  * Return 0 for no error found
9972  */
9973 static int check_tree_block_ref(struct btrfs_root *root,
9974                                 struct extent_buffer *eb, u64 bytenr,
9975                                 int level, u64 owner)
9976 {
9977         struct btrfs_key key;
9978         struct btrfs_root *extent_root = root->fs_info->extent_root;
9979         struct btrfs_path path;
9980         struct btrfs_extent_item *ei;
9981         struct btrfs_extent_inline_ref *iref;
9982         struct extent_buffer *leaf;
9983         unsigned long end;
9984         unsigned long ptr;
9985         int slot;
9986         int skinny_level;
9987         int type;
9988         u32 nodesize = root->nodesize;
9989         u32 item_size;
9990         u64 offset;
9991         int tree_reloc_root = 0;
9992         int found_ref = 0;
9993         int err = 0;
9994         int ret;
9995
9996         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9997             btrfs_header_bytenr(root->node) == bytenr)
9998                 tree_reloc_root = 1;
9999
10000         btrfs_init_path(&path);
10001         key.objectid = bytenr;
10002         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10003                 key.type = BTRFS_METADATA_ITEM_KEY;
10004         else
10005                 key.type = BTRFS_EXTENT_ITEM_KEY;
10006         key.offset = (u64)-1;
10007
10008         /* Search for the backref in extent tree */
10009         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10010         if (ret < 0) {
10011                 err |= BACKREF_MISSING;
10012                 goto out;
10013         }
10014         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10015         if (ret) {
10016                 err |= BACKREF_MISSING;
10017                 goto out;
10018         }
10019
10020         leaf = path.nodes[0];
10021         slot = path.slots[0];
10022         btrfs_item_key_to_cpu(leaf, &key, slot);
10023
10024         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10025
10026         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10027                 skinny_level = (int)key.offset;
10028                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10029         } else {
10030                 struct btrfs_tree_block_info *info;
10031
10032                 info = (struct btrfs_tree_block_info *)(ei + 1);
10033                 skinny_level = btrfs_tree_block_level(leaf, info);
10034                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10035         }
10036
10037         if (eb) {
10038                 u64 header_gen;
10039                 u64 extent_gen;
10040
10041                 if (!(btrfs_extent_flags(leaf, ei) &
10042                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10043                         error(
10044                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10045                                 key.objectid, nodesize,
10046                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10047                         err = BACKREF_MISMATCH;
10048                 }
10049                 header_gen = btrfs_header_generation(eb);
10050                 extent_gen = btrfs_extent_generation(leaf, ei);
10051                 if (header_gen != extent_gen) {
10052                         error(
10053         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10054                                 key.objectid, nodesize, header_gen,
10055                                 extent_gen);
10056                         err = BACKREF_MISMATCH;
10057                 }
10058                 if (level != skinny_level) {
10059                         error(
10060                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10061                                 key.objectid, nodesize, level, skinny_level);
10062                         err = BACKREF_MISMATCH;
10063                 }
10064                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10065                         error(
10066                         "extent[%llu %u] is referred by other roots than %llu",
10067                                 key.objectid, nodesize, root->objectid);
10068                         err = BACKREF_MISMATCH;
10069                 }
10070         }
10071
10072         /*
10073          * Iterate the extent/metadata item to find the exact backref
10074          */
10075         item_size = btrfs_item_size_nr(leaf, slot);
10076         ptr = (unsigned long)iref;
10077         end = (unsigned long)ei + item_size;
10078         while (ptr < end) {
10079                 iref = (struct btrfs_extent_inline_ref *)ptr;
10080                 type = btrfs_extent_inline_ref_type(leaf, iref);
10081                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10082
10083                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10084                         (offset == root->objectid || offset == owner)) {
10085                         found_ref = 1;
10086                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10087                         /*
10088                          * Backref of tree reloc root points to itself, no need
10089                          * to check backref any more.
10090                          */
10091                         if (tree_reloc_root)
10092                                 found_ref = 1;
10093                         else
10094                         /* Check if the backref points to valid referencer */
10095                                 found_ref = !check_tree_block_ref(root, NULL,
10096                                                 offset, level + 1, owner);
10097                 }
10098
10099                 if (found_ref)
10100                         break;
10101                 ptr += btrfs_extent_inline_ref_size(type);
10102         }
10103
10104         /*
10105          * Inlined extent item doesn't have what we need, check
10106          * TREE_BLOCK_REF_KEY
10107          */
10108         if (!found_ref) {
10109                 btrfs_release_path(&path);
10110                 key.objectid = bytenr;
10111                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10112                 key.offset = root->objectid;
10113
10114                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10115                 if (!ret)
10116                         found_ref = 1;
10117         }
10118         if (!found_ref)
10119                 err |= BACKREF_MISSING;
10120 out:
10121         btrfs_release_path(&path);
10122         if (eb && (err & BACKREF_MISSING))
10123                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10124                         bytenr, nodesize, owner, level);
10125         return err;
10126 }
10127
10128 /*
10129  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10130  *
10131  * Return >0 any error found and output error message
10132  * Return 0 for no error found
10133  */
10134 static int check_extent_data_item(struct btrfs_root *root,
10135                                   struct extent_buffer *eb, int slot)
10136 {
10137         struct btrfs_file_extent_item *fi;
10138         struct btrfs_path path;
10139         struct btrfs_root *extent_root = root->fs_info->extent_root;
10140         struct btrfs_key fi_key;
10141         struct btrfs_key dbref_key;
10142         struct extent_buffer *leaf;
10143         struct btrfs_extent_item *ei;
10144         struct btrfs_extent_inline_ref *iref;
10145         struct btrfs_extent_data_ref *dref;
10146         u64 owner;
10147         u64 disk_bytenr;
10148         u64 disk_num_bytes;
10149         u64 extent_num_bytes;
10150         u64 extent_flags;
10151         u32 item_size;
10152         unsigned long end;
10153         unsigned long ptr;
10154         int type;
10155         u64 ref_root;
10156         int found_dbackref = 0;
10157         int err = 0;
10158         int ret;
10159
10160         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10161         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10162
10163         /* Nothing to check for hole and inline data extents */
10164         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10165             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10166                 return 0;
10167
10168         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10169         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10170         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10171
10172         /* Check unaligned disk_num_bytes and num_bytes */
10173         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10174                 error(
10175 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10176                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10177                         root->sectorsize);
10178                 err |= BYTES_UNALIGNED;
10179         } else {
10180                 data_bytes_allocated += disk_num_bytes;
10181         }
10182         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10183                 error(
10184 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10185                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10186                         root->sectorsize);
10187                 err |= BYTES_UNALIGNED;
10188         } else {
10189                 data_bytes_referenced += extent_num_bytes;
10190         }
10191         owner = btrfs_header_owner(eb);
10192
10193         /* Check the extent item of the file extent in extent tree */
10194         btrfs_init_path(&path);
10195         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10196         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10197         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10198
10199         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10200         if (ret) {
10201                 err |= BACKREF_MISSING;
10202                 goto error;
10203         }
10204
10205         leaf = path.nodes[0];
10206         slot = path.slots[0];
10207         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10208
10209         extent_flags = btrfs_extent_flags(leaf, ei);
10210
10211         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10212                 error(
10213                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10214                     disk_bytenr, disk_num_bytes,
10215                     BTRFS_EXTENT_FLAG_DATA);
10216                 err |= BACKREF_MISMATCH;
10217         }
10218
10219         /* Check data backref inside that extent item */
10220         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10221         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10222         ptr = (unsigned long)iref;
10223         end = (unsigned long)ei + item_size;
10224         while (ptr < end) {
10225                 iref = (struct btrfs_extent_inline_ref *)ptr;
10226                 type = btrfs_extent_inline_ref_type(leaf, iref);
10227                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10228
10229                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10230                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10231                         if (ref_root == owner || ref_root == root->objectid)
10232                                 found_dbackref = 1;
10233                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10234                         found_dbackref = !check_tree_block_ref(root, NULL,
10235                                 btrfs_extent_inline_ref_offset(leaf, iref),
10236                                 0, owner);
10237                 }
10238
10239                 if (found_dbackref)
10240                         break;
10241                 ptr += btrfs_extent_inline_ref_size(type);
10242         }
10243
10244         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10245         if (!found_dbackref) {
10246                 btrfs_release_path(&path);
10247
10248                 btrfs_init_path(&path);
10249                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10250                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10251                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10252                                 fi_key.objectid, fi_key.offset);
10253
10254                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10255                                         &dbref_key, &path, 0, 0);
10256                 if (!ret)
10257                         found_dbackref = 1;
10258         }
10259
10260         if (!found_dbackref)
10261                 err |= BACKREF_MISSING;
10262 error:
10263         btrfs_release_path(&path);
10264         if (err & BACKREF_MISSING) {
10265                 error("data extent[%llu %llu] backref lost",
10266                       disk_bytenr, disk_num_bytes);
10267         }
10268         return err;
10269 }
10270
10271 /*
10272  * Get real tree block level for the case like shared block
10273  * Return >= 0 as tree level
10274  * Return <0 for error
10275  */
10276 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10277 {
10278         struct extent_buffer *eb;
10279         struct btrfs_path path;
10280         struct btrfs_key key;
10281         struct btrfs_extent_item *ei;
10282         u64 flags;
10283         u64 transid;
10284         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10285         u8 backref_level;
10286         u8 header_level;
10287         int ret;
10288
10289         /* Search extent tree for extent generation and level */
10290         key.objectid = bytenr;
10291         key.type = BTRFS_METADATA_ITEM_KEY;
10292         key.offset = (u64)-1;
10293
10294         btrfs_init_path(&path);
10295         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10296         if (ret < 0)
10297                 goto release_out;
10298         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10299         if (ret < 0)
10300                 goto release_out;
10301         if (ret > 0) {
10302                 ret = -ENOENT;
10303                 goto release_out;
10304         }
10305
10306         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10307         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10308                             struct btrfs_extent_item);
10309         flags = btrfs_extent_flags(path.nodes[0], ei);
10310         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10311                 ret = -ENOENT;
10312                 goto release_out;
10313         }
10314
10315         /* Get transid for later read_tree_block() check */
10316         transid = btrfs_extent_generation(path.nodes[0], ei);
10317
10318         /* Get backref level as one source */
10319         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10320                 backref_level = key.offset;
10321         } else {
10322                 struct btrfs_tree_block_info *info;
10323
10324                 info = (struct btrfs_tree_block_info *)(ei + 1);
10325                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10326         }
10327         btrfs_release_path(&path);
10328
10329         /* Get level from tree block as an alternative source */
10330         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10331         if (!extent_buffer_uptodate(eb)) {
10332                 free_extent_buffer(eb);
10333                 return -EIO;
10334         }
10335         header_level = btrfs_header_level(eb);
10336         free_extent_buffer(eb);
10337
10338         if (header_level != backref_level)
10339                 return -EIO;
10340         return header_level;
10341
10342 release_out:
10343         btrfs_release_path(&path);
10344         return ret;
10345 }
10346
10347 /*
10348  * Check if a tree block backref is valid (points to a valid tree block)
10349  * if level == -1, level will be resolved
10350  * Return >0 for any error found and print error message
10351  */
10352 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10353                                     u64 bytenr, int level)
10354 {
10355         struct btrfs_root *root;
10356         struct btrfs_key key;
10357         struct btrfs_path path;
10358         struct extent_buffer *eb;
10359         struct extent_buffer *node;
10360         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10361         int err = 0;
10362         int ret;
10363
10364         /* Query level for level == -1 special case */
10365         if (level == -1)
10366                 level = query_tree_block_level(fs_info, bytenr);
10367         if (level < 0) {
10368                 err |= REFERENCER_MISSING;
10369                 goto out;
10370         }
10371
10372         key.objectid = root_id;
10373         key.type = BTRFS_ROOT_ITEM_KEY;
10374         key.offset = (u64)-1;
10375
10376         root = btrfs_read_fs_root(fs_info, &key);
10377         if (IS_ERR(root)) {
10378                 err |= REFERENCER_MISSING;
10379                 goto out;
10380         }
10381
10382         /* Read out the tree block to get item/node key */
10383         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10384         if (!extent_buffer_uptodate(eb)) {
10385                 err |= REFERENCER_MISSING;
10386                 free_extent_buffer(eb);
10387                 goto out;
10388         }
10389
10390         /* Empty tree, no need to check key */
10391         if (!btrfs_header_nritems(eb) && !level) {
10392                 free_extent_buffer(eb);
10393                 goto out;
10394         }
10395
10396         if (level)
10397                 btrfs_node_key_to_cpu(eb, &key, 0);
10398         else
10399                 btrfs_item_key_to_cpu(eb, &key, 0);
10400
10401         free_extent_buffer(eb);
10402
10403         btrfs_init_path(&path);
10404         path.lowest_level = level;
10405         /* Search with the first key, to ensure we can reach it */
10406         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10407         if (ret < 0) {
10408                 err |= REFERENCER_MISSING;
10409                 goto release_out;
10410         }
10411
10412         node = path.nodes[level];
10413         if (btrfs_header_bytenr(node) != bytenr) {
10414                 error(
10415         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10416                         bytenr, nodesize, bytenr,
10417                         btrfs_header_bytenr(node));
10418                 err |= REFERENCER_MISMATCH;
10419         }
10420         if (btrfs_header_level(node) != level) {
10421                 error(
10422         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10423                         bytenr, nodesize, level,
10424                         btrfs_header_level(node));
10425                 err |= REFERENCER_MISMATCH;
10426         }
10427
10428 release_out:
10429         btrfs_release_path(&path);
10430 out:
10431         if (err & REFERENCER_MISSING) {
10432                 if (level < 0)
10433                         error("extent [%llu %d] lost referencer (owner: %llu)",
10434                                 bytenr, nodesize, root_id);
10435                 else
10436                         error(
10437                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10438                                 bytenr, nodesize, root_id, level);
10439         }
10440
10441         return err;
10442 }
10443
10444 /*
10445  * Check if tree block @eb is tree reloc root.
10446  * Return 0 if it's not or any problem happens
10447  * Return 1 if it's a tree reloc root
10448  */
10449 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10450                                  struct extent_buffer *eb)
10451 {
10452         struct btrfs_root *tree_reloc_root;
10453         struct btrfs_key key;
10454         u64 bytenr = btrfs_header_bytenr(eb);
10455         u64 owner = btrfs_header_owner(eb);
10456         int ret = 0;
10457
10458         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10459         key.offset = owner;
10460         key.type = BTRFS_ROOT_ITEM_KEY;
10461
10462         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10463         if (IS_ERR(tree_reloc_root))
10464                 return 0;
10465
10466         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10467                 ret = 1;
10468         btrfs_free_fs_root(tree_reloc_root);
10469         return ret;
10470 }
10471
10472 /*
10473  * Check referencer for shared block backref
10474  * If level == -1, this function will resolve the level.
10475  */
10476 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10477                                      u64 parent, u64 bytenr, int level)
10478 {
10479         struct extent_buffer *eb;
10480         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10481         u32 nr;
10482         int found_parent = 0;
10483         int i;
10484
10485         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10486         if (!extent_buffer_uptodate(eb))
10487                 goto out;
10488
10489         if (level == -1)
10490                 level = query_tree_block_level(fs_info, bytenr);
10491         if (level < 0)
10492                 goto out;
10493
10494         /* It's possible it's a tree reloc root */
10495         if (parent == bytenr) {
10496                 if (is_tree_reloc_root(fs_info, eb))
10497                         found_parent = 1;
10498                 goto out;
10499         }
10500
10501         if (level + 1 != btrfs_header_level(eb))
10502                 goto out;
10503
10504         nr = btrfs_header_nritems(eb);
10505         for (i = 0; i < nr; i++) {
10506                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10507                         found_parent = 1;
10508                         break;
10509                 }
10510         }
10511 out:
10512         free_extent_buffer(eb);
10513         if (!found_parent) {
10514                 error(
10515         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10516                         bytenr, nodesize, parent, level);
10517                 return REFERENCER_MISSING;
10518         }
10519         return 0;
10520 }
10521
10522 /*
10523  * Check referencer for normal (inlined) data ref
10524  * If len == 0, it will be resolved by searching in extent tree
10525  */
10526 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10527                                      u64 root_id, u64 objectid, u64 offset,
10528                                      u64 bytenr, u64 len, u32 count)
10529 {
10530         struct btrfs_root *root;
10531         struct btrfs_root *extent_root = fs_info->extent_root;
10532         struct btrfs_key key;
10533         struct btrfs_path path;
10534         struct extent_buffer *leaf;
10535         struct btrfs_file_extent_item *fi;
10536         u32 found_count = 0;
10537         int slot;
10538         int ret = 0;
10539
10540         if (!len) {
10541                 key.objectid = bytenr;
10542                 key.type = BTRFS_EXTENT_ITEM_KEY;
10543                 key.offset = (u64)-1;
10544
10545                 btrfs_init_path(&path);
10546                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10547                 if (ret < 0)
10548                         goto out;
10549                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10550                 if (ret)
10551                         goto out;
10552                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10553                 if (key.objectid != bytenr ||
10554                     key.type != BTRFS_EXTENT_ITEM_KEY)
10555                         goto out;
10556                 len = key.offset;
10557                 btrfs_release_path(&path);
10558         }
10559         key.objectid = root_id;
10560         key.type = BTRFS_ROOT_ITEM_KEY;
10561         key.offset = (u64)-1;
10562         btrfs_init_path(&path);
10563
10564         root = btrfs_read_fs_root(fs_info, &key);
10565         if (IS_ERR(root))
10566                 goto out;
10567
10568         key.objectid = objectid;
10569         key.type = BTRFS_EXTENT_DATA_KEY;
10570         /*
10571          * It can be nasty as data backref offset is
10572          * file offset - file extent offset, which is smaller or
10573          * equal to original backref offset.  The only special case is
10574          * overflow.  So we need to special check and do further search.
10575          */
10576         key.offset = offset & (1ULL << 63) ? 0 : offset;
10577
10578         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10579         if (ret < 0)
10580                 goto out;
10581
10582         /*
10583          * Search afterwards to get correct one
10584          * NOTE: As we must do a comprehensive check on the data backref to
10585          * make sure the dref count also matches, we must iterate all file
10586          * extents for that inode.
10587          */
10588         while (1) {
10589                 leaf = path.nodes[0];
10590                 slot = path.slots[0];
10591
10592                 btrfs_item_key_to_cpu(leaf, &key, slot);
10593                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10594                         break;
10595                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10596                 /*
10597                  * Except normal disk bytenr and disk num bytes, we still
10598                  * need to do extra check on dbackref offset as
10599                  * dbackref offset = file_offset - file_extent_offset
10600                  */
10601                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10602                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10603                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10604                     offset)
10605                         found_count++;
10606
10607                 ret = btrfs_next_item(root, &path);
10608                 if (ret)
10609                         break;
10610         }
10611 out:
10612         btrfs_release_path(&path);
10613         if (found_count != count) {
10614                 error(
10615 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10616                         bytenr, len, root_id, objectid, offset, count, found_count);
10617                 return REFERENCER_MISSING;
10618         }
10619         return 0;
10620 }
10621
10622 /*
10623  * Check if the referencer of a shared data backref exists
10624  */
10625 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10626                                      u64 parent, u64 bytenr)
10627 {
10628         struct extent_buffer *eb;
10629         struct btrfs_key key;
10630         struct btrfs_file_extent_item *fi;
10631         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10632         u32 nr;
10633         int found_parent = 0;
10634         int i;
10635
10636         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10637         if (!extent_buffer_uptodate(eb))
10638                 goto out;
10639
10640         nr = btrfs_header_nritems(eb);
10641         for (i = 0; i < nr; i++) {
10642                 btrfs_item_key_to_cpu(eb, &key, i);
10643                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10644                         continue;
10645
10646                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10647                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10648                         continue;
10649
10650                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10651                         found_parent = 1;
10652                         break;
10653                 }
10654         }
10655
10656 out:
10657         free_extent_buffer(eb);
10658         if (!found_parent) {
10659                 error("shared extent %llu referencer lost (parent: %llu)",
10660                         bytenr, parent);
10661                 return REFERENCER_MISSING;
10662         }
10663         return 0;
10664 }
10665
10666 /*
10667  * This function will check a given extent item, including its backref and
10668  * itself (like crossing stripe boundary and type)
10669  *
10670  * Since we don't use extent_record anymore, introduce new error bit
10671  */
10672 static int check_extent_item(struct btrfs_fs_info *fs_info,
10673                              struct extent_buffer *eb, int slot)
10674 {
10675         struct btrfs_extent_item *ei;
10676         struct btrfs_extent_inline_ref *iref;
10677         struct btrfs_extent_data_ref *dref;
10678         unsigned long end;
10679         unsigned long ptr;
10680         int type;
10681         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10682         u32 item_size = btrfs_item_size_nr(eb, slot);
10683         u64 flags;
10684         u64 offset;
10685         int metadata = 0;
10686         int level;
10687         struct btrfs_key key;
10688         int ret;
10689         int err = 0;
10690
10691         btrfs_item_key_to_cpu(eb, &key, slot);
10692         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10693                 bytes_used += key.offset;
10694         else
10695                 bytes_used += nodesize;
10696
10697         if (item_size < sizeof(*ei)) {
10698                 /*
10699                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10700                  * old thing when on disk format is still un-determined.
10701                  * No need to care about it anymore
10702                  */
10703                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10704                 return -ENOTTY;
10705         }
10706
10707         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10708         flags = btrfs_extent_flags(eb, ei);
10709
10710         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10711                 metadata = 1;
10712         if (metadata && check_crossing_stripes(global_info, key.objectid,
10713                                                eb->len)) {
10714                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10715                       key.objectid, key.objectid + nodesize);
10716                 err |= CROSSING_STRIPE_BOUNDARY;
10717         }
10718
10719         ptr = (unsigned long)(ei + 1);
10720
10721         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10722                 /* Old EXTENT_ITEM metadata */
10723                 struct btrfs_tree_block_info *info;
10724
10725                 info = (struct btrfs_tree_block_info *)ptr;
10726                 level = btrfs_tree_block_level(eb, info);
10727                 ptr += sizeof(struct btrfs_tree_block_info);
10728         } else {
10729                 /* New METADATA_ITEM */
10730                 level = key.offset;
10731         }
10732         end = (unsigned long)ei + item_size;
10733
10734         if (ptr >= end) {
10735                 err |= ITEM_SIZE_MISMATCH;
10736                 goto out;
10737         }
10738
10739         /* Now check every backref in this extent item */
10740 next:
10741         iref = (struct btrfs_extent_inline_ref *)ptr;
10742         type = btrfs_extent_inline_ref_type(eb, iref);
10743         offset = btrfs_extent_inline_ref_offset(eb, iref);
10744         switch (type) {
10745         case BTRFS_TREE_BLOCK_REF_KEY:
10746                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10747                                                level);
10748                 err |= ret;
10749                 break;
10750         case BTRFS_SHARED_BLOCK_REF_KEY:
10751                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10752                                                  level);
10753                 err |= ret;
10754                 break;
10755         case BTRFS_EXTENT_DATA_REF_KEY:
10756                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10757                 ret = check_extent_data_backref(fs_info,
10758                                 btrfs_extent_data_ref_root(eb, dref),
10759                                 btrfs_extent_data_ref_objectid(eb, dref),
10760                                 btrfs_extent_data_ref_offset(eb, dref),
10761                                 key.objectid, key.offset,
10762                                 btrfs_extent_data_ref_count(eb, dref));
10763                 err |= ret;
10764                 break;
10765         case BTRFS_SHARED_DATA_REF_KEY:
10766                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10767                 err |= ret;
10768                 break;
10769         default:
10770                 error("extent[%llu %d %llu] has unknown ref type: %d",
10771                         key.objectid, key.type, key.offset, type);
10772                 err |= UNKNOWN_TYPE;
10773                 goto out;
10774         }
10775
10776         ptr += btrfs_extent_inline_ref_size(type);
10777         if (ptr < end)
10778                 goto next;
10779
10780 out:
10781         return err;
10782 }
10783
10784 /*
10785  * Check if a dev extent item is referred correctly by its chunk
10786  */
10787 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10788                                  struct extent_buffer *eb, int slot)
10789 {
10790         struct btrfs_root *chunk_root = fs_info->chunk_root;
10791         struct btrfs_dev_extent *ptr;
10792         struct btrfs_path path;
10793         struct btrfs_key chunk_key;
10794         struct btrfs_key devext_key;
10795         struct btrfs_chunk *chunk;
10796         struct extent_buffer *l;
10797         int num_stripes;
10798         u64 length;
10799         int i;
10800         int found_chunk = 0;
10801         int ret;
10802
10803         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10804         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10805         length = btrfs_dev_extent_length(eb, ptr);
10806
10807         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10808         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10809         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10810
10811         btrfs_init_path(&path);
10812         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10813         if (ret)
10814                 goto out;
10815
10816         l = path.nodes[0];
10817         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10818         if (btrfs_chunk_length(l, chunk) != length)
10819                 goto out;
10820
10821         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10822         for (i = 0; i < num_stripes; i++) {
10823                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10824                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10825
10826                 if (devid == devext_key.objectid &&
10827                     offset == devext_key.offset) {
10828                         found_chunk = 1;
10829                         break;
10830                 }
10831         }
10832 out:
10833         btrfs_release_path(&path);
10834         if (!found_chunk) {
10835                 error(
10836                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10837                         devext_key.objectid, devext_key.offset, length);
10838                 return REFERENCER_MISSING;
10839         }
10840         return 0;
10841 }
10842
10843 /*
10844  * Check if the used space is correct with the dev item
10845  */
10846 static int check_dev_item(struct btrfs_fs_info *fs_info,
10847                           struct extent_buffer *eb, int slot)
10848 {
10849         struct btrfs_root *dev_root = fs_info->dev_root;
10850         struct btrfs_dev_item *dev_item;
10851         struct btrfs_path path;
10852         struct btrfs_key key;
10853         struct btrfs_dev_extent *ptr;
10854         u64 dev_id;
10855         u64 used;
10856         u64 total = 0;
10857         int ret;
10858
10859         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10860         dev_id = btrfs_device_id(eb, dev_item);
10861         used = btrfs_device_bytes_used(eb, dev_item);
10862
10863         key.objectid = dev_id;
10864         key.type = BTRFS_DEV_EXTENT_KEY;
10865         key.offset = 0;
10866
10867         btrfs_init_path(&path);
10868         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10869         if (ret < 0) {
10870                 btrfs_item_key_to_cpu(eb, &key, slot);
10871                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10872                         key.objectid, key.type, key.offset);
10873                 btrfs_release_path(&path);
10874                 return REFERENCER_MISSING;
10875         }
10876
10877         /* Iterate dev_extents to calculate the used space of a device */
10878         while (1) {
10879                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10880
10881                 if (key.objectid > dev_id)
10882                         break;
10883                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10884                         goto next;
10885
10886                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10887                                      struct btrfs_dev_extent);
10888                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10889 next:
10890                 ret = btrfs_next_item(dev_root, &path);
10891                 if (ret)
10892                         break;
10893         }
10894         btrfs_release_path(&path);
10895
10896         if (used != total) {
10897                 btrfs_item_key_to_cpu(eb, &key, slot);
10898                 error(
10899 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10900                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10901                         BTRFS_DEV_EXTENT_KEY, dev_id);
10902                 return ACCOUNTING_MISMATCH;
10903         }
10904         return 0;
10905 }
10906
10907 /*
10908  * Check a block group item with its referener (chunk) and its used space
10909  * with extent/metadata item
10910  */
10911 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10912                                   struct extent_buffer *eb, int slot)
10913 {
10914         struct btrfs_root *extent_root = fs_info->extent_root;
10915         struct btrfs_root *chunk_root = fs_info->chunk_root;
10916         struct btrfs_block_group_item *bi;
10917         struct btrfs_block_group_item bg_item;
10918         struct btrfs_path path;
10919         struct btrfs_key bg_key;
10920         struct btrfs_key chunk_key;
10921         struct btrfs_key extent_key;
10922         struct btrfs_chunk *chunk;
10923         struct extent_buffer *leaf;
10924         struct btrfs_extent_item *ei;
10925         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10926         u64 flags;
10927         u64 bg_flags;
10928         u64 used;
10929         u64 total = 0;
10930         int ret;
10931         int err = 0;
10932
10933         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10934         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10935         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10936         used = btrfs_block_group_used(&bg_item);
10937         bg_flags = btrfs_block_group_flags(&bg_item);
10938
10939         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10940         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10941         chunk_key.offset = bg_key.objectid;
10942
10943         btrfs_init_path(&path);
10944         /* Search for the referencer chunk */
10945         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10946         if (ret) {
10947                 error(
10948                 "block group[%llu %llu] did not find the related chunk item",
10949                         bg_key.objectid, bg_key.offset);
10950                 err |= REFERENCER_MISSING;
10951         } else {
10952                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10953                                         struct btrfs_chunk);
10954                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10955                                                 bg_key.offset) {
10956                         error(
10957         "block group[%llu %llu] related chunk item length does not match",
10958                                 bg_key.objectid, bg_key.offset);
10959                         err |= REFERENCER_MISMATCH;
10960                 }
10961         }
10962         btrfs_release_path(&path);
10963
10964         /* Search from the block group bytenr */
10965         extent_key.objectid = bg_key.objectid;
10966         extent_key.type = 0;
10967         extent_key.offset = 0;
10968
10969         btrfs_init_path(&path);
10970         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10971         if (ret < 0)
10972                 goto out;
10973
10974         /* Iterate extent tree to account used space */
10975         while (1) {
10976                 leaf = path.nodes[0];
10977                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10978                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10979                         break;
10980
10981                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10982                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10983                         goto next;
10984                 if (extent_key.objectid < bg_key.objectid)
10985                         goto next;
10986
10987                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10988                         total += nodesize;
10989                 else
10990                         total += extent_key.offset;
10991
10992                 ei = btrfs_item_ptr(leaf, path.slots[0],
10993                                     struct btrfs_extent_item);
10994                 flags = btrfs_extent_flags(leaf, ei);
10995                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10996                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10997                                 error(
10998                         "bad extent[%llu, %llu) type mismatch with chunk",
10999                                         extent_key.objectid,
11000                                         extent_key.objectid + extent_key.offset);
11001                                 err |= CHUNK_TYPE_MISMATCH;
11002                         }
11003                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11004                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11005                                     BTRFS_BLOCK_GROUP_METADATA))) {
11006                                 error(
11007                         "bad extent[%llu, %llu) type mismatch with chunk",
11008                                         extent_key.objectid,
11009                                         extent_key.objectid + nodesize);
11010                                 err |= CHUNK_TYPE_MISMATCH;
11011                         }
11012                 }
11013 next:
11014                 ret = btrfs_next_item(extent_root, &path);
11015                 if (ret)
11016                         break;
11017         }
11018
11019 out:
11020         btrfs_release_path(&path);
11021
11022         if (total != used) {
11023                 error(
11024                 "block group[%llu %llu] used %llu but extent items used %llu",
11025                         bg_key.objectid, bg_key.offset, used, total);
11026                 err |= ACCOUNTING_MISMATCH;
11027         }
11028         return err;
11029 }
11030
11031 /*
11032  * Check a chunk item.
11033  * Including checking all referred dev_extents and block group
11034  */
11035 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11036                             struct extent_buffer *eb, int slot)
11037 {
11038         struct btrfs_root *extent_root = fs_info->extent_root;
11039         struct btrfs_root *dev_root = fs_info->dev_root;
11040         struct btrfs_path path;
11041         struct btrfs_key chunk_key;
11042         struct btrfs_key bg_key;
11043         struct btrfs_key devext_key;
11044         struct btrfs_chunk *chunk;
11045         struct extent_buffer *leaf;
11046         struct btrfs_block_group_item *bi;
11047         struct btrfs_block_group_item bg_item;
11048         struct btrfs_dev_extent *ptr;
11049         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11050         u64 length;
11051         u64 chunk_end;
11052         u64 type;
11053         u64 profile;
11054         int num_stripes;
11055         u64 offset;
11056         u64 objectid;
11057         int i;
11058         int ret;
11059         int err = 0;
11060
11061         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11062         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11063         length = btrfs_chunk_length(eb, chunk);
11064         chunk_end = chunk_key.offset + length;
11065         if (!IS_ALIGNED(length, sectorsize)) {
11066                 error("chunk[%llu %llu) not aligned to %u",
11067                         chunk_key.offset, chunk_end, sectorsize);
11068                 err |= BYTES_UNALIGNED;
11069                 goto out;
11070         }
11071
11072         type = btrfs_chunk_type(eb, chunk);
11073         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11074         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11075                 error("chunk[%llu %llu) has no chunk type",
11076                         chunk_key.offset, chunk_end);
11077                 err |= UNKNOWN_TYPE;
11078         }
11079         if (profile && (profile & (profile - 1))) {
11080                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11081                         chunk_key.offset, chunk_end, profile);
11082                 err |= UNKNOWN_TYPE;
11083         }
11084
11085         bg_key.objectid = chunk_key.offset;
11086         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11087         bg_key.offset = length;
11088
11089         btrfs_init_path(&path);
11090         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11091         if (ret) {
11092                 error(
11093                 "chunk[%llu %llu) did not find the related block group item",
11094                         chunk_key.offset, chunk_end);
11095                 err |= REFERENCER_MISSING;
11096         } else{
11097                 leaf = path.nodes[0];
11098                 bi = btrfs_item_ptr(leaf, path.slots[0],
11099                                     struct btrfs_block_group_item);
11100                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11101                                    sizeof(bg_item));
11102                 if (btrfs_block_group_flags(&bg_item) != type) {
11103                         error(
11104 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11105                                 chunk_key.offset, chunk_end, type,
11106                                 btrfs_block_group_flags(&bg_item));
11107                         err |= REFERENCER_MISSING;
11108                 }
11109         }
11110
11111         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11112         for (i = 0; i < num_stripes; i++) {
11113                 btrfs_release_path(&path);
11114                 btrfs_init_path(&path);
11115                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11116                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11117                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11118
11119                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11120                                         0, 0);
11121                 if (ret)
11122                         goto not_match_dev;
11123
11124                 leaf = path.nodes[0];
11125                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11126                                      struct btrfs_dev_extent);
11127                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11128                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11129                 if (objectid != chunk_key.objectid ||
11130                     offset != chunk_key.offset ||
11131                     btrfs_dev_extent_length(leaf, ptr) != length)
11132                         goto not_match_dev;
11133                 continue;
11134 not_match_dev:
11135                 err |= BACKREF_MISSING;
11136                 error(
11137                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11138                         chunk_key.objectid, chunk_end, i);
11139                 continue;
11140         }
11141         btrfs_release_path(&path);
11142 out:
11143         return err;
11144 }
11145
11146 /*
11147  * Main entry function to check known items and update related accounting info
11148  */
11149 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11150 {
11151         struct btrfs_fs_info *fs_info = root->fs_info;
11152         struct btrfs_key key;
11153         int slot = 0;
11154         int type;
11155         struct btrfs_extent_data_ref *dref;
11156         int ret;
11157         int err = 0;
11158
11159 next:
11160         btrfs_item_key_to_cpu(eb, &key, slot);
11161         type = key.type;
11162
11163         switch (type) {
11164         case BTRFS_EXTENT_DATA_KEY:
11165                 ret = check_extent_data_item(root, eb, slot);
11166                 err |= ret;
11167                 break;
11168         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11169                 ret = check_block_group_item(fs_info, eb, slot);
11170                 err |= ret;
11171                 break;
11172         case BTRFS_DEV_ITEM_KEY:
11173                 ret = check_dev_item(fs_info, eb, slot);
11174                 err |= ret;
11175                 break;
11176         case BTRFS_CHUNK_ITEM_KEY:
11177                 ret = check_chunk_item(fs_info, eb, slot);
11178                 err |= ret;
11179                 break;
11180         case BTRFS_DEV_EXTENT_KEY:
11181                 ret = check_dev_extent_item(fs_info, eb, slot);
11182                 err |= ret;
11183                 break;
11184         case BTRFS_EXTENT_ITEM_KEY:
11185         case BTRFS_METADATA_ITEM_KEY:
11186                 ret = check_extent_item(fs_info, eb, slot);
11187                 err |= ret;
11188                 break;
11189         case BTRFS_EXTENT_CSUM_KEY:
11190                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11191                 break;
11192         case BTRFS_TREE_BLOCK_REF_KEY:
11193                 ret = check_tree_block_backref(fs_info, key.offset,
11194                                                key.objectid, -1);
11195                 err |= ret;
11196                 break;
11197         case BTRFS_EXTENT_DATA_REF_KEY:
11198                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11199                 ret = check_extent_data_backref(fs_info,
11200                                 btrfs_extent_data_ref_root(eb, dref),
11201                                 btrfs_extent_data_ref_objectid(eb, dref),
11202                                 btrfs_extent_data_ref_offset(eb, dref),
11203                                 key.objectid, 0,
11204                                 btrfs_extent_data_ref_count(eb, dref));
11205                 err |= ret;
11206                 break;
11207         case BTRFS_SHARED_BLOCK_REF_KEY:
11208                 ret = check_shared_block_backref(fs_info, key.offset,
11209                                                  key.objectid, -1);
11210                 err |= ret;
11211                 break;
11212         case BTRFS_SHARED_DATA_REF_KEY:
11213                 ret = check_shared_data_backref(fs_info, key.offset,
11214                                                 key.objectid);
11215                 err |= ret;
11216                 break;
11217         default:
11218                 break;
11219         }
11220
11221         if (++slot < btrfs_header_nritems(eb))
11222                 goto next;
11223
11224         return err;
11225 }
11226
11227 /*
11228  * Helper function for later fs/subvol tree check.  To determine if a tree
11229  * block should be checked.
 * This function ensures that a fs/subvolume tree block is checked only by
 * the direct referencer with the lowest root id.
11232  *
11233  * Backref check at extent tree would detect errors like missing subvolume
11234  * tree, so we can do aggressive check to reduce duplicated checks.
11235  */
11236 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11237 {
11238         struct btrfs_root *extent_root = root->fs_info->extent_root;
11239         struct btrfs_key key;
11240         struct btrfs_path path;
11241         struct extent_buffer *leaf;
11242         int slot;
11243         struct btrfs_extent_item *ei;
11244         unsigned long ptr;
11245         unsigned long end;
11246         int type;
11247         u32 item_size;
11248         u64 offset;
11249         struct btrfs_extent_inline_ref *iref;
11250         int ret;
11251
11252         btrfs_init_path(&path);
11253         key.objectid = btrfs_header_bytenr(eb);
11254         key.type = BTRFS_METADATA_ITEM_KEY;
11255         key.offset = (u64)-1;
11256
11257         /*
11258          * Any failure in backref resolving means we can't determine
11259          * whom the tree block belongs to.
11260          * So in that case, we need to check that tree block
11261          */
11262         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11263         if (ret < 0)
11264                 goto need_check;
11265
11266         ret = btrfs_previous_extent_item(extent_root, &path,
11267                                          btrfs_header_bytenr(eb));
11268         if (ret)
11269                 goto need_check;
11270
11271         leaf = path.nodes[0];
11272         slot = path.slots[0];
11273         btrfs_item_key_to_cpu(leaf, &key, slot);
11274         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11275
11276         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11277                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11278         } else {
11279                 struct btrfs_tree_block_info *info;
11280
11281                 info = (struct btrfs_tree_block_info *)(ei + 1);
11282                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11283         }
11284
11285         item_size = btrfs_item_size_nr(leaf, slot);
11286         ptr = (unsigned long)iref;
11287         end = (unsigned long)ei + item_size;
11288         while (ptr < end) {
11289                 iref = (struct btrfs_extent_inline_ref *)ptr;
11290                 type = btrfs_extent_inline_ref_type(leaf, iref);
11291                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11292
11293                 /*
11294                  * We only check the tree block if current root is
11295                  * the lowest referencer of it.
11296                  */
11297                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11298                     offset < root->objectid) {
11299                         btrfs_release_path(&path);
11300                         return 0;
11301                 }
11302
11303                 ptr += btrfs_extent_inline_ref_size(type);
11304         }
11305         /*
11306          * Normally we should also check keyed tree block ref, but that may be
11307          * very time consuming.  Inlined ref should already make us skip a lot
11308          * of refs now.  So skip search keyed tree block ref.
11309          */
11310
11311 need_check:
11312         btrfs_release_path(&path);
11313         return 1;
11314 }
11315
11316 /*
11317  * Traversal function for tree block. We will do:
11318  * 1) Skip shared fs/subvolume tree blocks
11319  * 2) Update related bytes accounting
11320  * 3) Pre-order traversal
11321  */
11322 static int traverse_tree_block(struct btrfs_root *root,
11323                                 struct extent_buffer *node)
11324 {
11325         struct extent_buffer *eb;
11326         struct btrfs_key key;
11327         struct btrfs_key drop_key;
11328         int level;
11329         u64 nr;
11330         int i;
11331         int err = 0;
11332         int ret;
11333
11334         /*
11335          * Skip shared fs/subvolume tree block, in that case they will
11336          * be checked by referencer with lowest rootid
11337          */
11338         if (is_fstree(root->objectid) && !should_check(root, node))
11339                 return 0;
11340
11341         /* Update bytes accounting */
11342         total_btree_bytes += node->len;
11343         if (fs_root_objectid(btrfs_header_owner(node)))
11344                 total_fs_tree_bytes += node->len;
11345         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11346                 total_extent_tree_bytes += node->len;
11347         if (!found_old_backref &&
11348             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11349             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11350             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11351                 found_old_backref = 1;
11352
11353         /* pre-order tranversal, check itself first */
11354         level = btrfs_header_level(node);
11355         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11356                                    btrfs_header_level(node),
11357                                    btrfs_header_owner(node));
11358         err |= ret;
11359         if (err)
11360                 error(
11361         "check %s failed root %llu bytenr %llu level %d, force continue check",
11362                         level ? "node":"leaf", root->objectid,
11363                         btrfs_header_bytenr(node), btrfs_header_level(node));
11364
11365         if (!level) {
11366                 btree_space_waste += btrfs_leaf_free_space(root, node);
11367                 ret = check_leaf_items(root, node);
11368                 err |= ret;
11369                 return err;
11370         }
11371
11372         nr = btrfs_header_nritems(node);
11373         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11374         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11375                 sizeof(struct btrfs_key_ptr);
11376
11377         /* Then check all its children */
11378         for (i = 0; i < nr; i++) {
11379                 u64 blocknr = btrfs_node_blockptr(node, i);
11380
11381                 btrfs_node_key_to_cpu(node, &key, i);
11382                 if (level == root->root_item.drop_level &&
11383                     is_dropped_key(&key, &drop_key))
11384                         continue;
11385
11386                 /*
11387                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11388                  * to call the function itself.
11389                  */
11390                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11391                 if (extent_buffer_uptodate(eb)) {
11392                         ret = traverse_tree_block(root, eb);
11393                         err |= ret;
11394                 }
11395                 free_extent_buffer(eb);
11396         }
11397
11398         return err;
11399 }
11400
11401 /*
11402  * Low memory usage version check_chunks_and_extents.
11403  */
11404 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11405 {
11406         struct btrfs_path path;
11407         struct btrfs_key key;
11408         struct btrfs_root *root1;
11409         struct btrfs_root *cur_root;
11410         int err = 0;
11411         int ret;
11412
11413         root1 = root->fs_info->chunk_root;
11414         ret = traverse_tree_block(root1, root1->node);
11415         err |= ret;
11416
11417         root1 = root->fs_info->tree_root;
11418         ret = traverse_tree_block(root1, root1->node);
11419         err |= ret;
11420
11421         btrfs_init_path(&path);
11422         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11423         key.offset = 0;
11424         key.type = BTRFS_ROOT_ITEM_KEY;
11425
11426         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11427         if (ret) {
11428                 error("cannot find extent treet in tree_root");
11429                 goto out;
11430         }
11431
11432         while (1) {
11433                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11434                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11435                         goto next;
11436                 key.offset = (u64)-1;
11437
11438                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11439                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11440                                         &key);
11441                 else
11442                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11443                 if (IS_ERR(cur_root) || !cur_root) {
11444                         error("failed to read tree: %lld", key.objectid);
11445                         goto next;
11446                 }
11447
11448                 ret = traverse_tree_block(cur_root, cur_root->node);
11449                 err |= ret;
11450
11451                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11452                         btrfs_free_fs_root(cur_root);
11453 next:
11454                 ret = btrfs_next_item(root1, &path);
11455                 if (ret)
11456                         goto out;
11457         }
11458
11459 out:
11460         btrfs_release_path(&path);
11461         return err;
11462 }
11463
11464 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11465                            struct btrfs_root *root, int overwrite)
11466 {
11467         struct extent_buffer *c;
11468         struct extent_buffer *old = root->node;
11469         int level;
11470         int ret;
11471         struct btrfs_disk_key disk_key = {0,0,0};
11472
11473         level = 0;
11474
11475         if (overwrite) {
11476                 c = old;
11477                 extent_buffer_get(c);
11478                 goto init;
11479         }
11480         c = btrfs_alloc_free_block(trans, root,
11481                                    root->nodesize,
11482                                    root->root_key.objectid,
11483                                    &disk_key, level, 0, 0);
11484         if (IS_ERR(c)) {
11485                 c = old;
11486                 extent_buffer_get(c);
11487                 overwrite = 1;
11488         }
11489 init:
11490         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11491         btrfs_set_header_level(c, level);
11492         btrfs_set_header_bytenr(c, c->start);
11493         btrfs_set_header_generation(c, trans->transid);
11494         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11495         btrfs_set_header_owner(c, root->root_key.objectid);
11496
11497         write_extent_buffer(c, root->fs_info->fsid,
11498                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11499
11500         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11501                             btrfs_header_chunk_tree_uuid(c),
11502                             BTRFS_UUID_SIZE);
11503
11504         btrfs_mark_buffer_dirty(c);
11505         /*
11506          * this case can happen in the following case:
11507          *
11508          * 1.overwrite previous root.
11509          *
11510          * 2.reinit reloc data root, this is because we skip pin
11511          * down reloc data tree before which means we can allocate
11512          * same block bytenr here.
11513          */
11514         if (old->start == c->start) {
11515                 btrfs_set_root_generation(&root->root_item,
11516                                           trans->transid);
11517                 root->root_item.level = btrfs_header_level(root->node);
11518                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11519                                         &root->root_key, &root->root_item);
11520                 if (ret) {
11521                         free_extent_buffer(c);
11522                         return ret;
11523                 }
11524         }
11525         free_extent_buffer(old);
11526         root->node = c;
11527         add_root_to_dirty_list(root);
11528         return 0;
11529 }
11530
11531 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11532                                 struct extent_buffer *eb, int tree_root)
11533 {
11534         struct extent_buffer *tmp;
11535         struct btrfs_root_item *ri;
11536         struct btrfs_key key;
11537         u64 bytenr;
11538         u32 nodesize;
11539         int level = btrfs_header_level(eb);
11540         int nritems;
11541         int ret;
11542         int i;
11543
11544         /*
11545          * If we have pinned this block before, don't pin it again.
11546          * This can not only avoid forever loop with broken filesystem
11547          * but also give us some speedups.
11548          */
11549         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11550                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11551                 return 0;
11552
11553         btrfs_pin_extent(fs_info, eb->start, eb->len);
11554
11555         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11556         nritems = btrfs_header_nritems(eb);
11557         for (i = 0; i < nritems; i++) {
11558                 if (level == 0) {
11559                         btrfs_item_key_to_cpu(eb, &key, i);
11560                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11561                                 continue;
11562                         /* Skip the extent root and reloc roots */
11563                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11564                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11565                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11566                                 continue;
11567                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11568                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11569
11570                         /*
11571                          * If at any point we start needing the real root we
11572                          * will have to build a stump root for the root we are
11573                          * in, but for now this doesn't actually use the root so
11574                          * just pass in extent_root.
11575                          */
11576                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11577                                               nodesize, 0);
11578                         if (!extent_buffer_uptodate(tmp)) {
11579                                 fprintf(stderr, "Error reading root block\n");
11580                                 return -EIO;
11581                         }
11582                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11583                         free_extent_buffer(tmp);
11584                         if (ret)
11585                                 return ret;
11586                 } else {
11587                         bytenr = btrfs_node_blockptr(eb, i);
11588
11589                         /* If we aren't the tree root don't read the block */
11590                         if (level == 1 && !tree_root) {
11591                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11592                                 continue;
11593                         }
11594
11595                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11596                                               nodesize, 0);
11597                         if (!extent_buffer_uptodate(tmp)) {
11598                                 fprintf(stderr, "Error reading tree block\n");
11599                                 return -EIO;
11600                         }
11601                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11602                         free_extent_buffer(tmp);
11603                         if (ret)
11604                                 return ret;
11605                 }
11606         }
11607
11608         return 0;
11609 }
11610
11611 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11612 {
11613         int ret;
11614
11615         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11616         if (ret)
11617                 return ret;
11618
11619         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11620 }
11621
11622 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11623 {
11624         struct btrfs_block_group_cache *cache;
11625         struct btrfs_path path;
11626         struct extent_buffer *leaf;
11627         struct btrfs_chunk *chunk;
11628         struct btrfs_key key;
11629         int ret;
11630         u64 start;
11631
11632         btrfs_init_path(&path);
11633         key.objectid = 0;
11634         key.type = BTRFS_CHUNK_ITEM_KEY;
11635         key.offset = 0;
11636         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11637         if (ret < 0) {
11638                 btrfs_release_path(&path);
11639                 return ret;
11640         }
11641
11642         /*
11643          * We do this in case the block groups were screwed up and had alloc
11644          * bits that aren't actually set on the chunks.  This happens with
11645          * restored images every time and could happen in real life I guess.
11646          */
11647         fs_info->avail_data_alloc_bits = 0;
11648         fs_info->avail_metadata_alloc_bits = 0;
11649         fs_info->avail_system_alloc_bits = 0;
11650
11651         /* First we need to create the in-memory block groups */
11652         while (1) {
11653                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11654                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11655                         if (ret < 0) {
11656                                 btrfs_release_path(&path);
11657                                 return ret;
11658                         }
11659                         if (ret) {
11660                                 ret = 0;
11661                                 break;
11662                         }
11663                 }
11664                 leaf = path.nodes[0];
11665                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11666                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11667                         path.slots[0]++;
11668                         continue;
11669                 }
11670
11671                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11672                 btrfs_add_block_group(fs_info, 0,
11673                                       btrfs_chunk_type(leaf, chunk),
11674                                       key.objectid, key.offset,
11675                                       btrfs_chunk_length(leaf, chunk));
11676                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11677                                  key.offset + btrfs_chunk_length(leaf, chunk));
11678                 path.slots[0]++;
11679         }
11680         start = 0;
11681         while (1) {
11682                 cache = btrfs_lookup_first_block_group(fs_info, start);
11683                 if (!cache)
11684                         break;
11685                 cache->cached = 1;
11686                 start = cache->key.objectid + cache->key.offset;
11687         }
11688
11689         btrfs_release_path(&path);
11690         return 0;
11691 }
11692
11693 static int reset_balance(struct btrfs_trans_handle *trans,
11694                          struct btrfs_fs_info *fs_info)
11695 {
11696         struct btrfs_root *root = fs_info->tree_root;
11697         struct btrfs_path path;
11698         struct extent_buffer *leaf;
11699         struct btrfs_key key;
11700         int del_slot, del_nr = 0;
11701         int ret;
11702         int found = 0;
11703
11704         btrfs_init_path(&path);
11705         key.objectid = BTRFS_BALANCE_OBJECTID;
11706         key.type = BTRFS_BALANCE_ITEM_KEY;
11707         key.offset = 0;
11708         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11709         if (ret) {
11710                 if (ret > 0)
11711                         ret = 0;
11712                 if (!ret)
11713                         goto reinit_data_reloc;
11714                 else
11715                         goto out;
11716         }
11717
11718         ret = btrfs_del_item(trans, root, &path);
11719         if (ret)
11720                 goto out;
11721         btrfs_release_path(&path);
11722
11723         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11724         key.type = BTRFS_ROOT_ITEM_KEY;
11725         key.offset = 0;
11726         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11727         if (ret < 0)
11728                 goto out;
11729         while (1) {
11730                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11731                         if (!found)
11732                                 break;
11733
11734                         if (del_nr) {
11735                                 ret = btrfs_del_items(trans, root, &path,
11736                                                       del_slot, del_nr);
11737                                 del_nr = 0;
11738                                 if (ret)
11739                                         goto out;
11740                         }
11741                         key.offset++;
11742                         btrfs_release_path(&path);
11743
11744                         found = 0;
11745                         ret = btrfs_search_slot(trans, root, &key, &path,
11746                                                 -1, 1);
11747                         if (ret < 0)
11748                                 goto out;
11749                         continue;
11750                 }
11751                 found = 1;
11752                 leaf = path.nodes[0];
11753                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11754                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11755                         break;
11756                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11757                         path.slots[0]++;
11758                         continue;
11759                 }
11760                 if (!del_nr) {
11761                         del_slot = path.slots[0];
11762                         del_nr = 1;
11763                 } else {
11764                         del_nr++;
11765                 }
11766                 path.slots[0]++;
11767         }
11768
11769         if (del_nr) {
11770                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11771                 if (ret)
11772                         goto out;
11773         }
11774         btrfs_release_path(&path);
11775
11776 reinit_data_reloc:
11777         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11778         key.type = BTRFS_ROOT_ITEM_KEY;
11779         key.offset = (u64)-1;
11780         root = btrfs_read_fs_root(fs_info, &key);
11781         if (IS_ERR(root)) {
11782                 fprintf(stderr, "Error reading data reloc tree\n");
11783                 ret = PTR_ERR(root);
11784                 goto out;
11785         }
11786         record_root_in_trans(trans, root);
11787         ret = btrfs_fsck_reinit_root(trans, root, 0);
11788         if (ret)
11789                 goto out;
11790         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11791 out:
11792         btrfs_release_path(&path);
11793         return ret;
11794 }
11795
11796 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11797                               struct btrfs_fs_info *fs_info)
11798 {
11799         u64 start = 0;
11800         int ret;
11801
11802         /*
11803          * The only reason we don't do this is because right now we're just
11804          * walking the trees we find and pinning down their bytes, we don't look
11805          * at any of the leaves.  In order to do mixed groups we'd have to check
11806          * the leaves of any fs roots and pin down the bytes for any file
11807          * extents we find.  Not hard but why do it if we don't have to?
11808          */
11809         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11810                 fprintf(stderr, "We don't support re-initing the extent tree "
11811                         "for mixed block groups yet, please notify a btrfs "
11812                         "developer you want to do this so they can add this "
11813                         "functionality.\n");
11814                 return -EINVAL;
11815         }
11816
11817         /*
11818          * first we need to walk all of the trees except the extent tree and pin
11819          * down the bytes that are in use so we don't overwrite any existing
11820          * metadata.
11821          */
11822         ret = pin_metadata_blocks(fs_info);
11823         if (ret) {
11824                 fprintf(stderr, "error pinning down used bytes\n");
11825                 return ret;
11826         }
11827
11828         /*
11829          * Need to drop all the block groups since we're going to recreate all
11830          * of them again.
11831          */
11832         btrfs_free_block_groups(fs_info);
11833         ret = reset_block_groups(fs_info);
11834         if (ret) {
11835                 fprintf(stderr, "error resetting the block groups\n");
11836                 return ret;
11837         }
11838
11839         /* Ok we can allocate now, reinit the extent root */
11840         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11841         if (ret) {
11842                 fprintf(stderr, "extent root initialization failed\n");
11843                 /*
11844                  * When the transaction code is updated we should end the
11845                  * transaction, but for now progs only knows about commit so
11846                  * just return an error.
11847                  */
11848                 return ret;
11849         }
11850
11851         /*
11852          * Now we have all the in-memory block groups setup so we can make
11853          * allocations properly, and the metadata we care about is safe since we
11854          * pinned all of it above.
11855          */
11856         while (1) {
11857                 struct btrfs_block_group_cache *cache;
11858
11859                 cache = btrfs_lookup_first_block_group(fs_info, start);
11860                 if (!cache)
11861                         break;
11862                 start = cache->key.objectid + cache->key.offset;
11863                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11864                                         &cache->key, &cache->item,
11865                                         sizeof(cache->item));
11866                 if (ret) {
11867                         fprintf(stderr, "Error adding block group\n");
11868                         return ret;
11869                 }
11870                 btrfs_extent_post_op(trans, fs_info->extent_root);
11871         }
11872
11873         ret = reset_balance(trans, fs_info);
11874         if (ret)
11875                 fprintf(stderr, "error resetting the pending balance\n");
11876
11877         return ret;
11878 }
11879
11880 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11881 {
11882         struct btrfs_path path;
11883         struct btrfs_trans_handle *trans;
11884         struct btrfs_key key;
11885         int ret;
11886
11887         printf("Recowing metadata block %llu\n", eb->start);
11888         key.objectid = btrfs_header_owner(eb);
11889         key.type = BTRFS_ROOT_ITEM_KEY;
11890         key.offset = (u64)-1;
11891
11892         root = btrfs_read_fs_root(root->fs_info, &key);
11893         if (IS_ERR(root)) {
11894                 fprintf(stderr, "Couldn't find owner root %llu\n",
11895                         key.objectid);
11896                 return PTR_ERR(root);
11897         }
11898
11899         trans = btrfs_start_transaction(root, 1);
11900         if (IS_ERR(trans))
11901                 return PTR_ERR(trans);
11902
11903         btrfs_init_path(&path);
11904         path.lowest_level = btrfs_header_level(eb);
11905         if (path.lowest_level)
11906                 btrfs_node_key_to_cpu(eb, &key, 0);
11907         else
11908                 btrfs_item_key_to_cpu(eb, &key, 0);
11909
11910         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11911         btrfs_commit_transaction(trans, root);
11912         btrfs_release_path(&path);
11913         return ret;
11914 }
11915
11916 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11917 {
11918         struct btrfs_path path;
11919         struct btrfs_trans_handle *trans;
11920         struct btrfs_key key;
11921         int ret;
11922
11923         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11924                bad->key.type, bad->key.offset);
11925         key.objectid = bad->root_id;
11926         key.type = BTRFS_ROOT_ITEM_KEY;
11927         key.offset = (u64)-1;
11928
11929         root = btrfs_read_fs_root(root->fs_info, &key);
11930         if (IS_ERR(root)) {
11931                 fprintf(stderr, "Couldn't find owner root %llu\n",
11932                         key.objectid);
11933                 return PTR_ERR(root);
11934         }
11935
11936         trans = btrfs_start_transaction(root, 1);
11937         if (IS_ERR(trans))
11938                 return PTR_ERR(trans);
11939
11940         btrfs_init_path(&path);
11941         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11942         if (ret) {
11943                 if (ret > 0)
11944                         ret = 0;
11945                 goto out;
11946         }
11947         ret = btrfs_del_item(trans, root, &path);
11948 out:
11949         btrfs_commit_transaction(trans, root);
11950         btrfs_release_path(&path);
11951         return ret;
11952 }
11953
11954 static int zero_log_tree(struct btrfs_root *root)
11955 {
11956         struct btrfs_trans_handle *trans;
11957         int ret;
11958
11959         trans = btrfs_start_transaction(root, 1);
11960         if (IS_ERR(trans)) {
11961                 ret = PTR_ERR(trans);
11962                 return ret;
11963         }
11964         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11965         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11966         ret = btrfs_commit_transaction(trans, root);
11967         return ret;
11968 }
11969
11970 static int populate_csum(struct btrfs_trans_handle *trans,
11971                          struct btrfs_root *csum_root, char *buf, u64 start,
11972                          u64 len)
11973 {
11974         u64 offset = 0;
11975         u64 sectorsize;
11976         int ret = 0;
11977
11978         while (offset < len) {
11979                 sectorsize = csum_root->sectorsize;
11980                 ret = read_extent_data(csum_root, buf, start + offset,
11981                                        &sectorsize, 0);
11982                 if (ret)
11983                         break;
11984                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11985                                             start + offset, buf, sectorsize);
11986                 if (ret)
11987                         break;
11988                 offset += sectorsize;
11989         }
11990         return ret;
11991 }
11992
11993 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11994                                       struct btrfs_root *csum_root,
11995                                       struct btrfs_root *cur_root)
11996 {
11997         struct btrfs_path path;
11998         struct btrfs_key key;
11999         struct extent_buffer *node;
12000         struct btrfs_file_extent_item *fi;
12001         char *buf = NULL;
12002         u64 start = 0;
12003         u64 len = 0;
12004         int slot = 0;
12005         int ret = 0;
12006
12007         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12008         if (!buf)
12009                 return -ENOMEM;
12010
12011         btrfs_init_path(&path);
12012         key.objectid = 0;
12013         key.offset = 0;
12014         key.type = 0;
12015         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12016         if (ret < 0)
12017                 goto out;
12018         /* Iterate all regular file extents and fill its csum */
12019         while (1) {
12020                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12021
12022                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12023                         goto next;
12024                 node = path.nodes[0];
12025                 slot = path.slots[0];
12026                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12027                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12028                         goto next;
12029                 start = btrfs_file_extent_disk_bytenr(node, fi);
12030                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12031
12032                 ret = populate_csum(trans, csum_root, buf, start, len);
12033                 if (ret == -EEXIST)
12034                         ret = 0;
12035                 if (ret < 0)
12036                         goto out;
12037 next:
12038                 /*
12039                  * TODO: if next leaf is corrupted, jump to nearest next valid
12040                  * leaf.
12041                  */
12042                 ret = btrfs_next_item(cur_root, &path);
12043                 if (ret < 0)
12044                         goto out;
12045                 if (ret > 0) {
12046                         ret = 0;
12047                         goto out;
12048                 }
12049         }
12050
12051 out:
12052         btrfs_release_path(&path);
12053         free(buf);
12054         return ret;
12055 }
12056
12057 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12058                                   struct btrfs_root *csum_root)
12059 {
12060         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12061         struct btrfs_path path;
12062         struct btrfs_root *tree_root = fs_info->tree_root;
12063         struct btrfs_root *cur_root;
12064         struct extent_buffer *node;
12065         struct btrfs_key key;
12066         int slot = 0;
12067         int ret = 0;
12068
12069         btrfs_init_path(&path);
12070         key.objectid = BTRFS_FS_TREE_OBJECTID;
12071         key.offset = 0;
12072         key.type = BTRFS_ROOT_ITEM_KEY;
12073         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12074         if (ret < 0)
12075                 goto out;
12076         if (ret > 0) {
12077                 ret = -ENOENT;
12078                 goto out;
12079         }
12080
12081         while (1) {
12082                 node = path.nodes[0];
12083                 slot = path.slots[0];
12084                 btrfs_item_key_to_cpu(node, &key, slot);
12085                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12086                         goto out;
12087                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12088                         goto next;
12089                 if (!is_fstree(key.objectid))
12090                         goto next;
12091                 key.offset = (u64)-1;
12092
12093                 cur_root = btrfs_read_fs_root(fs_info, &key);
12094                 if (IS_ERR(cur_root) || !cur_root) {
12095                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12096                                 key.objectid);
12097                         goto out;
12098                 }
12099                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12100                                 cur_root);
12101                 if (ret < 0)
12102                         goto out;
12103 next:
12104                 ret = btrfs_next_item(tree_root, &path);
12105                 if (ret > 0) {
12106                         ret = 0;
12107                         goto out;
12108                 }
12109                 if (ret < 0)
12110                         goto out;
12111         }
12112
12113 out:
12114         btrfs_release_path(&path);
12115         return ret;
12116 }
12117
12118 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12119                                       struct btrfs_root *csum_root)
12120 {
12121         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12122         struct btrfs_path path;
12123         struct btrfs_extent_item *ei;
12124         struct extent_buffer *leaf;
12125         char *buf;
12126         struct btrfs_key key;
12127         int ret;
12128
12129         btrfs_init_path(&path);
12130         key.objectid = 0;
12131         key.type = BTRFS_EXTENT_ITEM_KEY;
12132         key.offset = 0;
12133         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12134         if (ret < 0) {
12135                 btrfs_release_path(&path);
12136                 return ret;
12137         }
12138
12139         buf = malloc(csum_root->sectorsize);
12140         if (!buf) {
12141                 btrfs_release_path(&path);
12142                 return -ENOMEM;
12143         }
12144
12145         while (1) {
12146                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12147                         ret = btrfs_next_leaf(extent_root, &path);
12148                         if (ret < 0)
12149                                 break;
12150                         if (ret) {
12151                                 ret = 0;
12152                                 break;
12153                         }
12154                 }
12155                 leaf = path.nodes[0];
12156
12157                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12158                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12159                         path.slots[0]++;
12160                         continue;
12161                 }
12162
12163                 ei = btrfs_item_ptr(leaf, path.slots[0],
12164                                     struct btrfs_extent_item);
12165                 if (!(btrfs_extent_flags(leaf, ei) &
12166                       BTRFS_EXTENT_FLAG_DATA)) {
12167                         path.slots[0]++;
12168                         continue;
12169                 }
12170
12171                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12172                                     key.offset);
12173                 if (ret)
12174                         break;
12175                 path.slots[0]++;
12176         }
12177
12178         btrfs_release_path(&path);
12179         free(buf);
12180         return ret;
12181 }
12182
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees are
 * scanned instead.  A non-zero @search_fs_tree selects the fs/subvolume tree
 * scan; zero selects the extent tree scan.
 *
 * Returns whatever the selected scan returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12199
12200 static void free_roots_info_cache(void)
12201 {
12202         if (!roots_info_cache)
12203                 return;
12204
12205         while (!cache_tree_empty(roots_info_cache)) {
12206                 struct cache_extent *entry;
12207                 struct root_item_info *rii;
12208
12209                 entry = first_cache_extent(roots_info_cache);
12210                 if (!entry)
12211                         break;
12212                 remove_cache_extent(roots_info_cache, entry);
12213                 rii = container_of(entry, struct root_item_info, cache_extent);
12214                 free(rii);
12215         }
12216
12217         free(roots_info_cache);
12218         roots_info_cache = NULL;
12219 }
12220
12221 static int build_roots_info_cache(struct btrfs_fs_info *info)
12222 {
12223         int ret = 0;
12224         struct btrfs_key key;
12225         struct extent_buffer *leaf;
12226         struct btrfs_path path;
12227
12228         if (!roots_info_cache) {
12229                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12230                 if (!roots_info_cache)
12231                         return -ENOMEM;
12232                 cache_tree_init(roots_info_cache);
12233         }
12234
12235         btrfs_init_path(&path);
12236         key.objectid = 0;
12237         key.type = BTRFS_EXTENT_ITEM_KEY;
12238         key.offset = 0;
12239         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12240         if (ret < 0)
12241                 goto out;
12242         leaf = path.nodes[0];
12243
12244         while (1) {
12245                 struct btrfs_key found_key;
12246                 struct btrfs_extent_item *ei;
12247                 struct btrfs_extent_inline_ref *iref;
12248                 int slot = path.slots[0];
12249                 int type;
12250                 u64 flags;
12251                 u64 root_id;
12252                 u8 level;
12253                 struct cache_extent *entry;
12254                 struct root_item_info *rii;
12255
12256                 if (slot >= btrfs_header_nritems(leaf)) {
12257                         ret = btrfs_next_leaf(info->extent_root, &path);
12258                         if (ret < 0) {
12259                                 break;
12260                         } else if (ret) {
12261                                 ret = 0;
12262                                 break;
12263                         }
12264                         leaf = path.nodes[0];
12265                         slot = path.slots[0];
12266                 }
12267
12268                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12269
12270                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12271                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12272                         goto next;
12273
12274                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12275                 flags = btrfs_extent_flags(leaf, ei);
12276
12277                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12278                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12279                         goto next;
12280
12281                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12282                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12283                         level = found_key.offset;
12284                 } else {
12285                         struct btrfs_tree_block_info *binfo;
12286
12287                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12288                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12289                         level = btrfs_tree_block_level(leaf, binfo);
12290                 }
12291
12292                 /*
12293                  * For a root extent, it must be of the following type and the
12294                  * first (and only one) iref in the item.
12295                  */
12296                 type = btrfs_extent_inline_ref_type(leaf, iref);
12297                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12298                         goto next;
12299
12300                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12301                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12302                 if (!entry) {
12303                         rii = malloc(sizeof(struct root_item_info));
12304                         if (!rii) {
12305                                 ret = -ENOMEM;
12306                                 goto out;
12307                         }
12308                         rii->cache_extent.start = root_id;
12309                         rii->cache_extent.size = 1;
12310                         rii->level = (u8)-1;
12311                         entry = &rii->cache_extent;
12312                         ret = insert_cache_extent(roots_info_cache, entry);
12313                         ASSERT(ret == 0);
12314                 } else {
12315                         rii = container_of(entry, struct root_item_info,
12316                                            cache_extent);
12317                 }
12318
12319                 ASSERT(rii->cache_extent.start == root_id);
12320                 ASSERT(rii->cache_extent.size == 1);
12321
12322                 if (level > rii->level || rii->level == (u8)-1) {
12323                         rii->level = level;
12324                         rii->bytenr = found_key.objectid;
12325                         rii->gen = btrfs_extent_generation(leaf, ei);
12326                         rii->node_count = 1;
12327                 } else if (level == rii->level) {
12328                         rii->node_count++;
12329                 }
12330 next:
12331                 path.slots[0]++;
12332         }
12333
12334 out:
12335         btrfs_release_path(&path);
12336
12337         return ret;
12338 }
12339
12340 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12341                                   struct btrfs_path *path,
12342                                   const struct btrfs_key *root_key,
12343                                   const int read_only_mode)
12344 {
12345         const u64 root_id = root_key->objectid;
12346         struct cache_extent *entry;
12347         struct root_item_info *rii;
12348         struct btrfs_root_item ri;
12349         unsigned long offset;
12350
12351         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12352         if (!entry) {
12353                 fprintf(stderr,
12354                         "Error: could not find extent items for root %llu\n",
12355                         root_key->objectid);
12356                 return -ENOENT;
12357         }
12358
12359         rii = container_of(entry, struct root_item_info, cache_extent);
12360         ASSERT(rii->cache_extent.start == root_id);
12361         ASSERT(rii->cache_extent.size == 1);
12362
12363         if (rii->node_count != 1) {
12364                 fprintf(stderr,
12365                         "Error: could not find btree root extent for root %llu\n",
12366                         root_id);
12367                 return -ENOENT;
12368         }
12369
12370         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12371         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12372
12373         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12374             btrfs_root_level(&ri) != rii->level ||
12375             btrfs_root_generation(&ri) != rii->gen) {
12376
12377                 /*
12378                  * If we're in repair mode but our caller told us to not update
12379                  * the root item, i.e. just check if it needs to be updated, don't
12380                  * print this message, since the caller will call us again shortly
12381                  * for the same root item without read only mode (the caller will
12382                  * open a transaction first).
12383                  */
12384                 if (!(read_only_mode && repair))
12385                         fprintf(stderr,
12386                                 "%sroot item for root %llu,"
12387                                 " current bytenr %llu, current gen %llu, current level %u,"
12388                                 " new bytenr %llu, new gen %llu, new level %u\n",
12389                                 (read_only_mode ? "" : "fixing "),
12390                                 root_id,
12391                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12392                                 btrfs_root_level(&ri),
12393                                 rii->bytenr, rii->gen, rii->level);
12394
12395                 if (btrfs_root_generation(&ri) > rii->gen) {
12396                         fprintf(stderr,
12397                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12398                                 root_id, btrfs_root_generation(&ri), rii->gen);
12399                         return -EINVAL;
12400                 }
12401
12402                 if (!read_only_mode) {
12403                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12404                         btrfs_set_root_level(&ri, rii->level);
12405                         btrfs_set_root_generation(&ri, rii->gen);
12406                         write_extent_buffer(path->nodes[0], &ri,
12407                                             offset, sizeof(ri));
12408                 }
12409
12410                 return 1;
12411         }
12412
12413         return 0;
12414 }
12415
12416 /*
12417  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12418  * caused read-only snapshots to be corrupted if they were created at a moment
12419  * when the source subvolume/snapshot had orphan items. The issue was that the
12420  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12421  * node instead of the post orphan cleanup root node.
12422  * So this function, and its callees, just detects and fixes those cases. Even
12423  * though the regression was for read-only snapshots, this function applies to
12424  * any snapshot/subvolume root.
12425  * This must be run before any other repair code - not doing it so, makes other
12426  * repair code delete or modify backrefs in the extent tree for example, which
12427  * will result in an inconsistent fs after repairing the root items.
12428  */
12429 static int repair_root_items(struct btrfs_fs_info *info)
12430 {
12431         struct btrfs_path path;
12432         struct btrfs_key key;
12433         struct extent_buffer *leaf;
12434         struct btrfs_trans_handle *trans = NULL;
12435         int ret = 0;
12436         int bad_roots = 0;
12437         int need_trans = 0;
12438
12439         btrfs_init_path(&path);
12440
12441         ret = build_roots_info_cache(info);
12442         if (ret)
12443                 goto out;
12444
12445         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12446         key.type = BTRFS_ROOT_ITEM_KEY;
12447         key.offset = 0;
12448
12449 again:
12450         /*
12451          * Avoid opening and committing transactions if a leaf doesn't have
12452          * any root items that need to be fixed, so that we avoid rotating
12453          * backup roots unnecessarily.
12454          */
12455         if (need_trans) {
12456                 trans = btrfs_start_transaction(info->tree_root, 1);
12457                 if (IS_ERR(trans)) {
12458                         ret = PTR_ERR(trans);
12459                         goto out;
12460                 }
12461         }
12462
12463         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12464                                 0, trans ? 1 : 0);
12465         if (ret < 0)
12466                 goto out;
12467         leaf = path.nodes[0];
12468
12469         while (1) {
12470                 struct btrfs_key found_key;
12471
12472                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12473                         int no_more_keys = find_next_key(&path, &key);
12474
12475                         btrfs_release_path(&path);
12476                         if (trans) {
12477                                 ret = btrfs_commit_transaction(trans,
12478                                                                info->tree_root);
12479                                 trans = NULL;
12480                                 if (ret < 0)
12481                                         goto out;
12482                         }
12483                         need_trans = 0;
12484                         if (no_more_keys)
12485                                 break;
12486                         goto again;
12487                 }
12488
12489                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12490
12491                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12492                         goto next;
12493                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12494                         goto next;
12495
12496                 ret = maybe_repair_root_item(info, &path, &found_key,
12497                                              trans ? 0 : 1);
12498                 if (ret < 0)
12499                         goto out;
12500                 if (ret) {
12501                         if (!trans && repair) {
12502                                 need_trans = 1;
12503                                 key = found_key;
12504                                 btrfs_release_path(&path);
12505                                 goto again;
12506                         }
12507                         bad_roots++;
12508                 }
12509 next:
12510                 path.slots[0]++;
12511         }
12512         ret = 0;
12513 out:
12514         free_roots_info_cache();
12515         btrfs_release_path(&path);
12516         if (trans)
12517                 btrfs_commit_transaction(trans, info->tree_root);
12518         if (ret < 0)
12519                 return ret;
12520
12521         return bad_roots;
12522 }
12523
12524 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12525 {
12526         struct btrfs_trans_handle *trans;
12527         struct btrfs_block_group_cache *bg_cache;
12528         u64 current = 0;
12529         int ret = 0;
12530
12531         /* Clear all free space cache inodes and its extent data */
12532         while (1) {
12533                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12534                 if (!bg_cache)
12535                         break;
12536                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12537                 if (ret < 0)
12538                         return ret;
12539                 current = bg_cache->key.objectid + bg_cache->key.offset;
12540         }
12541
12542         /* Don't forget to set cache_generation to -1 */
12543         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12544         if (IS_ERR(trans)) {
12545                 error("failed to update super block cache generation");
12546                 return PTR_ERR(trans);
12547         }
12548         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12549         btrfs_commit_transaction(trans, fs_info->tree_root);
12550
12551         return ret;
12552 }
12553
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * The table is a NULL-terminated list of lines: the synopsis first, then the
 * description, then an empty string followed by one or more lines per
 * option.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12584
12585 int cmd_check(int argc, char **argv)
12586 {
12587         struct cache_tree root_cache;
12588         struct btrfs_root *root;
12589         struct btrfs_fs_info *info;
12590         u64 bytenr = 0;
12591         u64 subvolid = 0;
12592         u64 tree_root_bytenr = 0;
12593         u64 chunk_root_bytenr = 0;
12594         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12595         int ret;
12596         int err = 0;
12597         u64 num;
12598         int init_csum_tree = 0;
12599         int readonly = 0;
12600         int clear_space_cache = 0;
12601         int qgroup_report = 0;
12602         int qgroups_repaired = 0;
12603         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12604
12605         while(1) {
12606                 int c;
12607                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12608                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12609                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12610                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12611                 static const struct option long_options[] = {
12612                         { "super", required_argument, NULL, 's' },
12613                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12614                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12615                         { "init-csum-tree", no_argument, NULL,
12616                                 GETOPT_VAL_INIT_CSUM },
12617                         { "init-extent-tree", no_argument, NULL,
12618                                 GETOPT_VAL_INIT_EXTENT },
12619                         { "check-data-csum", no_argument, NULL,
12620                                 GETOPT_VAL_CHECK_CSUM },
12621                         { "backup", no_argument, NULL, 'b' },
12622                         { "subvol-extents", required_argument, NULL, 'E' },
12623                         { "qgroup-report", no_argument, NULL, 'Q' },
12624                         { "tree-root", required_argument, NULL, 'r' },
12625                         { "chunk-root", required_argument, NULL,
12626                                 GETOPT_VAL_CHUNK_TREE },
12627                         { "progress", no_argument, NULL, 'p' },
12628                         { "mode", required_argument, NULL,
12629                                 GETOPT_VAL_MODE },
12630                         { "clear-space-cache", required_argument, NULL,
12631                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12632                         { NULL, 0, NULL, 0}
12633                 };
12634
12635                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12636                 if (c < 0)
12637                         break;
12638                 switch(c) {
12639                         case 'a': /* ignored */ break;
12640                         case 'b':
12641                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12642                                 break;
12643                         case 's':
12644                                 num = arg_strtou64(optarg);
12645                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12646                                         error(
12647                                         "super mirror should be less than %d",
12648                                                 BTRFS_SUPER_MIRROR_MAX);
12649                                         exit(1);
12650                                 }
12651                                 bytenr = btrfs_sb_offset(((int)num));
12652                                 printf("using SB copy %llu, bytenr %llu\n", num,
12653                                        (unsigned long long)bytenr);
12654                                 break;
12655                         case 'Q':
12656                                 qgroup_report = 1;
12657                                 break;
12658                         case 'E':
12659                                 subvolid = arg_strtou64(optarg);
12660                                 break;
12661                         case 'r':
12662                                 tree_root_bytenr = arg_strtou64(optarg);
12663                                 break;
12664                         case GETOPT_VAL_CHUNK_TREE:
12665                                 chunk_root_bytenr = arg_strtou64(optarg);
12666                                 break;
12667                         case 'p':
12668                                 ctx.progress_enabled = true;
12669                                 break;
12670                         case '?':
12671                         case 'h':
12672                                 usage(cmd_check_usage);
12673                         case GETOPT_VAL_REPAIR:
12674                                 printf("enabling repair mode\n");
12675                                 repair = 1;
12676                                 ctree_flags |= OPEN_CTREE_WRITES;
12677                                 break;
12678                         case GETOPT_VAL_READONLY:
12679                                 readonly = 1;
12680                                 break;
12681                         case GETOPT_VAL_INIT_CSUM:
12682                                 printf("Creating a new CRC tree\n");
12683                                 init_csum_tree = 1;
12684                                 repair = 1;
12685                                 ctree_flags |= OPEN_CTREE_WRITES;
12686                                 break;
12687                         case GETOPT_VAL_INIT_EXTENT:
12688                                 init_extent_tree = 1;
12689                                 ctree_flags |= (OPEN_CTREE_WRITES |
12690                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12691                                 repair = 1;
12692                                 break;
12693                         case GETOPT_VAL_CHECK_CSUM:
12694                                 check_data_csum = 1;
12695                                 break;
12696                         case GETOPT_VAL_MODE:
12697                                 check_mode = parse_check_mode(optarg);
12698                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12699                                         error("unknown mode: %s", optarg);
12700                                         exit(1);
12701                                 }
12702                                 break;
12703                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12704                                 if (strcmp(optarg, "v1") == 0) {
12705                                         clear_space_cache = 1;
12706                                 } else if (strcmp(optarg, "v2") == 0) {
12707                                         clear_space_cache = 2;
12708                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12709                                 } else {
12710                                         error(
12711                 "invalid argument to --clear-space-cache, must be v1 or v2");
12712                                         exit(1);
12713                                 }
12714                                 ctree_flags |= OPEN_CTREE_WRITES;
12715                                 break;
12716                 }
12717         }
12718
12719         if (check_argc_exact(argc - optind, 1))
12720                 usage(cmd_check_usage);
12721
12722         if (ctx.progress_enabled) {
12723                 ctx.tp = TASK_NOTHING;
12724                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12725         }
12726
12727         /* This check is the only reason for --readonly to exist */
12728         if (readonly && repair) {
12729                 error("repair options are not compatible with --readonly");
12730                 exit(1);
12731         }
12732
12733         /*
12734          * Not supported yet
12735          */
12736         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12737                 error("low memory mode doesn't support repair yet");
12738                 exit(1);
12739         }
12740
12741         radix_tree_init();
12742         cache_tree_init(&root_cache);
12743
12744         if((ret = check_mounted(argv[optind])) < 0) {
12745                 error("could not check mount status: %s", strerror(-ret));
12746                 err |= !!ret;
12747                 goto err_out;
12748         } else if(ret) {
12749                 error("%s is currently mounted, aborting", argv[optind]);
12750                 ret = -EBUSY;
12751                 err |= !!ret;
12752                 goto err_out;
12753         }
12754
12755         /* only allow partial opening under repair mode */
12756         if (repair)
12757                 ctree_flags |= OPEN_CTREE_PARTIAL;
12758
12759         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12760                                   chunk_root_bytenr, ctree_flags);
12761         if (!info) {
12762                 error("cannot open file system");
12763                 ret = -EIO;
12764                 err |= !!ret;
12765                 goto err_out;
12766         }
12767
12768         global_info = info;
12769         root = info->fs_root;
12770         if (clear_space_cache == 1) {
12771                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12772                         error(
12773                 "free space cache v2 detected, use --clear-space-cache v2");
12774                         ret = 1;
12775                         goto close_out;
12776                 }
12777                 printf("Clearing free space cache\n");
12778                 ret = clear_free_space_cache(info);
12779                 if (ret) {
12780                         error("failed to clear free space cache");
12781                         ret = 1;
12782                 } else {
12783                         printf("Free space cache cleared\n");
12784                 }
12785                 goto close_out;
12786         } else if (clear_space_cache == 2) {
12787                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12788                         printf("no free space cache v2 to clear\n");
12789                         ret = 0;
12790                         goto close_out;
12791                 }
12792                 printf("Clear free space cache v2\n");
12793                 ret = btrfs_clear_free_space_tree(info);
12794                 if (ret) {
12795                         error("failed to clear free space cache v2: %d", ret);
12796                         ret = 1;
12797                 } else {
12798                         printf("free space cache v2 cleared\n");
12799                 }
12800                 goto close_out;
12801         }
12802
12803         /*
12804          * repair mode will force us to commit transaction which
12805          * will make us fail to load log tree when mounting.
12806          */
12807         if (repair && btrfs_super_log_root(info->super_copy)) {
12808                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12809                 if (!ret) {
12810                         ret = 1;
12811                         err |= !!ret;
12812                         goto close_out;
12813                 }
12814                 ret = zero_log_tree(root);
12815                 err |= !!ret;
12816                 if (ret) {
12817                         error("failed to zero log tree: %d", ret);
12818                         goto close_out;
12819                 }
12820         }
12821
12822         uuid_unparse(info->super_copy->fsid, uuidbuf);
12823         if (qgroup_report) {
12824                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12825                        uuidbuf);
12826                 ret = qgroup_verify_all(info);
12827                 err |= !!ret;
12828                 if (ret == 0)
12829                         report_qgroups(1);
12830                 goto close_out;
12831         }
12832         if (subvolid) {
12833                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12834                        subvolid, argv[optind], uuidbuf);
12835                 ret = print_extent_state(info, subvolid);
12836                 err |= !!ret;
12837                 goto close_out;
12838         }
12839         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12840
12841         if (!extent_buffer_uptodate(info->tree_root->node) ||
12842             !extent_buffer_uptodate(info->dev_root->node) ||
12843             !extent_buffer_uptodate(info->chunk_root->node)) {
12844                 error("critical roots corrupted, unable to check the filesystem");
12845                 err |= !!ret;
12846                 ret = -EIO;
12847                 goto close_out;
12848         }
12849
12850         if (init_extent_tree || init_csum_tree) {
12851                 struct btrfs_trans_handle *trans;
12852
12853                 trans = btrfs_start_transaction(info->extent_root, 0);
12854                 if (IS_ERR(trans)) {
12855                         error("error starting transaction");
12856                         ret = PTR_ERR(trans);
12857                         err |= !!ret;
12858                         goto close_out;
12859                 }
12860
12861                 if (init_extent_tree) {
12862                         printf("Creating a new extent tree\n");
12863                         ret = reinit_extent_tree(trans, info);
12864                         err |= !!ret;
12865                         if (ret)
12866                                 goto close_out;
12867                 }
12868
12869                 if (init_csum_tree) {
12870                         printf("Reinitialize checksum tree\n");
12871                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12872                         if (ret) {
12873                                 error("checksum tree initialization failed: %d",
12874                                                 ret);
12875                                 ret = -EIO;
12876                                 err |= !!ret;
12877                                 goto close_out;
12878                         }
12879
12880                         ret = fill_csum_tree(trans, info->csum_root,
12881                                              init_extent_tree);
12882                         err |= !!ret;
12883                         if (ret) {
12884                                 error("checksum tree refilling failed: %d", ret);
12885                                 return -EIO;
12886                         }
12887                 }
12888                 /*
12889                  * Ok now we commit and run the normal fsck, which will add
12890                  * extent entries for all of the items it finds.
12891                  */
12892                 ret = btrfs_commit_transaction(trans, info->extent_root);
12893                 err |= !!ret;
12894                 if (ret)
12895                         goto close_out;
12896         }
12897         if (!extent_buffer_uptodate(info->extent_root->node)) {
12898                 error("critical: extent_root, unable to check the filesystem");
12899                 ret = -EIO;
12900                 err |= !!ret;
12901                 goto close_out;
12902         }
12903         if (!extent_buffer_uptodate(info->csum_root->node)) {
12904                 error("critical: csum_root, unable to check the filesystem");
12905                 ret = -EIO;
12906                 err |= !!ret;
12907                 goto close_out;
12908         }
12909
12910         if (!ctx.progress_enabled)
12911                 fprintf(stderr, "checking extents\n");
12912         if (check_mode == CHECK_MODE_LOWMEM)
12913                 ret = check_chunks_and_extents_v2(root);
12914         else
12915                 ret = check_chunks_and_extents(root);
12916         err |= !!ret;
12917         if (ret)
12918                 error(
12919                 "errors found in extent allocation tree or chunk allocation");
12920
12921         ret = repair_root_items(info);
12922         err |= !!ret;
12923         if (ret < 0)
12924                 goto close_out;
12925         if (repair) {
12926                 fprintf(stderr, "Fixed %d roots.\n", ret);
12927                 ret = 0;
12928         } else if (ret > 0) {
12929                 fprintf(stderr,
12930                        "Found %d roots with an outdated root item.\n",
12931                        ret);
12932                 fprintf(stderr,
12933                         "Please run a filesystem check with the option --repair to fix them.\n");
12934                 ret = 1;
12935                 err |= !!ret;
12936                 goto close_out;
12937         }
12938
12939         if (!ctx.progress_enabled) {
12940                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12941                         fprintf(stderr, "checking free space tree\n");
12942                 else
12943                         fprintf(stderr, "checking free space cache\n");
12944         }
12945         ret = check_space_cache(root);
12946         err |= !!ret;
12947         if (ret)
12948                 goto out;
12949
12950         /*
12951          * We used to have to have these hole extents in between our real
12952          * extents so if we don't have this flag set we need to make sure there
12953          * are no gaps in the file extents for inodes, otherwise we can just
12954          * ignore it when this happens.
12955          */
12956         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12957         if (!ctx.progress_enabled)
12958                 fprintf(stderr, "checking fs roots\n");
12959         if (check_mode == CHECK_MODE_LOWMEM)
12960                 ret = check_fs_roots_v2(root->fs_info);
12961         else
12962                 ret = check_fs_roots(root, &root_cache);
12963         err |= !!ret;
12964         if (ret)
12965                 goto out;
12966
12967         fprintf(stderr, "checking csums\n");
12968         ret = check_csums(root);
12969         err |= !!ret;
12970         if (ret)
12971                 goto out;
12972
12973         fprintf(stderr, "checking root refs\n");
12974         /* For low memory mode, check_fs_roots_v2 handles root refs */
12975         if (check_mode != CHECK_MODE_LOWMEM) {
12976                 ret = check_root_refs(root, &root_cache);
12977                 err |= !!ret;
12978                 if (ret)
12979                         goto out;
12980         }
12981
12982         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12983                 struct extent_buffer *eb;
12984
12985                 eb = list_first_entry(&root->fs_info->recow_ebs,
12986                                       struct extent_buffer, recow);
12987                 list_del_init(&eb->recow);
12988                 ret = recow_extent_buffer(root, eb);
12989                 err |= !!ret;
12990                 if (ret)
12991                         break;
12992         }
12993
12994         while (!list_empty(&delete_items)) {
12995                 struct bad_item *bad;
12996
12997                 bad = list_first_entry(&delete_items, struct bad_item, list);
12998                 list_del_init(&bad->list);
12999                 if (repair) {
13000                         ret = delete_bad_item(root, bad);
13001                         err |= !!ret;
13002                 }
13003                 free(bad);
13004         }
13005
13006         if (info->quota_enabled) {
13007                 fprintf(stderr, "checking quota groups\n");
13008                 ret = qgroup_verify_all(info);
13009                 err |= !!ret;
13010                 if (ret)
13011                         goto out;
13012                 report_qgroups(0);
13013                 ret = repair_qgroups(info, &qgroups_repaired);
13014                 err |= !!ret;
13015                 if (err)
13016                         goto out;
13017                 ret = 0;
13018         }
13019
13020         if (!list_empty(&root->fs_info->recow_ebs)) {
13021                 error("transid errors in file system");
13022                 ret = 1;
13023                 err |= !!ret;
13024         }
13025 out:
13026         if (found_old_backref) { /*
13027                  * there was a disk format change when mixed
13028                  * backref was in testing tree. The old format
13029                  * existed about one week.
13030                  */
13031                 printf("\n * Found old mixed backref format. "
13032                        "The old format is not supported! *"
13033                        "\n * Please mount the FS in readonly mode, "
13034                        "backup data and re-format the FS. *\n\n");
13035                 err |= 1;
13036         }
13037         printf("found %llu bytes used err is %d\n",
13038                (unsigned long long)bytes_used, ret);
13039         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13040         printf("total tree bytes: %llu\n",
13041                (unsigned long long)total_btree_bytes);
13042         printf("total fs tree bytes: %llu\n",
13043                (unsigned long long)total_fs_tree_bytes);
13044         printf("total extent tree bytes: %llu\n",
13045                (unsigned long long)total_extent_tree_bytes);
13046         printf("btree space waste bytes: %llu\n",
13047                (unsigned long long)btree_space_waste);
13048         printf("file data blocks allocated: %llu\n referenced %llu\n",
13049                 (unsigned long long)data_bytes_allocated,
13050                 (unsigned long long)data_bytes_referenced);
13051
13052         free_qgroup_counts();
13053         free_root_recs_tree(&root_cache);
13054 close_out:
13055         close_ctree(root);
13056 err_out:
13057         if (ctx.progress_enabled)
13058                 task_deinit(ctx.info);
13059
13060         return err;
13061 }