btrfs-progs: check: remove unused argument from free_extent_record_cache
[platform/upstream/btrfs-progs.git] / cmds-check.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase currently being reported by the progress indicator.  The first
 * three values are used as an index into the description table in
 * print_status_check(); TASK_NOTHING disables the indicator output.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Selectable check implementations.  CHECK_MODE_UNKNOWN is what
 * parse_check_mode() returns for an unrecognised name; CHECK_MODE_DEFAULT
 * is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; note that bit 0 is not assigned. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not determined yet" value for
 * extent_record::flag_block_full_backref (a two-bit field).
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Back reference error bits, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Inode error bits for inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once OR-ed into an error mask; it now uses the first
 * free bit and all other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the progress line: emit a newline and flush stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the INODE_REF/INODE_EXTREF
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 found;
4703         unsigned int extent_type;
4704         unsigned int is_hole;
4705         int ret;
4706         int err = 0;
4707
4708         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4709
4710         extent_type = btrfs_file_extent_type(node, fi);
4711         /* Skip if file extent is inline */
4712         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4713                 struct btrfs_item *e = btrfs_item_nr(slot);
4714                 u32 item_inline_len;
4715
4716                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4717                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4718                 if (extent_num_bytes == 0 ||
4719                     extent_num_bytes != item_inline_len)
4720                         err |= FILE_EXTENT_ERROR;
4721                 *size += extent_num_bytes;
4722                 return err;
4723         }
4724
4725         /* Check extent type */
4726         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4727                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4728                 err |= FILE_EXTENT_ERROR;
4729                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4730                       root->objectid, fkey->objectid, fkey->offset);
4731                 return err;
4732         }
4733
4734         /* Check REG_EXTENT/PREALLOC_EXTENT */
4735         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4736         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4737         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4738         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4739
4740         /* Check EXTENT_DATA datasum */
4741         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4742         if (found > 0 && nodatasum) {
4743                 err |= ODD_CSUM_ITEM;
4744                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4745                       root->objectid, fkey->objectid, fkey->offset);
4746         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4747                    !is_hole &&
4748                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4749                 err |= CSUM_ITEM_MISSING;
4750                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4753                 err |= ODD_CSUM_ITEM;
4754                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         }
4757
4758         /* Check EXTENT_DATA hole */
4759         if (no_holes && is_hole) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         } else if (!no_holes && *end != fkey->offset) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         }
4768
4769         *end += extent_num_bytes;
4770         if (!is_hole)
4771                 *size += extent_num_bytes;
4772
4773         return err;
4774 }
4775
4776 /*
4777  * Check INODE_ITEM and related ITEMs (the same inode number)
4778  * 1. check link count
4779  * 2. check inode ref/extref
4780  * 3. check dir item/index
4781  *
4782  * @ext_ref:    the EXTENDED_IREF feature
4783  *
4784  * Return 0 if no error occurred.
4785  * Return >0 for error or hit the traversal is done(by error bitmap)
4786  */
4787 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4788                             unsigned int ext_ref)
4789 {
4790         struct extent_buffer *node;
4791         struct btrfs_inode_item *ii;
4792         struct btrfs_key key;
4793         u64 inode_id;
4794         u32 mode;
4795         u64 nlink;
4796         u64 nbytes;
4797         u64 isize;
4798         u64 size = 0;
4799         u64 refs = 0;
4800         u64 extent_end = 0;
4801         u64 extent_size = 0;
4802         unsigned int dir;
4803         unsigned int nodatasum;
4804         int slot;
4805         int ret;
4806         int err = 0;
4807
4808         node = path->nodes[0];
4809         slot = path->slots[0];
4810
4811         btrfs_item_key_to_cpu(node, &key, slot);
4812         inode_id = key.objectid;
4813
4814         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4815                 ret = btrfs_next_item(root, path);
4816                 if (ret > 0)
4817                         err |= LAST_ITEM;
4818                 return err;
4819         }
4820
4821         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4822         isize = btrfs_inode_size(node, ii);
4823         nbytes = btrfs_inode_nbytes(node, ii);
4824         mode = btrfs_inode_mode(node, ii);
4825         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4826         nlink = btrfs_inode_nlink(node, ii);
4827         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4828
4829         while (1) {
4830                 ret = btrfs_next_item(root, path);
4831                 if (ret < 0) {
4832                         /* out will fill 'err' rusing current statistics */
4833                         goto out;
4834                 } else if (ret > 0) {
4835                         err |= LAST_ITEM;
4836                         goto out;
4837                 }
4838
4839                 node = path->nodes[0];
4840                 slot = path->slots[0];
4841                 btrfs_item_key_to_cpu(node, &key, slot);
4842                 if (key.objectid != inode_id)
4843                         goto out;
4844
4845                 switch (key.type) {
4846                 case BTRFS_INODE_REF_KEY:
4847                         ret = check_inode_ref(root, &key, node, slot, &refs,
4848                                               mode);
4849                         err |= ret;
4850                         break;
4851                 case BTRFS_INODE_EXTREF_KEY:
4852                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4853                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4854                                         root->objectid, key.objectid,
4855                                         key.offset);
4856                         ret = check_inode_extref(root, &key, node, slot, &refs,
4857                                                  mode);
4858                         err |= ret;
4859                         break;
4860                 case BTRFS_DIR_ITEM_KEY:
4861                 case BTRFS_DIR_INDEX_KEY:
4862                         if (!dir) {
4863                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4864                                         root->objectid, inode_id,
4865                                         imode_to_type(mode), key.objectid,
4866                                         key.offset);
4867                         }
4868                         ret = check_dir_item(root, &key, node, slot, &size,
4869                                              ext_ref);
4870                         err |= ret;
4871                         break;
4872                 case BTRFS_EXTENT_DATA_KEY:
4873                         if (dir) {
4874                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4875                                         root->objectid, inode_id, key.objectid,
4876                                         key.offset);
4877                         }
4878                         ret = check_file_extent(root, &key, node, slot,
4879                                                 nodatasum, &extent_size,
4880                                                 &extent_end);
4881                         err |= ret;
4882                         break;
4883                 case BTRFS_XATTR_ITEM_KEY:
4884                         break;
4885                 default:
4886                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4887                               key.objectid, key.type, key.offset);
4888                 }
4889         }
4890
4891 out:
4892         /* verify INODE_ITEM nlink/isize/nbytes */
4893         if (dir) {
4894                 if (nlink != 1) {
4895                         err |= LINK_COUNT_ERROR;
4896                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4897                               root->objectid, inode_id, nlink);
4898                 }
4899
4900                 /*
4901                  * Just a warning, as dir inode nbytes is just an
4902                  * instructive value.
4903                  */
4904                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4905                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4906                                 root->objectid, inode_id, root->nodesize);
4907                 }
4908
4909                 if (isize != size) {
4910                         err |= ISIZE_ERROR;
4911                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4912                               root->objectid, inode_id, isize, size);
4913                 }
4914         } else {
4915                 if (nlink != refs) {
4916                         err |= LINK_COUNT_ERROR;
4917                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4918                               root->objectid, inode_id, nlink, refs);
4919                 } else if (!nlink) {
4920                         err |= ORPHAN_ITEM;
4921                 }
4922
4923                 if (!nbytes && !no_holes && extent_end < isize) {
4924                         err |= NBYTES_ERROR;
4925                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4926                               root->objectid, inode_id, isize);
4927                 }
4928
4929                 if (nbytes != extent_size) {
4930                         err |= NBYTES_ERROR;
4931                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4932                               root->objectid, inode_id, nbytes, extent_size);
4933                 }
4934         }
4935
4936         return err;
4937 }
4938
4939 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4940 {
4941         struct btrfs_path path;
4942         struct btrfs_key key;
4943         int err = 0;
4944         int ret;
4945
4946         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4947         key.type = BTRFS_INODE_ITEM_KEY;
4948         key.offset = 0;
4949
4950         /* For root being dropped, we don't need to check first inode */
4951         if (btrfs_root_refs(&root->root_item) == 0 &&
4952             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4953             key.objectid)
4954                 return 0;
4955
4956         btrfs_init_path(&path);
4957
4958         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4959         if (ret < 0)
4960                 goto out;
4961         if (ret > 0) {
4962                 ret = 0;
4963                 err |= INODE_ITEM_MISSING;
4964         }
4965
4966         err |= check_inode_item(root, &path, ext_ref);
4967         err &= ~LAST_ITEM;
4968         if (err && !ret)
4969                 ret = -EIO;
4970 out:
4971         btrfs_release_path(&path);
4972         return ret;
4973 }
4974
4975 /*
4976  * Iterate all item on the tree and call check_inode_item() to check.
4977  *
4978  * @root:       the root of the tree to be checked.
4979  * @ext_ref:    the EXTENDED_IREF feature
4980  *
4981  * Return 0 if no error found.
4982  * Return <0 for error.
4983  */
4984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4985 {
4986         struct btrfs_path path;
4987         struct node_refs nrefs;
4988         struct btrfs_root_item *root_item = &root->root_item;
4989         int ret, wret;
4990         int level;
4991
4992         /*
4993          * We need to manually check the first inode item(256)
4994          * As the following traversal function will only start from
4995          * the first inode item in the leaf, if inode item(256) is missing
4996          * we will just skip it forever.
4997          */
4998         ret = check_fs_first_inode(root, ext_ref);
4999         if (ret < 0)
5000                 return ret;
5001
5002         memset(&nrefs, 0, sizeof(nrefs));
5003         level = btrfs_header_level(root->node);
5004         btrfs_init_path(&path);
5005
5006         if (btrfs_root_refs(root_item) > 0 ||
5007             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5008                 path.nodes[level] = root->node;
5009                 path.slots[level] = 0;
5010                 extent_buffer_get(root->node);
5011         } else {
5012                 struct btrfs_key key;
5013
5014                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5015                 level = root_item->drop_level;
5016                 path.lowest_level = level;
5017                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5018                 if (ret < 0)
5019                         goto out;
5020                 ret = 0;
5021         }
5022
5023         while (1) {
5024                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5025                 if (wret < 0)
5026                         ret = wret;
5027                 if (wret != 0)
5028                         break;
5029
5030                 wret = walk_up_tree_v2(root, &path, &level);
5031                 if (wret < 0)
5032                         ret = wret;
5033                 if (wret != 0)
5034                         break;
5035         }
5036
5037 out:
5038         btrfs_release_path(&path);
5039         return ret;
5040 }
5041
5042 /*
5043  * Find the relative ref for root_ref and root_backref.
5044  *
5045  * @root:       the root of the root tree.
5046  * @ref_key:    the key of the root ref.
5047  *
5048  * Return 0 if no error occurred.
5049  */
5050 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5051                           struct extent_buffer *node, int slot)
5052 {
5053         struct btrfs_path path;
5054         struct btrfs_key key;
5055         struct btrfs_root_ref *ref;
5056         struct btrfs_root_ref *backref;
5057         char ref_name[BTRFS_NAME_LEN] = {0};
5058         char backref_name[BTRFS_NAME_LEN] = {0};
5059         u64 ref_dirid;
5060         u64 ref_seq;
5061         u32 ref_namelen;
5062         u64 backref_dirid;
5063         u64 backref_seq;
5064         u32 backref_namelen;
5065         u32 len;
5066         int ret;
5067         int err = 0;
5068
5069         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5070         ref_dirid = btrfs_root_ref_dirid(node, ref);
5071         ref_seq = btrfs_root_ref_sequence(node, ref);
5072         ref_namelen = btrfs_root_ref_name_len(node, ref);
5073
5074         if (ref_namelen <= BTRFS_NAME_LEN) {
5075                 len = ref_namelen;
5076         } else {
5077                 len = BTRFS_NAME_LEN;
5078                 warning("%s[%llu %llu] ref_name too long",
5079                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5080                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5081                         ref_key->offset);
5082         }
5083         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5084
5085         /* Find relative root_ref */
5086         key.objectid = ref_key->offset;
5087         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5088         key.offset = ref_key->objectid;
5089
5090         btrfs_init_path(&path);
5091         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5092         if (ret) {
5093                 err |= ROOT_REF_MISSING;
5094                 error("%s[%llu %llu] couldn't find relative ref",
5095                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5096                       "ROOT_REF" : "ROOT_BACKREF",
5097                       ref_key->objectid, ref_key->offset);
5098                 goto out;
5099         }
5100
5101         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5102                                  struct btrfs_root_ref);
5103         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5104         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5105         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5106
5107         if (backref_namelen <= BTRFS_NAME_LEN) {
5108                 len = backref_namelen;
5109         } else {
5110                 len = BTRFS_NAME_LEN;
5111                 warning("%s[%llu %llu] ref_name too long",
5112                         key.type == BTRFS_ROOT_REF_KEY ?
5113                         "ROOT_REF" : "ROOT_BACKREF",
5114                         key.objectid, key.offset);
5115         }
5116         read_extent_buffer(path.nodes[0], backref_name,
5117                            (unsigned long)(backref + 1), len);
5118
5119         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5120             ref_namelen != backref_namelen ||
5121             strncmp(ref_name, backref_name, len)) {
5122                 err |= ROOT_REF_MISMATCH;
5123                 error("%s[%llu %llu] mismatch relative ref",
5124                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5125                       "ROOT_REF" : "ROOT_BACKREF",
5126                       ref_key->objectid, ref_key->offset);
5127         }
5128 out:
5129         btrfs_release_path(&path);
5130         return err;
5131 }
5132
5133 /*
5134  * Check all fs/file tree in low_memory mode.
5135  *
5136  * 1. for fs tree root item, call check_fs_root_v2()
5137  * 2. for fs tree root ref/backref, call check_root_ref()
5138  *
5139  * Return 0 if no error occurred.
5140  */
5141 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5142 {
5143         struct btrfs_root *tree_root = fs_info->tree_root;
5144         struct btrfs_root *cur_root = NULL;
5145         struct btrfs_path path;
5146         struct btrfs_key key;
5147         struct extent_buffer *node;
5148         unsigned int ext_ref;
5149         int slot;
5150         int ret;
5151         int err = 0;
5152
5153         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5154
5155         btrfs_init_path(&path);
5156         key.objectid = BTRFS_FS_TREE_OBJECTID;
5157         key.offset = 0;
5158         key.type = BTRFS_ROOT_ITEM_KEY;
5159
5160         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5161         if (ret < 0) {
5162                 err = ret;
5163                 goto out;
5164         } else if (ret > 0) {
5165                 err = -ENOENT;
5166                 goto out;
5167         }
5168
5169         while (1) {
5170                 node = path.nodes[0];
5171                 slot = path.slots[0];
5172                 btrfs_item_key_to_cpu(node, &key, slot);
5173                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5174                         goto out;
5175                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5176                     fs_root_objectid(key.objectid)) {
5177                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5178                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5179                                                                        &key);
5180                         } else {
5181                                 key.offset = (u64)-1;
5182                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5183                         }
5184
5185                         if (IS_ERR(cur_root)) {
5186                                 error("Fail to read fs/subvol tree: %lld",
5187                                       key.objectid);
5188                                 err = -EIO;
5189                                 goto next;
5190                         }
5191
5192                         ret = check_fs_root_v2(cur_root, ext_ref);
5193                         err |= ret;
5194
5195                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5196                                 btrfs_free_fs_root(cur_root);
5197                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5198                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5199                         ret = check_root_ref(tree_root, &key, node, slot);
5200                         err |= ret;
5201                 }
5202 next:
5203                 ret = btrfs_next_item(tree_root, &path);
5204                 if (ret > 0)
5205                         goto out;
5206                 if (ret < 0) {
5207                         err = ret;
5208                         goto out;
5209                 }
5210         }
5211
5212 out:
5213         btrfs_release_path(&path);
5214         return err;
5215 }
5216
5217 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5218 {
5219         struct list_head *cur = rec->backrefs.next;
5220         struct extent_backref *back;
5221         struct tree_backref *tback;
5222         struct data_backref *dback;
5223         u64 found = 0;
5224         int err = 0;
5225
5226         while(cur != &rec->backrefs) {
5227                 back = to_extent_backref(cur);
5228                 cur = cur->next;
5229                 if (!back->found_extent_tree) {
5230                         err = 1;
5231                         if (!print_errs)
5232                                 goto out;
5233                         if (back->is_data) {
5234                                 dback = to_data_backref(back);
5235                                 fprintf(stderr, "Backref %llu %s %llu"
5236                                         " owner %llu offset %llu num_refs %lu"
5237                                         " not found in extent tree\n",
5238                                         (unsigned long long)rec->start,
5239                                         back->full_backref ?
5240                                         "parent" : "root",
5241                                         back->full_backref ?
5242                                         (unsigned long long)dback->parent:
5243                                         (unsigned long long)dback->root,
5244                                         (unsigned long long)dback->owner,
5245                                         (unsigned long long)dback->offset,
5246                                         (unsigned long)dback->num_refs);
5247                         } else {
5248                                 tback = to_tree_backref(back);
5249                                 fprintf(stderr, "Backref %llu parent %llu"
5250                                         " root %llu not found in extent tree\n",
5251                                         (unsigned long long)rec->start,
5252                                         (unsigned long long)tback->parent,
5253                                         (unsigned long long)tback->root);
5254                         }
5255                 }
5256                 if (!back->is_data && !back->found_ref) {
5257                         err = 1;
5258                         if (!print_errs)
5259                                 goto out;
5260                         tback = to_tree_backref(back);
5261                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5262                                 (unsigned long long)rec->start,
5263                                 back->full_backref ? "parent" : "root",
5264                                 back->full_backref ?
5265                                 (unsigned long long)tback->parent :
5266                                 (unsigned long long)tback->root, back);
5267                 }
5268                 if (back->is_data) {
5269                         dback = to_data_backref(back);
5270                         if (dback->found_ref != dback->num_refs) {
5271                                 err = 1;
5272                                 if (!print_errs)
5273                                         goto out;
5274                                 fprintf(stderr, "Incorrect local backref count"
5275                                         " on %llu %s %llu owner %llu"
5276                                         " offset %llu found %u wanted %u back %p\n",
5277                                         (unsigned long long)rec->start,
5278                                         back->full_backref ?
5279                                         "parent" : "root",
5280                                         back->full_backref ?
5281                                         (unsigned long long)dback->parent:
5282                                         (unsigned long long)dback->root,
5283                                         (unsigned long long)dback->owner,
5284                                         (unsigned long long)dback->offset,
5285                                         dback->found_ref, dback->num_refs, back);
5286                         }
5287                         if (dback->disk_bytenr != rec->start) {
5288                                 err = 1;
5289                                 if (!print_errs)
5290                                         goto out;
5291                                 fprintf(stderr, "Backref disk bytenr does not"
5292                                         " match extent record, bytenr=%llu, "
5293                                         "ref bytenr=%llu\n",
5294                                         (unsigned long long)rec->start,
5295                                         (unsigned long long)dback->disk_bytenr);
5296                         }
5297
5298                         if (dback->bytes != rec->nr) {
5299                                 err = 1;
5300                                 if (!print_errs)
5301                                         goto out;
5302                                 fprintf(stderr, "Backref bytes do not match "
5303                                         "extent backref, bytenr=%llu, ref "
5304                                         "bytes=%llu, backref bytes=%llu\n",
5305                                         (unsigned long long)rec->start,
5306                                         (unsigned long long)rec->nr,
5307                                         (unsigned long long)dback->bytes);
5308                         }
5309                 }
5310                 if (!back->is_data) {
5311                         found += 1;
5312                 } else {
5313                         dback = to_data_backref(back);
5314                         found += dback->found_ref;
5315                 }
5316         }
5317         if (found != rec->refs) {
5318                 err = 1;
5319                 if (!print_errs)
5320                         goto out;
5321                 fprintf(stderr, "Incorrect global backref count "
5322                         "on %llu found %llu wanted %llu\n",
5323                         (unsigned long long)rec->start,
5324                         (unsigned long long)found,
5325                         (unsigned long long)rec->refs);
5326         }
5327 out:
5328         return err;
5329 }
5330
5331 static int free_all_extent_backrefs(struct extent_record *rec)
5332 {
5333         struct extent_backref *back;
5334         struct list_head *cur;
5335         while (!list_empty(&rec->backrefs)) {
5336                 cur = rec->backrefs.next;
5337                 back = to_extent_backref(cur);
5338                 list_del(cur);
5339                 free(back);
5340         }
5341         return 0;
5342 }
5343
5344 static void free_extent_record_cache(struct cache_tree *extent_cache)
5345 {
5346         struct cache_extent *cache;
5347         struct extent_record *rec;
5348
5349         while (1) {
5350                 cache = first_cache_extent(extent_cache);
5351                 if (!cache)
5352                         break;
5353                 rec = container_of(cache, struct extent_record, cache);
5354                 remove_cache_extent(extent_cache, cache);
5355                 free_all_extent_backrefs(rec);
5356                 free(rec);
5357         }
5358 }
5359
5360 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5361                                  struct extent_record *rec)
5362 {
5363         if (rec->content_checked && rec->owner_ref_checked &&
5364             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5365             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5366             !rec->bad_full_backref && !rec->crossing_stripes &&
5367             !rec->wrong_chunk_type) {
5368                 remove_cache_extent(extent_cache, &rec->cache);
5369                 free_all_extent_backrefs(rec);
5370                 list_del_init(&rec->list);
5371                 free(rec);
5372         }
5373         return 0;
5374 }
5375
5376 static int check_owner_ref(struct btrfs_root *root,
5377                             struct extent_record *rec,
5378                             struct extent_buffer *buf)
5379 {
5380         struct extent_backref *node;
5381         struct tree_backref *back;
5382         struct btrfs_root *ref_root;
5383         struct btrfs_key key;
5384         struct btrfs_path path;
5385         struct extent_buffer *parent;
5386         int level;
5387         int found = 0;
5388         int ret;
5389
5390         list_for_each_entry(node, &rec->backrefs, list) {
5391                 if (node->is_data)
5392                         continue;
5393                 if (!node->found_ref)
5394                         continue;
5395                 if (node->full_backref)
5396                         continue;
5397                 back = to_tree_backref(node);
5398                 if (btrfs_header_owner(buf) == back->root)
5399                         return 0;
5400         }
5401         BUG_ON(rec->is_root);
5402
5403         /* try to find the block by search corresponding fs tree */
5404         key.objectid = btrfs_header_owner(buf);
5405         key.type = BTRFS_ROOT_ITEM_KEY;
5406         key.offset = (u64)-1;
5407
5408         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5409         if (IS_ERR(ref_root))
5410                 return 1;
5411
5412         level = btrfs_header_level(buf);
5413         if (level == 0)
5414                 btrfs_item_key_to_cpu(buf, &key, 0);
5415         else
5416                 btrfs_node_key_to_cpu(buf, &key, 0);
5417
5418         btrfs_init_path(&path);
5419         path.lowest_level = level + 1;
5420         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5421         if (ret < 0)
5422                 return 0;
5423
5424         parent = path.nodes[level + 1];
5425         if (parent && buf->start == btrfs_node_blockptr(parent,
5426                                                         path.slots[level + 1]))
5427                 found = 1;
5428
5429         btrfs_release_path(&path);
5430         return found ? 0 : 1;
5431 }
5432
5433 static int is_extent_tree_record(struct extent_record *rec)
5434 {
5435         struct list_head *cur = rec->backrefs.next;
5436         struct extent_backref *node;
5437         struct tree_backref *back;
5438         int is_extent = 0;
5439
5440         while(cur != &rec->backrefs) {
5441                 node = to_extent_backref(cur);
5442                 cur = cur->next;
5443                 if (node->is_data)
5444                         return 0;
5445                 back = to_tree_backref(node);
5446                 if (node->full_backref)
5447                         return 0;
5448                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5449                         is_extent = 1;
5450         }
5451         return is_extent;
5452 }
5453
5454
5455 static int record_bad_block_io(struct btrfs_fs_info *info,
5456                                struct cache_tree *extent_cache,
5457                                u64 start, u64 len)
5458 {
5459         struct extent_record *rec;
5460         struct cache_extent *cache;
5461         struct btrfs_key key;
5462
5463         cache = lookup_cache_extent(extent_cache, start, len);
5464         if (!cache)
5465                 return 0;
5466
5467         rec = container_of(cache, struct extent_record, cache);
5468         if (!is_extent_tree_record(rec))
5469                 return 0;
5470
5471         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5472         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5473 }
5474
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two struct btrfs_key_ptr entries
 * are swapped in place.  For a leaf the item data, the item offset/size
 * fields and the item keys are all rewritten (see the review note below).
 *
 * Returns 0 on success or -ENOMEM if a temporary data buffer cannot be
 * allocated; nothing has been modified in the -ENOMEM case.
 */
5475 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5476                        struct extent_buffer *buf, int slot)
5477 {
5478         if (btrfs_header_level(buf)) {
5479                 struct btrfs_key_ptr ptr1, ptr2;
5480
                     /* Node: read both key pointers, then write them back crosswise. */
5481                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5482                                    sizeof(struct btrfs_key_ptr));
5483                 read_extent_buffer(buf, &ptr2,
5484                                    btrfs_node_key_ptr_offset(slot + 1),
5485                                    sizeof(struct btrfs_key_ptr));
5486                 write_extent_buffer(buf, &ptr1,
5487                                     btrfs_node_key_ptr_offset(slot + 1),
5488                                     sizeof(struct btrfs_key_ptr));
5489                 write_extent_buffer(buf, &ptr2,
5490                                     btrfs_node_key_ptr_offset(slot),
5491                                     sizeof(struct btrfs_key_ptr));
                     /*
                      * The first key of the node changed; hand the new key to
                      * btrfs_fixup_low_keys() starting one level up (presumably
                      * so the parent keys are updated -- confirm in ctree.c).
                      */
5492                 if (slot == 0) {
5493                         struct btrfs_disk_key key;
5494                         btrfs_node_key(buf, &key, 0);
5495                         btrfs_fixup_low_keys(root, path, &key,
5496                                              btrfs_header_level(buf) + 1);
5497                 }
5498         } else {
5499                 struct btrfs_item *item1, *item2;
5500                 struct btrfs_key k1, k2;
5501                 char *item1_data, *item2_data;
5502                 u32 item1_offset, item2_offset, item1_size, item2_size;
5503
                     /* Leaf: remember key, data offset and data size of both items. */
5504                 item1 = btrfs_item_nr(slot);
5505                 item2 = btrfs_item_nr(slot + 1);
5506                 btrfs_item_key_to_cpu(buf, &k1, slot);
5507                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5508                 item1_offset = btrfs_item_offset(buf, item1);
5509                 item2_offset = btrfs_item_offset(buf, item2);
5510                 item1_size = btrfs_item_size(buf, item1);
5511                 item2_size = btrfs_item_size(buf, item2);
5512
5513                 item1_data = malloc(item1_size);
5514                 if (!item1_data)
5515                         return -ENOMEM;
5516                 item2_data = malloc(item2_size);
5517                 if (!item2_data) {
5518                         free(item1_data);
5519                         return -ENOMEM;
5520                 }
5521
5522                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5523                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5524
                     /*
                      * NOTE(review): item1_data holds item1_size bytes but
                      * item2_size bytes are copied out of it (and the other way
                      * round for item2_data).  This looks like it assumes both
                      * items have the same size; if they differ the copy reads
                      * past the smaller allocation -- confirm and fix.
                      */
5525                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5526                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5527                 free(item1_data);
5528                 free(item2_data);
5529
                     /*
                      * NOTE(review): data, offset/size and keys are each
                      * exchanged, so the header at @slot ends up with k2 and
                      * item2's old offset, where item1's data now lives.  Verify
                      * that this key/data pairing and offset order are intended.
                      */
5530                 btrfs_set_item_offset(buf, item1, item2_offset);
5531                 btrfs_set_item_offset(buf, item2, item1_offset);
5532                 btrfs_set_item_size(buf, item1, item2_size);
5533                 btrfs_set_item_size(buf, item2, item1_size);
5534
                     /* btrfs_set_item_key_unsafe() acts on path->slots[0]. */
5535                 path->slots[0] = slot;
5536                 btrfs_set_item_key_unsafe(root, path, &k2);
5537                 path->slots[0] = slot + 1;
5538                 btrfs_set_item_key_unsafe(root, path, &k1);
5539         }
5540         return 0;
5541 }
5542
5543 static int fix_key_order(struct btrfs_trans_handle *trans,
5544                          struct btrfs_root *root,
5545                          struct btrfs_path *path)
5546 {
5547         struct extent_buffer *buf;
5548         struct btrfs_key k1, k2;
5549         int i;
5550         int level = path->lowest_level;
5551         int ret = -EIO;
5552
5553         buf = path->nodes[level];
5554         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5555                 if (level) {
5556                         btrfs_node_key_to_cpu(buf, &k1, i);
5557                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5558                 } else {
5559                         btrfs_item_key_to_cpu(buf, &k1, i);
5560                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5561                 }
5562                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5563                         continue;
5564                 ret = swap_values(root, path, buf, i);
5565                 if (ret)
5566                         break;
5567                 btrfs_mark_buffer_dirty(buf);
5568                 i = 0;
5569         }
5570         return ret;
5571 }
5572
5573 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5574                              struct btrfs_root *root,
5575                              struct btrfs_path *path,
5576                              struct extent_buffer *buf, int slot)
5577 {
5578         struct btrfs_key key;
5579         int nritems = btrfs_header_nritems(buf);
5580
5581         btrfs_item_key_to_cpu(buf, &key, slot);
5582
5583         /* These are all the keys we can deal with missing. */
5584         if (key.type != BTRFS_DIR_INDEX_KEY &&
5585             key.type != BTRFS_EXTENT_ITEM_KEY &&
5586             key.type != BTRFS_METADATA_ITEM_KEY &&
5587             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5588             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5589                 return -1;
5590
5591         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5592                (unsigned long long)key.objectid, key.type,
5593                (unsigned long long)key.offset, slot, buf->start);
5594         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5595                               btrfs_item_nr_offset(slot + 1),
5596                               sizeof(struct btrfs_item) *
5597                               (nritems - slot - 1));
5598         btrfs_set_header_nritems(buf, nritems - 1);
5599         if (slot == 0) {
5600                 struct btrfs_disk_key disk_key;
5601
5602                 btrfs_item_key(buf, &disk_key, 0);
5603                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5604         }
5605         btrfs_mark_buffer_dirty(buf);
5606         return 0;
5607 }
5608
5609 static int fix_item_offset(struct btrfs_trans_handle *trans,
5610                            struct btrfs_root *root,
5611                            struct btrfs_path *path)
5612 {
5613         struct extent_buffer *buf;
5614         int i;
5615         int ret = 0;
5616
5617         /* We should only get this for leaves */
5618         BUG_ON(path->lowest_level);
5619         buf = path->nodes[0];
5620 again:
5621         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5622                 unsigned int shift = 0, offset;
5623
5624                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5625                     BTRFS_LEAF_DATA_SIZE(root)) {
5626                         if (btrfs_item_end_nr(buf, i) >
5627                             BTRFS_LEAF_DATA_SIZE(root)) {
5628                                 ret = delete_bogus_item(trans, root, path,
5629                                                         buf, i);
5630                                 if (!ret)
5631                                         goto again;
5632                                 fprintf(stderr, "item is off the end of the "
5633                                         "leaf, can't fix\n");
5634                                 ret = -EIO;
5635                                 break;
5636                         }
5637                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5638                                 btrfs_item_end_nr(buf, i);
5639                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5640                            btrfs_item_offset_nr(buf, i - 1)) {
5641                         if (btrfs_item_end_nr(buf, i) >
5642                             btrfs_item_offset_nr(buf, i - 1)) {
5643                                 ret = delete_bogus_item(trans, root, path,
5644                                                         buf, i);
5645                                 if (!ret)
5646                                         goto again;
5647                                 fprintf(stderr, "items overlap, can't fix\n");
5648                                 ret = -EIO;
5649                                 break;
5650                         }
5651                         shift = btrfs_item_offset_nr(buf, i - 1) -
5652                                 btrfs_item_end_nr(buf, i);
5653                 }
5654                 if (!shift)
5655                         continue;
5656
5657                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5658                        i, shift, (unsigned long long)buf->start);
5659                 offset = btrfs_item_offset_nr(buf, i);
5660                 memmove_extent_buffer(buf,
5661                                       btrfs_leaf_data(buf) + offset + shift,
5662                                       btrfs_leaf_data(buf) + offset,
5663                                       btrfs_item_size_nr(buf, i));
5664                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5665                                       offset + shift);
5666                 btrfs_mark_buffer_dirty(buf);
5667         }
5668
5669         /*
5670          * We may have moved things, in which case we want to exit so we don't
5671          * write those changes out.  Once we have proper abort functionality in
5672          * progs this can be changed to something nicer.
5673          */
5674         BUG_ON(ret);
5675         return ret;
5676 }
5677
5678 /*
5679  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5680  * then just return -EIO.
5681  */
5682 static int try_to_fix_bad_block(struct btrfs_root *root,
5683                                 struct extent_buffer *buf,
5684                                 enum btrfs_tree_block_status status)
5685 {
5686         struct btrfs_trans_handle *trans;
5687         struct ulist *roots;
5688         struct ulist_node *node;
5689         struct btrfs_root *search_root;
5690         struct btrfs_path path;
5691         struct ulist_iterator iter;
5692         struct btrfs_key root_key, key;
5693         int ret;
5694
5695         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5696             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5697                 return -EIO;
5698
5699         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5700         if (ret)
5701                 return -EIO;
5702
5703         btrfs_init_path(&path);
5704         ULIST_ITER_INIT(&iter);
5705         while ((node = ulist_next(roots, &iter))) {
5706                 root_key.objectid = node->val;
5707                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5708                 root_key.offset = (u64)-1;
5709
5710                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5711                 if (IS_ERR(root)) {
5712                         ret = -EIO;
5713                         break;
5714                 }
5715
5716
5717                 trans = btrfs_start_transaction(search_root, 0);
5718                 if (IS_ERR(trans)) {
5719                         ret = PTR_ERR(trans);
5720                         break;
5721                 }
5722
5723                 path.lowest_level = btrfs_header_level(buf);
5724                 path.skip_check_block = 1;
5725                 if (path.lowest_level)
5726                         btrfs_node_key_to_cpu(buf, &key, 0);
5727                 else
5728                         btrfs_item_key_to_cpu(buf, &key, 0);
5729                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5730                 if (ret) {
5731                         ret = -EIO;
5732                         btrfs_commit_transaction(trans, search_root);
5733                         break;
5734                 }
5735                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5736                         ret = fix_key_order(trans, search_root, &path);
5737                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5738                         ret = fix_item_offset(trans, search_root, &path);
5739                 if (ret) {
5740                         btrfs_commit_transaction(trans, search_root);
5741                         break;
5742                 }
5743                 btrfs_release_path(&path);
5744                 btrfs_commit_transaction(trans, search_root);
5745         }
5746         ulist_free(roots);
5747         btrfs_release_path(&path);
5748         return ret;
5749 }
5750
5751 static int check_block(struct btrfs_root *root,
5752                        struct cache_tree *extent_cache,
5753                        struct extent_buffer *buf, u64 flags)
5754 {
5755         struct extent_record *rec;
5756         struct cache_extent *cache;
5757         struct btrfs_key key;
5758         enum btrfs_tree_block_status status;
5759         int ret = 0;
5760         int level;
5761
5762         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5763         if (!cache)
5764                 return 1;
5765         rec = container_of(cache, struct extent_record, cache);
5766         rec->generation = btrfs_header_generation(buf);
5767
5768         level = btrfs_header_level(buf);
5769         if (btrfs_header_nritems(buf) > 0) {
5770
5771                 if (level == 0)
5772                         btrfs_item_key_to_cpu(buf, &key, 0);
5773                 else
5774                         btrfs_node_key_to_cpu(buf, &key, 0);
5775
5776                 rec->info_objectid = key.objectid;
5777         }
5778         rec->info_level = level;
5779
5780         if (btrfs_is_leaf(buf))
5781                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5782         else
5783                 status = btrfs_check_node(root, &rec->parent_key, buf);
5784
5785         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5786                 if (repair)
5787                         status = try_to_fix_bad_block(root, buf, status);
5788                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5789                         ret = -EIO;
5790                         fprintf(stderr, "bad block %llu\n",
5791                                 (unsigned long long)buf->start);
5792                 } else {
5793                         /*
5794                          * Signal to callers we need to start the scan over
5795                          * again since we'll have cowed blocks.
5796                          */
5797                         ret = -EAGAIN;
5798                 }
5799         } else {
5800                 rec->content_checked = 1;
5801                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5802                         rec->owner_ref_checked = 1;
5803                 else {
5804                         ret = check_owner_ref(root, rec, buf);
5805                         if (!ret)
5806                                 rec->owner_ref_checked = 1;
5807                 }
5808         }
5809         if (!ret)
5810                 maybe_free_extent_rec(extent_cache, rec);
5811         return ret;
5812 }
5813
5814 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5815                                                 u64 parent, u64 root)
5816 {
5817         struct list_head *cur = rec->backrefs.next;
5818         struct extent_backref *node;
5819         struct tree_backref *back;
5820
5821         while(cur != &rec->backrefs) {
5822                 node = to_extent_backref(cur);
5823                 cur = cur->next;
5824                 if (node->is_data)
5825                         continue;
5826                 back = to_tree_backref(node);
5827                 if (parent > 0) {
5828                         if (!node->full_backref)
5829                                 continue;
5830                         if (parent == back->parent)
5831                                 return back;
5832                 } else {
5833                         if (node->full_backref)
5834                                 continue;
5835                         if (back->root == root)
5836                                 return back;
5837                 }
5838         }
5839         return NULL;
5840 }
5841
5842 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5843                                                 u64 parent, u64 root)
5844 {
5845         struct tree_backref *ref = malloc(sizeof(*ref));
5846
5847         if (!ref)
5848                 return NULL;
5849         memset(&ref->node, 0, sizeof(ref->node));
5850         if (parent > 0) {
5851                 ref->parent = parent;
5852                 ref->node.full_backref = 1;
5853         } else {
5854                 ref->root = root;
5855                 ref->node.full_backref = 0;
5856         }
5857         list_add_tail(&ref->node.list, &rec->backrefs);
5858
5859         return ref;
5860 }
5861
5862 static struct data_backref *find_data_backref(struct extent_record *rec,
5863                                                 u64 parent, u64 root,
5864                                                 u64 owner, u64 offset,
5865                                                 int found_ref,
5866                                                 u64 disk_bytenr, u64 bytes)
5867 {
5868         struct list_head *cur = rec->backrefs.next;
5869         struct extent_backref *node;
5870         struct data_backref *back;
5871
5872         while(cur != &rec->backrefs) {
5873                 node = to_extent_backref(cur);
5874                 cur = cur->next;
5875                 if (!node->is_data)
5876                         continue;
5877                 back = to_data_backref(node);
5878                 if (parent > 0) {
5879                         if (!node->full_backref)
5880                                 continue;
5881                         if (parent == back->parent)
5882                                 return back;
5883                 } else {
5884                         if (node->full_backref)
5885                                 continue;
5886                         if (back->root == root && back->owner == owner &&
5887                             back->offset == offset) {
5888                                 if (found_ref && node->found_ref &&
5889                                     (back->bytes != bytes ||
5890                                     back->disk_bytenr != disk_bytenr))
5891                                         continue;
5892                                 return back;
5893                         }
5894                 }
5895         }
5896         return NULL;
5897 }
5898
5899 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5900                                                 u64 parent, u64 root,
5901                                                 u64 owner, u64 offset,
5902                                                 u64 max_size)
5903 {
5904         struct data_backref *ref = malloc(sizeof(*ref));
5905
5906         if (!ref)
5907                 return NULL;
5908         memset(&ref->node, 0, sizeof(ref->node));
5909         ref->node.is_data = 1;
5910
5911         if (parent > 0) {
5912                 ref->parent = parent;
5913                 ref->owner = 0;
5914                 ref->offset = 0;
5915                 ref->node.full_backref = 1;
5916         } else {
5917                 ref->root = root;
5918                 ref->owner = owner;
5919                 ref->offset = offset;
5920                 ref->node.full_backref = 0;
5921         }
5922         ref->bytes = max_size;
5923         ref->found_ref = 0;
5924         ref->num_refs = 0;
5925         list_add_tail(&ref->node.list, &rec->backrefs);
5926         if (max_size > rec->max_size)
5927                 rec->max_size = max_size;
5928         return ref;
5929 }
5930
5931 /* Check if the type of extent matches with its chunk */
5932 static void check_extent_type(struct extent_record *rec)
5933 {
5934         struct btrfs_block_group_cache *bg_cache;
5935
5936         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5937         if (!bg_cache)
5938                 return;
5939
5940         /* data extent, check chunk directly*/
5941         if (!rec->metadata) {
5942                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5943                         rec->wrong_chunk_type = 1;
5944                 return;
5945         }
5946
5947         /* metadata extent, check the obvious case first */
5948         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5949                                  BTRFS_BLOCK_GROUP_METADATA))) {
5950                 rec->wrong_chunk_type = 1;
5951                 return;
5952         }
5953
5954         /*
5955          * Check SYSTEM extent, as it's also marked as metadata, we can only
5956          * make sure it's a SYSTEM extent by its backref
5957          */
5958         if (!list_empty(&rec->backrefs)) {
5959                 struct extent_backref *node;
5960                 struct tree_backref *tback;
5961                 u64 bg_type;
5962
5963                 node = to_extent_backref(rec->backrefs.next);
5964                 if (node->is_data) {
5965                         /* tree block shouldn't have data backref */
5966                         rec->wrong_chunk_type = 1;
5967                         return;
5968                 }
5969                 tback = container_of(node, struct tree_backref, node);
5970
5971                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5972                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5973                 else
5974                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5975                 if (!(bg_cache->flags & bg_type))
5976                         rec->wrong_chunk_type = 1;
5977         }
5978 }
5979
5980 /*
5981  * Allocate a new extent record, fill default values from @tmpl and insert int
5982  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5983  * the cache, otherwise it fails.
5984  */
5985 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5986                 struct extent_record *tmpl)
5987 {
5988         struct extent_record *rec;
5989         int ret = 0;
5990
5991         rec = malloc(sizeof(*rec));
5992         if (!rec)
5993                 return -ENOMEM;
5994         rec->start = tmpl->start;
5995         rec->max_size = tmpl->max_size;
5996         rec->nr = max(tmpl->nr, tmpl->max_size);
5997         rec->found_rec = tmpl->found_rec;
5998         rec->content_checked = tmpl->content_checked;
5999         rec->owner_ref_checked = tmpl->owner_ref_checked;
6000         rec->num_duplicates = 0;
6001         rec->metadata = tmpl->metadata;
6002         rec->flag_block_full_backref = FLAG_UNSET;
6003         rec->bad_full_backref = 0;
6004         rec->crossing_stripes = 0;
6005         rec->wrong_chunk_type = 0;
6006         rec->is_root = tmpl->is_root;
6007         rec->refs = tmpl->refs;
6008         rec->extent_item_refs = tmpl->extent_item_refs;
6009         rec->parent_generation = tmpl->parent_generation;
6010         INIT_LIST_HEAD(&rec->backrefs);
6011         INIT_LIST_HEAD(&rec->dups);
6012         INIT_LIST_HEAD(&rec->list);
6013         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6014         rec->cache.start = tmpl->start;
6015         rec->cache.size = tmpl->nr;
6016         ret = insert_cache_extent(extent_cache, &rec->cache);
6017         if (ret) {
6018                 free(rec);
6019                 return ret;
6020         }
6021         bytes_used += rec->nr;
6022
6023         if (tmpl->metadata)
6024                 rec->crossing_stripes = check_crossing_stripes(global_info,
6025                                 rec->start, global_info->tree_root->nodesize);
6026         check_extent_type(rec);
6027         return ret;
6028 }
6029
6030 /*
6031  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6032  * some are hints:
6033  * - refs              - if found, increase refs
6034  * - is_root           - if found, set
6035  * - content_checked   - if found, set
6036  * - owner_ref_checked - if found, set
6037  *
6038  * If not found, create a new one, initialize and insert.
6039  */
6040 static int add_extent_rec(struct cache_tree *extent_cache,
6041                 struct extent_record *tmpl)
6042 {
6043         struct extent_record *rec;
6044         struct cache_extent *cache;
6045         int ret = 0;
6046         int dup = 0;
6047
6048         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6049         if (cache) {
6050                 rec = container_of(cache, struct extent_record, cache);
6051                 if (tmpl->refs)
6052                         rec->refs++;
6053                 if (rec->nr == 1)
6054                         rec->nr = max(tmpl->nr, tmpl->max_size);
6055
6056                 /*
6057                  * We need to make sure to reset nr to whatever the extent
6058                  * record says was the real size, this way we can compare it to
6059                  * the backrefs.
6060                  */
6061                 if (tmpl->found_rec) {
6062                         if (tmpl->start != rec->start || rec->found_rec) {
6063                                 struct extent_record *tmp;
6064
6065                                 dup = 1;
6066                                 if (list_empty(&rec->list))
6067                                         list_add_tail(&rec->list,
6068                                                       &duplicate_extents);
6069
6070                                 /*
6071                                  * We have to do this song and dance in case we
6072                                  * find an extent record that falls inside of
6073                                  * our current extent record but does not have
6074                                  * the same objectid.
6075                                  */
6076                                 tmp = malloc(sizeof(*tmp));
6077                                 if (!tmp)
6078                                         return -ENOMEM;
6079                                 tmp->start = tmpl->start;
6080                                 tmp->max_size = tmpl->max_size;
6081                                 tmp->nr = tmpl->nr;
6082                                 tmp->found_rec = 1;
6083                                 tmp->metadata = tmpl->metadata;
6084                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6085                                 INIT_LIST_HEAD(&tmp->list);
6086                                 list_add_tail(&tmp->list, &rec->dups);
6087                                 rec->num_duplicates++;
6088                         } else {
6089                                 rec->nr = tmpl->nr;
6090                                 rec->found_rec = 1;
6091                         }
6092                 }
6093
6094                 if (tmpl->extent_item_refs && !dup) {
6095                         if (rec->extent_item_refs) {
6096                                 fprintf(stderr, "block %llu rec "
6097                                         "extent_item_refs %llu, passed %llu\n",
6098                                         (unsigned long long)tmpl->start,
6099                                         (unsigned long long)
6100                                                         rec->extent_item_refs,
6101                                         (unsigned long long)tmpl->extent_item_refs);
6102                         }
6103                         rec->extent_item_refs = tmpl->extent_item_refs;
6104                 }
6105                 if (tmpl->is_root)
6106                         rec->is_root = 1;
6107                 if (tmpl->content_checked)
6108                         rec->content_checked = 1;
6109                 if (tmpl->owner_ref_checked)
6110                         rec->owner_ref_checked = 1;
6111                 memcpy(&rec->parent_key, &tmpl->parent_key,
6112                                 sizeof(tmpl->parent_key));
6113                 if (tmpl->parent_generation)
6114                         rec->parent_generation = tmpl->parent_generation;
6115                 if (rec->max_size < tmpl->max_size)
6116                         rec->max_size = tmpl->max_size;
6117
6118                 /*
6119                  * A metadata extent can't cross stripe_len boundary, otherwise
6120                  * kernel scrub won't be able to handle it.
6121                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6122                  * it.
6123                  */
6124                 if (tmpl->metadata)
6125                         rec->crossing_stripes = check_crossing_stripes(
6126                                         global_info, rec->start,
6127                                         global_info->tree_root->nodesize);
6128                 check_extent_type(rec);
6129                 maybe_free_extent_rec(extent_cache, rec);
6130                 return ret;
6131         }
6132
6133         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6134
6135         return ret;
6136 }
6137
6138 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6139                             u64 parent, u64 root, int found_ref)
6140 {
6141         struct extent_record *rec;
6142         struct tree_backref *back;
6143         struct cache_extent *cache;
6144         int ret;
6145
6146         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6147         if (!cache) {
6148                 struct extent_record tmpl;
6149
6150                 memset(&tmpl, 0, sizeof(tmpl));
6151                 tmpl.start = bytenr;
6152                 tmpl.nr = 1;
6153                 tmpl.metadata = 1;
6154
6155                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6156                 if (ret)
6157                         return ret;
6158
6159                 /* really a bug in cache_extent implement now */
6160                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6161                 if (!cache)
6162                         return -ENOENT;
6163         }
6164
6165         rec = container_of(cache, struct extent_record, cache);
6166         if (rec->start != bytenr) {
6167                 /*
6168                  * Several cause, from unaligned bytenr to over lapping extents
6169                  */
6170                 return -EEXIST;
6171         }
6172
6173         back = find_tree_backref(rec, parent, root);
6174         if (!back) {
6175                 back = alloc_tree_backref(rec, parent, root);
6176                 if (!back)
6177                         return -ENOMEM;
6178         }
6179
6180         if (found_ref) {
6181                 if (back->node.found_ref) {
6182                         fprintf(stderr, "Extent back ref already exists "
6183                                 "for %llu parent %llu root %llu \n",
6184                                 (unsigned long long)bytenr,
6185                                 (unsigned long long)parent,
6186                                 (unsigned long long)root);
6187                 }
6188                 back->node.found_ref = 1;
6189         } else {
6190                 if (back->node.found_extent_tree) {
6191                         fprintf(stderr, "Extent back ref already exists "
6192                                 "for %llu parent %llu root %llu \n",
6193                                 (unsigned long long)bytenr,
6194                                 (unsigned long long)parent,
6195                                 (unsigned long long)root);
6196                 }
6197                 back->node.found_extent_tree = 1;
6198         }
6199         check_extent_type(rec);
6200         maybe_free_extent_rec(extent_cache, rec);
6201         return 0;
6202 }
6203
6204 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6205                             u64 parent, u64 root, u64 owner, u64 offset,
6206                             u32 num_refs, int found_ref, u64 max_size)
6207 {
6208         struct extent_record *rec;
6209         struct data_backref *back;
6210         struct cache_extent *cache;
6211         int ret;
6212
6213         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6214         if (!cache) {
6215                 struct extent_record tmpl;
6216
6217                 memset(&tmpl, 0, sizeof(tmpl));
6218                 tmpl.start = bytenr;
6219                 tmpl.nr = 1;
6220                 tmpl.max_size = max_size;
6221
6222                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6223                 if (ret)
6224                         return ret;
6225
6226                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6227                 if (!cache)
6228                         abort();
6229         }
6230
6231         rec = container_of(cache, struct extent_record, cache);
6232         if (rec->max_size < max_size)
6233                 rec->max_size = max_size;
6234
6235         /*
6236          * If found_ref is set then max_size is the real size and must match the
6237          * existing refs.  So if we have already found a ref then we need to
6238          * make sure that this ref matches the existing one, otherwise we need
6239          * to add a new backref so we can notice that the backrefs don't match
6240          * and we need to figure out who is telling the truth.  This is to
6241          * account for that awful fsync bug I introduced where we'd end up with
6242          * a btrfs_file_extent_item that would have its length include multiple
6243          * prealloc extents or point inside of a prealloc extent.
6244          */
6245         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6246                                  bytenr, max_size);
6247         if (!back) {
6248                 back = alloc_data_backref(rec, parent, root, owner, offset,
6249                                           max_size);
6250                 BUG_ON(!back);
6251         }
6252
6253         if (found_ref) {
6254                 BUG_ON(num_refs != 1);
6255                 if (back->node.found_ref)
6256                         BUG_ON(back->bytes != max_size);
6257                 back->node.found_ref = 1;
6258                 back->found_ref += 1;
6259                 back->bytes = max_size;
6260                 back->disk_bytenr = bytenr;
6261                 rec->refs += 1;
6262                 rec->content_checked = 1;
6263                 rec->owner_ref_checked = 1;
6264         } else {
6265                 if (back->node.found_extent_tree) {
6266                         fprintf(stderr, "Extent back ref already exists "
6267                                 "for %llu parent %llu root %llu "
6268                                 "owner %llu offset %llu num_refs %lu\n",
6269                                 (unsigned long long)bytenr,
6270                                 (unsigned long long)parent,
6271                                 (unsigned long long)root,
6272                                 (unsigned long long)owner,
6273                                 (unsigned long long)offset,
6274                                 (unsigned long)num_refs);
6275                 }
6276                 back->num_refs = num_refs;
6277                 back->node.found_extent_tree = 1;
6278         }
6279         maybe_free_extent_rec(extent_cache, rec);
6280         return 0;
6281 }
6282
6283 static int add_pending(struct cache_tree *pending,
6284                        struct cache_tree *seen, u64 bytenr, u32 size)
6285 {
6286         int ret;
6287         ret = add_cache_extent(seen, bytenr, size);
6288         if (ret)
6289                 return ret;
6290         add_cache_extent(pending, bytenr, size);
6291         return 0;
6292 }
6293
6294 static int pick_next_pending(struct cache_tree *pending,
6295                         struct cache_tree *reada,
6296                         struct cache_tree *nodes,
6297                         u64 last, struct block_info *bits, int bits_nr,
6298                         int *reada_bits)
6299 {
6300         unsigned long node_start = last;
6301         struct cache_extent *cache;
6302         int ret;
6303
6304         cache = search_cache_extent(reada, 0);
6305         if (cache) {
6306                 bits[0].start = cache->start;
6307                 bits[0].size = cache->size;
6308                 *reada_bits = 1;
6309                 return 1;
6310         }
6311         *reada_bits = 0;
6312         if (node_start > 32768)
6313                 node_start -= 32768;
6314
6315         cache = search_cache_extent(nodes, node_start);
6316         if (!cache)
6317                 cache = search_cache_extent(nodes, 0);
6318
6319         if (!cache) {
6320                  cache = search_cache_extent(pending, 0);
6321                  if (!cache)
6322                          return 0;
6323                  ret = 0;
6324                  do {
6325                          bits[ret].start = cache->start;
6326                          bits[ret].size = cache->size;
6327                          cache = next_cache_extent(cache);
6328                          ret++;
6329                  } while (cache && ret < bits_nr);
6330                  return ret;
6331         }
6332
6333         ret = 0;
6334         do {
6335                 bits[ret].start = cache->start;
6336                 bits[ret].size = cache->size;
6337                 cache = next_cache_extent(cache);
6338                 ret++;
6339         } while (cache && ret < bits_nr);
6340
6341         if (bits_nr - ret > 8) {
6342                 u64 lookup = bits[0].start + bits[0].size;
6343                 struct cache_extent *next;
6344                 next = search_cache_extent(pending, lookup);
6345                 while(next) {
6346                         if (next->start - lookup > 32768)
6347                                 break;
6348                         bits[ret].start = next->start;
6349                         bits[ret].size = next->size;
6350                         lookup = next->start + next->size;
6351                         ret++;
6352                         if (ret == bits_nr)
6353                                 break;
6354                         next = next_cache_extent(next);
6355                         if (!next)
6356                                 break;
6357                 }
6358         }
6359         return ret;
6360 }
6361
6362 static void free_chunk_record(struct cache_extent *cache)
6363 {
6364         struct chunk_record *rec;
6365
6366         rec = container_of(cache, struct chunk_record, cache);
6367         list_del_init(&rec->list);
6368         list_del_init(&rec->dextents);
6369         free(rec);
6370 }
6371
/*
 * Tear down @chunk_cache by handing it to cache_tree_free_extents() with
 * free_chunk_record() as the per-extent callback.
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
6376
6377 static void free_device_record(struct rb_node *node)
6378 {
6379         struct device_record *rec;
6380
6381         rec = container_of(node, struct device_record, node);
6382         free(rec);
6383 }
6384
6385 FREE_RB_BASED_TREE(device_cache, free_device_record);
6386
6387 int insert_block_group_record(struct block_group_tree *tree,
6388                               struct block_group_record *bg_rec)
6389 {
6390         int ret;
6391
6392         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6393         if (ret)
6394                 return ret;
6395
6396         list_add_tail(&bg_rec->list, &tree->block_groups);
6397         return 0;
6398 }
6399
6400 static void free_block_group_record(struct cache_extent *cache)
6401 {
6402         struct block_group_record *rec;
6403
6404         rec = container_of(cache, struct block_group_record, cache);
6405         list_del_init(&rec->list);
6406         free(rec);
6407 }
6408
/*
 * Tear down the cache tree embedded in @tree by handing it to
 * cache_tree_free_extents() with free_block_group_record() as the
 * per-extent callback.
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
6413
6414 int insert_device_extent_record(struct device_extent_tree *tree,
6415                                 struct device_extent_record *de_rec)
6416 {
6417         int ret;
6418
6419         /*
6420          * Device extent is a bit different from the other extents, because
6421          * the extents which belong to the different devices may have the
6422          * same start and size, so we need use the special extent cache
6423          * search/insert functions.
6424          */
6425         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6426         if (ret)
6427                 return ret;
6428
6429         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6430         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6431         return 0;
6432 }
6433
6434 static void free_device_extent_record(struct cache_extent *cache)
6435 {
6436         struct device_extent_record *rec;
6437
6438         rec = container_of(cache, struct device_extent_record, cache);
6439         if (!list_empty(&rec->chunk_list))
6440                 list_del_init(&rec->chunk_list);
6441         if (!list_empty(&rec->device_list))
6442                 list_del_init(&rec->device_list);
6443         free(rec);
6444 }
6445
/*
 * Tear down the cache tree embedded in @tree by handing it to
 * cache_tree_free_extents() with free_device_extent_record() as the
 * per-extent callback.
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
6450
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle the v0 extent ref item at @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref, anything else as a data backref carrying the
 * v0 ref count.  Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_key key;
        struct btrfs_extent_ref_v0 *ref0;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                                0, 0);
}
#endif
6471
6472 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6473                                             struct btrfs_key *key,
6474                                             int slot)
6475 {
6476         struct btrfs_chunk *ptr;
6477         struct chunk_record *rec;
6478         int num_stripes, i;
6479
6480         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6481         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6482
6483         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6484         if (!rec) {
6485                 fprintf(stderr, "memory allocation failed\n");
6486                 exit(-1);
6487         }
6488
6489         INIT_LIST_HEAD(&rec->list);
6490         INIT_LIST_HEAD(&rec->dextents);
6491         rec->bg_rec = NULL;
6492
6493         rec->cache.start = key->offset;
6494         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6495
6496         rec->generation = btrfs_header_generation(leaf);
6497
6498         rec->objectid = key->objectid;
6499         rec->type = key->type;
6500         rec->offset = key->offset;
6501
6502         rec->length = rec->cache.size;
6503         rec->owner = btrfs_chunk_owner(leaf, ptr);
6504         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6505         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6506         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6507         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6508         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6509         rec->num_stripes = num_stripes;
6510         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6511
6512         for (i = 0; i < rec->num_stripes; ++i) {
6513                 rec->stripes[i].devid =
6514                         btrfs_stripe_devid_nr(leaf, ptr, i);
6515                 rec->stripes[i].offset =
6516                         btrfs_stripe_offset_nr(leaf, ptr, i);
6517                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6518                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6519                                 BTRFS_UUID_SIZE);
6520         }
6521
6522         return rec;
6523 }
6524
6525 static int process_chunk_item(struct cache_tree *chunk_cache,
6526                               struct btrfs_key *key, struct extent_buffer *eb,
6527                               int slot)
6528 {
6529         struct chunk_record *rec;
6530         struct btrfs_chunk *chunk;
6531         int ret = 0;
6532
6533         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6534         /*
6535          * Do extra check for this chunk item,
6536          *
6537          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6538          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6539          * and owner<->key_type check.
6540          */
6541         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6542                                       key->offset);
6543         if (ret < 0) {
6544                 error("chunk(%llu, %llu) is not valid, ignore it",
6545                       key->offset, btrfs_chunk_length(eb, chunk));
6546                 return 0;
6547         }
6548         rec = btrfs_new_chunk_record(eb, key, slot);
6549         ret = insert_cache_extent(chunk_cache, &rec->cache);
6550         if (ret) {
6551                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6552                         rec->offset, rec->length);
6553                 free(rec);
6554         }
6555
6556         return ret;
6557 }
6558
6559 static int process_device_item(struct rb_root *dev_cache,
6560                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6561 {
6562         struct btrfs_dev_item *ptr;
6563         struct device_record *rec;
6564         int ret = 0;
6565
6566         ptr = btrfs_item_ptr(eb,
6567                 slot, struct btrfs_dev_item);
6568
6569         rec = malloc(sizeof(*rec));
6570         if (!rec) {
6571                 fprintf(stderr, "memory allocation failed\n");
6572                 return -ENOMEM;
6573         }
6574
6575         rec->devid = key->offset;
6576         rec->generation = btrfs_header_generation(eb);
6577
6578         rec->objectid = key->objectid;
6579         rec->type = key->type;
6580         rec->offset = key->offset;
6581
6582         rec->devid = btrfs_device_id(eb, ptr);
6583         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6584         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6585
6586         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6587         if (ret) {
6588                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6589                 free(rec);
6590         }
6591
6592         return ret;
6593 }
6594
6595 struct block_group_record *
6596 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6597                              int slot)
6598 {
6599         struct btrfs_block_group_item *ptr;
6600         struct block_group_record *rec;
6601
6602         rec = calloc(1, sizeof(*rec));
6603         if (!rec) {
6604                 fprintf(stderr, "memory allocation failed\n");
6605                 exit(-1);
6606         }
6607
6608         rec->cache.start = key->objectid;
6609         rec->cache.size = key->offset;
6610
6611         rec->generation = btrfs_header_generation(leaf);
6612
6613         rec->objectid = key->objectid;
6614         rec->type = key->type;
6615         rec->offset = key->offset;
6616
6617         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6618         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6619
6620         INIT_LIST_HEAD(&rec->list);
6621
6622         return rec;
6623 }
6624
6625 static int process_block_group_item(struct block_group_tree *block_group_cache,
6626                                     struct btrfs_key *key,
6627                                     struct extent_buffer *eb, int slot)
6628 {
6629         struct block_group_record *rec;
6630         int ret = 0;
6631
6632         rec = btrfs_new_block_group_record(eb, key, slot);
6633         ret = insert_block_group_record(block_group_cache, rec);
6634         if (ret) {
6635                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6636                         rec->objectid, rec->offset);
6637                 free(rec);
6638         }
6639
6640         return ret;
6641 }
6642
6643 struct device_extent_record *
6644 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6645                                struct btrfs_key *key, int slot)
6646 {
6647         struct device_extent_record *rec;
6648         struct btrfs_dev_extent *ptr;
6649
6650         rec = calloc(1, sizeof(*rec));
6651         if (!rec) {
6652                 fprintf(stderr, "memory allocation failed\n");
6653                 exit(-1);
6654         }
6655
6656         rec->cache.objectid = key->objectid;
6657         rec->cache.start = key->offset;
6658
6659         rec->generation = btrfs_header_generation(leaf);
6660
6661         rec->objectid = key->objectid;
6662         rec->type = key->type;
6663         rec->offset = key->offset;
6664
6665         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6666         rec->chunk_objecteid =
6667                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6668         rec->chunk_offset =
6669                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6670         rec->length = btrfs_dev_extent_length(leaf, ptr);
6671         rec->cache.size = rec->length;
6672
6673         INIT_LIST_HEAD(&rec->chunk_list);
6674         INIT_LIST_HEAD(&rec->device_list);
6675
6676         return rec;
6677 }
6678
6679 static int
6680 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6681                            struct btrfs_key *key, struct extent_buffer *eb,
6682                            int slot)
6683 {
6684         struct device_extent_record *rec;
6685         int ret;
6686
6687         rec = btrfs_new_device_extent_record(eb, key, slot);
6688         ret = insert_device_extent_record(dev_extent_cache, rec);
6689         if (ret) {
6690                 fprintf(stderr,
6691                         "Device extent[%llu, %llu, %llu] existed.\n",
6692                         rec->objectid, rec->offset, rec->length);
6693                 free(rec);
6694         }
6695
6696         return ret;
6697 }
6698
6699 static int process_extent_item(struct btrfs_root *root,
6700                                struct cache_tree *extent_cache,
6701                                struct extent_buffer *eb, int slot)
6702 {
6703         struct btrfs_extent_item *ei;
6704         struct btrfs_extent_inline_ref *iref;
6705         struct btrfs_extent_data_ref *dref;
6706         struct btrfs_shared_data_ref *sref;
6707         struct btrfs_key key;
6708         struct extent_record tmpl;
6709         unsigned long end;
6710         unsigned long ptr;
6711         int ret;
6712         int type;
6713         u32 item_size = btrfs_item_size_nr(eb, slot);
6714         u64 refs = 0;
6715         u64 offset;
6716         u64 num_bytes;
6717         int metadata = 0;
6718
6719         btrfs_item_key_to_cpu(eb, &key, slot);
6720
6721         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6722                 metadata = 1;
6723                 num_bytes = root->nodesize;
6724         } else {
6725                 num_bytes = key.offset;
6726         }
6727
6728         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6729                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6730                       key.objectid, root->sectorsize);
6731                 return -EIO;
6732         }
6733         if (item_size < sizeof(*ei)) {
6734 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6735                 struct btrfs_extent_item_v0 *ei0;
6736                 BUG_ON(item_size != sizeof(*ei0));
6737                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6738                 refs = btrfs_extent_refs_v0(eb, ei0);
6739 #else
6740                 BUG();
6741 #endif
6742                 memset(&tmpl, 0, sizeof(tmpl));
6743                 tmpl.start = key.objectid;
6744                 tmpl.nr = num_bytes;
6745                 tmpl.extent_item_refs = refs;
6746                 tmpl.metadata = metadata;
6747                 tmpl.found_rec = 1;
6748                 tmpl.max_size = num_bytes;
6749
6750                 return add_extent_rec(extent_cache, &tmpl);
6751         }
6752
6753         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6754         refs = btrfs_extent_refs(eb, ei);
6755         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6756                 metadata = 1;
6757         else
6758                 metadata = 0;
6759         if (metadata && num_bytes != root->nodesize) {
6760                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6761                       num_bytes, root->nodesize);
6762                 return -EIO;
6763         }
6764         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6765                 error("ignore invalid data extent, length %llu is not aligned to %u",
6766                       num_bytes, root->sectorsize);
6767                 return -EIO;
6768         }
6769
6770         memset(&tmpl, 0, sizeof(tmpl));
6771         tmpl.start = key.objectid;
6772         tmpl.nr = num_bytes;
6773         tmpl.extent_item_refs = refs;
6774         tmpl.metadata = metadata;
6775         tmpl.found_rec = 1;
6776         tmpl.max_size = num_bytes;
6777         add_extent_rec(extent_cache, &tmpl);
6778
6779         ptr = (unsigned long)(ei + 1);
6780         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6781             key.type == BTRFS_EXTENT_ITEM_KEY)
6782                 ptr += sizeof(struct btrfs_tree_block_info);
6783
6784         end = (unsigned long)ei + item_size;
6785         while (ptr < end) {
6786                 iref = (struct btrfs_extent_inline_ref *)ptr;
6787                 type = btrfs_extent_inline_ref_type(eb, iref);
6788                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6789                 switch (type) {
6790                 case BTRFS_TREE_BLOCK_REF_KEY:
6791                         ret = add_tree_backref(extent_cache, key.objectid,
6792                                         0, offset, 0);
6793                         if (ret < 0)
6794                                 error("add_tree_backref failed: %s",
6795                                       strerror(-ret));
6796                         break;
6797                 case BTRFS_SHARED_BLOCK_REF_KEY:
6798                         ret = add_tree_backref(extent_cache, key.objectid,
6799                                         offset, 0, 0);
6800                         if (ret < 0)
6801                                 error("add_tree_backref failed: %s",
6802                                       strerror(-ret));
6803                         break;
6804                 case BTRFS_EXTENT_DATA_REF_KEY:
6805                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6806                         add_data_backref(extent_cache, key.objectid, 0,
6807                                         btrfs_extent_data_ref_root(eb, dref),
6808                                         btrfs_extent_data_ref_objectid(eb,
6809                                                                        dref),
6810                                         btrfs_extent_data_ref_offset(eb, dref),
6811                                         btrfs_extent_data_ref_count(eb, dref),
6812                                         0, num_bytes);
6813                         break;
6814                 case BTRFS_SHARED_DATA_REF_KEY:
6815                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6816                         add_data_backref(extent_cache, key.objectid, offset,
6817                                         0, 0, 0,
6818                                         btrfs_shared_data_ref_count(eb, sref),
6819                                         0, num_bytes);
6820                         break;
6821                 default:
6822                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6823                                 key.objectid, key.type, num_bytes);
6824                         goto out;
6825                 }
6826                 ptr += btrfs_extent_inline_ref_size(type);
6827         }
6828         WARN_ON(ptr > end);
6829 out:
6830         return 0;
6831 }
6832
6833 static int check_cache_range(struct btrfs_root *root,
6834                              struct btrfs_block_group_cache *cache,
6835                              u64 offset, u64 bytes)
6836 {
6837         struct btrfs_free_space *entry;
6838         u64 *logical;
6839         u64 bytenr;
6840         int stripe_len;
6841         int i, nr, ret;
6842
6843         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6844                 bytenr = btrfs_sb_offset(i);
6845                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6846                                        cache->key.objectid, bytenr, 0,
6847                                        &logical, &nr, &stripe_len);
6848                 if (ret)
6849                         return ret;
6850
6851                 while (nr--) {
6852                         if (logical[nr] + stripe_len <= offset)
6853                                 continue;
6854                         if (offset + bytes <= logical[nr])
6855                                 continue;
6856                         if (logical[nr] == offset) {
6857                                 if (stripe_len >= bytes) {
6858                                         free(logical);
6859                                         return 0;
6860                                 }
6861                                 bytes -= stripe_len;
6862                                 offset += stripe_len;
6863                         } else if (logical[nr] < offset) {
6864                                 if (logical[nr] + stripe_len >=
6865                                     offset + bytes) {
6866                                         free(logical);
6867                                         return 0;
6868                                 }
6869                                 bytes = (offset + bytes) -
6870                                         (logical[nr] + stripe_len);
6871                                 offset = logical[nr] + stripe_len;
6872                         } else {
6873                                 /*
6874                                  * Could be tricky, the super may land in the
6875                                  * middle of the area we're checking.  First
6876                                  * check the easiest case, it's at the end.
6877                                  */
6878                                 if (logical[nr] + stripe_len >=
6879                                     bytes + offset) {
6880                                         bytes = logical[nr] - offset;
6881                                         continue;
6882                                 }
6883
6884                                 /* Check the left side */
6885                                 ret = check_cache_range(root, cache,
6886                                                         offset,
6887                                                         logical[nr] - offset);
6888                                 if (ret) {
6889                                         free(logical);
6890                                         return ret;
6891                                 }
6892
6893                                 /* Now we continue with the right side */
6894                                 bytes = (offset + bytes) -
6895                                         (logical[nr] + stripe_len);
6896                                 offset = logical[nr] + stripe_len;
6897                         }
6898                 }
6899
6900                 free(logical);
6901         }
6902
6903         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6904         if (!entry) {
6905                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6906                         offset, offset+bytes);
6907                 return -EINVAL;
6908         }
6909
6910         if (entry->offset != offset) {
6911                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6912                         entry->offset);
6913                 return -EINVAL;
6914         }
6915
6916         if (entry->bytes != bytes) {
6917                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6918                         bytes, entry->bytes, offset);
6919                 return -EINVAL;
6920         }
6921
6922         unlink_free_space(cache->free_space_ctl, entry);
6923         free(entry);
6924         return 0;
6925 }
6926
6927 static int verify_space_cache(struct btrfs_root *root,
6928                               struct btrfs_block_group_cache *cache)
6929 {
6930         struct btrfs_path path;
6931         struct extent_buffer *leaf;
6932         struct btrfs_key key;
6933         u64 last;
6934         int ret = 0;
6935
6936         root = root->fs_info->extent_root;
6937
6938         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6939
6940         btrfs_init_path(&path);
6941         key.objectid = last;
6942         key.offset = 0;
6943         key.type = BTRFS_EXTENT_ITEM_KEY;
6944         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6945         if (ret < 0)
6946                 goto out;
6947         ret = 0;
6948         while (1) {
6949                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6950                         ret = btrfs_next_leaf(root, &path);
6951                         if (ret < 0)
6952                                 goto out;
6953                         if (ret > 0) {
6954                                 ret = 0;
6955                                 break;
6956                         }
6957                 }
6958                 leaf = path.nodes[0];
6959                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6960                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6961                         break;
6962                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6963                     key.type != BTRFS_METADATA_ITEM_KEY) {
6964                         path.slots[0]++;
6965                         continue;
6966                 }
6967
6968                 if (last == key.objectid) {
6969                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6970                                 last = key.objectid + key.offset;
6971                         else
6972                                 last = key.objectid + root->nodesize;
6973                         path.slots[0]++;
6974                         continue;
6975                 }
6976
6977                 ret = check_cache_range(root, cache, last,
6978                                         key.objectid - last);
6979                 if (ret)
6980                         break;
6981                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6982                         last = key.objectid + key.offset;
6983                 else
6984                         last = key.objectid + root->nodesize;
6985                 path.slots[0]++;
6986         }
6987
6988         if (last < cache->key.objectid + cache->key.offset)
6989                 ret = check_cache_range(root, cache, last,
6990                                         cache->key.objectid +
6991                                         cache->key.offset - last);
6992
6993 out:
6994         btrfs_release_path(&path);
6995
6996         if (!ret &&
6997             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6998                 fprintf(stderr, "There are still entries left in the space "
6999                         "cache\n");
7000                 ret = -EINVAL;
7001         }
7002
7003         return ret;
7004 }
7005
7006 static int check_space_cache(struct btrfs_root *root)
7007 {
7008         struct btrfs_block_group_cache *cache;
7009         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7010         int ret;
7011         int error = 0;
7012
7013         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7014             btrfs_super_generation(root->fs_info->super_copy) !=
7015             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7016                 printf("cache and super generation don't match, space cache "
7017                        "will be invalidated\n");
7018                 return 0;
7019         }
7020
7021         if (ctx.progress_enabled) {
7022                 ctx.tp = TASK_FREE_SPACE;
7023                 task_start(ctx.info);
7024         }
7025
7026         while (1) {
7027                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7028                 if (!cache)
7029                         break;
7030
7031                 start = cache->key.objectid + cache->key.offset;
7032                 if (!cache->free_space_ctl) {
7033                         if (btrfs_init_free_space_ctl(cache,
7034                                                       root->sectorsize)) {
7035                                 ret = -ENOMEM;
7036                                 break;
7037                         }
7038                 } else {
7039                         btrfs_remove_free_space_cache(cache);
7040                 }
7041
7042                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7043                         ret = exclude_super_stripes(root, cache);
7044                         if (ret) {
7045                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7046                                         strerror(-ret));
7047                                 error++;
7048                                 continue;
7049                         }
7050                         ret = load_free_space_tree(root->fs_info, cache);
7051                         free_excluded_extents(root, cache);
7052                         if (ret < 0) {
7053                                 fprintf(stderr, "could not load free space tree: %s\n",
7054                                         strerror(-ret));
7055                                 error++;
7056                                 continue;
7057                         }
7058                         error += ret;
7059                 } else {
7060                         ret = load_free_space_cache(root->fs_info, cache);
7061                         if (!ret)
7062                                 continue;
7063                 }
7064
7065                 ret = verify_space_cache(root, cache);
7066                 if (ret) {
7067                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7068                                 cache->key.objectid);
7069                         error++;
7070                 }
7071         }
7072
7073         task_stop(ctx.info);
7074
7075         return error ? -EINVAL : 0;
7076 }
7077
7078 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7079                         u64 num_bytes, unsigned long leaf_offset,
7080                         struct extent_buffer *eb) {
7081
7082         u64 offset = 0;
7083         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7084         char *data;
7085         unsigned long csum_offset;
7086         u32 csum;
7087         u32 csum_expected;
7088         u64 read_len;
7089         u64 data_checked = 0;
7090         u64 tmp;
7091         int ret = 0;
7092         int mirror;
7093         int num_copies;
7094
7095         if (num_bytes % root->sectorsize)
7096                 return -EINVAL;
7097
7098         data = malloc(num_bytes);
7099         if (!data)
7100                 return -ENOMEM;
7101
7102         while (offset < num_bytes) {
7103                 mirror = 0;
7104 again:
7105                 read_len = num_bytes - offset;
7106                 /* read as much space once a time */
7107                 ret = read_extent_data(root, data + offset,
7108                                 bytenr + offset, &read_len, mirror);
7109                 if (ret)
7110                         goto out;
7111                 data_checked = 0;
7112                 /* verify every 4k data's checksum */
7113                 while (data_checked < read_len) {
7114                         csum = ~(u32)0;
7115                         tmp = offset + data_checked;
7116
7117                         csum = btrfs_csum_data((char *)data + tmp,
7118                                                csum, root->sectorsize);
7119                         btrfs_csum_final(csum, (u8 *)&csum);
7120
7121                         csum_offset = leaf_offset +
7122                                  tmp / root->sectorsize * csum_size;
7123                         read_extent_buffer(eb, (char *)&csum_expected,
7124                                            csum_offset, csum_size);
7125                         /* try another mirror */
7126                         if (csum != csum_expected) {
7127                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7128                                                 mirror, bytenr + tmp,
7129                                                 csum, csum_expected);
7130                                 num_copies = btrfs_num_copies(
7131                                                 &root->fs_info->mapping_tree,
7132                                                 bytenr, num_bytes);
7133                                 if (mirror < num_copies - 1) {
7134                                         mirror += 1;
7135                                         goto again;
7136                                 }
7137                         }
7138                         data_checked += root->sectorsize;
7139                 }
7140                 offset += read_len;
7141         }
7142 out:
7143         free(data);
7144         return ret;
7145 }
7146
7147 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7148                                u64 num_bytes)
7149 {
7150         struct btrfs_path path;
7151         struct extent_buffer *leaf;
7152         struct btrfs_key key;
7153         int ret;
7154
7155         btrfs_init_path(&path);
7156         key.objectid = bytenr;
7157         key.type = BTRFS_EXTENT_ITEM_KEY;
7158         key.offset = (u64)-1;
7159
7160 again:
7161         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7162                                 0, 0);
7163         if (ret < 0) {
7164                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7165                 btrfs_release_path(&path);
7166                 return ret;
7167         } else if (ret) {
7168                 if (path.slots[0] > 0) {
7169                         path.slots[0]--;
7170                 } else {
7171                         ret = btrfs_prev_leaf(root, &path);
7172                         if (ret < 0) {
7173                                 goto out;
7174                         } else if (ret > 0) {
7175                                 ret = 0;
7176                                 goto out;
7177                         }
7178                 }
7179         }
7180
7181         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7182
7183         /*
7184          * Block group items come before extent items if they have the same
7185          * bytenr, so walk back one more just in case.  Dear future traveller,
7186          * first congrats on mastering time travel.  Now if it's not too much
7187          * trouble could you go back to 2006 and tell Chris to make the
7188          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7189          * EXTENT_ITEM_KEY please?
7190          */
7191         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7192                 if (path.slots[0] > 0) {
7193                         path.slots[0]--;
7194                 } else {
7195                         ret = btrfs_prev_leaf(root, &path);
7196                         if (ret < 0) {
7197                                 goto out;
7198                         } else if (ret > 0) {
7199                                 ret = 0;
7200                                 goto out;
7201                         }
7202                 }
7203                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7204         }
7205
7206         while (num_bytes) {
7207                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7208                         ret = btrfs_next_leaf(root, &path);
7209                         if (ret < 0) {
7210                                 fprintf(stderr, "Error going to next leaf "
7211                                         "%d\n", ret);
7212                                 btrfs_release_path(&path);
7213                                 return ret;
7214                         } else if (ret) {
7215                                 break;
7216                         }
7217                 }
7218                 leaf = path.nodes[0];
7219                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7220                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7221                         path.slots[0]++;
7222                         continue;
7223                 }
7224                 if (key.objectid + key.offset < bytenr) {
7225                         path.slots[0]++;
7226                         continue;
7227                 }
7228                 if (key.objectid > bytenr + num_bytes)
7229                         break;
7230
7231                 if (key.objectid == bytenr) {
7232                         if (key.offset >= num_bytes) {
7233                                 num_bytes = 0;
7234                                 break;
7235                         }
7236                         num_bytes -= key.offset;
7237                         bytenr += key.offset;
7238                 } else if (key.objectid < bytenr) {
7239                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7240                                 num_bytes = 0;
7241                                 break;
7242                         }
7243                         num_bytes = (bytenr + num_bytes) -
7244                                 (key.objectid + key.offset);
7245                         bytenr = key.objectid + key.offset;
7246                 } else {
7247                         if (key.objectid + key.offset < bytenr + num_bytes) {
7248                                 u64 new_start = key.objectid + key.offset;
7249                                 u64 new_bytes = bytenr + num_bytes - new_start;
7250
7251                                 /*
7252                                  * Weird case, the extent is in the middle of
7253                                  * our range, we'll have to search one side
7254                                  * and then the other.  Not sure if this happens
7255                                  * in real life, but no harm in coding it up
7256                                  * anyway just in case.
7257                                  */
7258                                 btrfs_release_path(&path);
7259                                 ret = check_extent_exists(root, new_start,
7260                                                           new_bytes);
7261                                 if (ret) {
7262                                         fprintf(stderr, "Right section didn't "
7263                                                 "have a record\n");
7264                                         break;
7265                                 }
7266                                 num_bytes = key.objectid - bytenr;
7267                                 goto again;
7268                         }
7269                         num_bytes = key.objectid - bytenr;
7270                 }
7271                 path.slots[0]++;
7272         }
7273         ret = 0;
7274
7275 out:
7276         if (num_bytes && !ret) {
7277                 fprintf(stderr, "There are no extents for csum range "
7278                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7279                 ret = 1;
7280         }
7281
7282         btrfs_release_path(&path);
7283         return ret;
7284 }
7285
7286 static int check_csums(struct btrfs_root *root)
7287 {
7288         struct btrfs_path path;
7289         struct extent_buffer *leaf;
7290         struct btrfs_key key;
7291         u64 offset = 0, num_bytes = 0;
7292         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7293         int errors = 0;
7294         int ret;
7295         u64 data_len;
7296         unsigned long leaf_offset;
7297
7298         root = root->fs_info->csum_root;
7299         if (!extent_buffer_uptodate(root->node)) {
7300                 fprintf(stderr, "No valid csum tree found\n");
7301                 return -ENOENT;
7302         }
7303
7304         btrfs_init_path(&path);
7305         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7306         key.type = BTRFS_EXTENT_CSUM_KEY;
7307         key.offset = 0;
7308         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7309         if (ret < 0) {
7310                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7311                 btrfs_release_path(&path);
7312                 return ret;
7313         }
7314
7315         if (ret > 0 && path.slots[0])
7316                 path.slots[0]--;
7317         ret = 0;
7318
7319         while (1) {
7320                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7321                         ret = btrfs_next_leaf(root, &path);
7322                         if (ret < 0) {
7323                                 fprintf(stderr, "Error going to next leaf "
7324                                         "%d\n", ret);
7325                                 break;
7326                         }
7327                         if (ret)
7328                                 break;
7329                 }
7330                 leaf = path.nodes[0];
7331
7332                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7333                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7334                         path.slots[0]++;
7335                         continue;
7336                 }
7337
7338                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7339                               csum_size) * root->sectorsize;
7340                 if (!check_data_csum)
7341                         goto skip_csum_check;
7342                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7343                 ret = check_extent_csums(root, key.offset, data_len,
7344                                          leaf_offset, leaf);
7345                 if (ret)
7346                         break;
7347 skip_csum_check:
7348                 if (!num_bytes) {
7349                         offset = key.offset;
7350                 } else if (key.offset != offset + num_bytes) {
7351                         ret = check_extent_exists(root, offset, num_bytes);
7352                         if (ret) {
7353                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7354                                         "there is no extent record\n",
7355                                         offset, offset+num_bytes);
7356                                 errors++;
7357                         }
7358                         offset = key.offset;
7359                         num_bytes = 0;
7360                 }
7361                 num_bytes += data_len;
7362                 path.slots[0]++;
7363         }
7364
7365         btrfs_release_path(&path);
7366         return errors;
7367 }
7368
7369 static int is_dropped_key(struct btrfs_key *key,
7370                           struct btrfs_key *drop_key) {
7371         if (key->objectid < drop_key->objectid)
7372                 return 1;
7373         else if (key->objectid == drop_key->objectid) {
7374                 if (key->type < drop_key->type)
7375                         return 1;
7376                 else if (key->type == drop_key->type) {
7377                         if (key->offset < drop_key->offset)
7378                                 return 1;
7379                 }
7380         }
7381         return 0;
7382 }
7383
7384 /*
7385  * Here are the rules for FULL_BACKREF.
7386  *
7387  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7388  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7389  *      FULL_BACKREF set.
7390  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7391  *    if it happened after the relocation occurred since we'll have dropped the
7392  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7393  *    have no real way to know for sure.
7394  *
7395  * We process the blocks one root at a time, and we start from the lowest root
7396  * objectid and go to the highest.  So we can just lookup the owner backref for
7397  * the record and if we don't find it then we know it doesn't exist and we have
7398  * a FULL BACKREF.
7399  *
7400  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7401  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7402  * be set or not and then we can check later once we've gathered all the refs.
7403  */
7404 static int calc_extent_flag(struct btrfs_root *root,
7405                            struct cache_tree *extent_cache,
7406                            struct extent_buffer *buf,
7407                            struct root_item_record *ri,
7408                            u64 *flags)
7409 {
7410         struct extent_record *rec;
7411         struct cache_extent *cache;
7412         struct tree_backref *tback;
7413         u64 owner = 0;
7414
7415         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7416         /* we have added this extent before */
7417         if (!cache)
7418                 return -ENOENT;
7419
7420         rec = container_of(cache, struct extent_record, cache);
7421
7422         /*
7423          * Except file/reloc tree, we can not have
7424          * FULL BACKREF MODE
7425          */
7426         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7427                 goto normal;
7428         /*
7429          * root node
7430          */
7431         if (buf->start == ri->bytenr)
7432                 goto normal;
7433
7434         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7435                 goto full_backref;
7436
7437         owner = btrfs_header_owner(buf);
7438         if (owner == ri->objectid)
7439                 goto normal;
7440
7441         tback = find_tree_backref(rec, 0, owner);
7442         if (!tback)
7443                 goto full_backref;
7444 normal:
7445         *flags = 0;
7446         if (rec->flag_block_full_backref != FLAG_UNSET &&
7447             rec->flag_block_full_backref != 0)
7448                 rec->bad_full_backref = 1;
7449         return 0;
7450 full_backref:
7451         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7452         if (rec->flag_block_full_backref != FLAG_UNSET &&
7453             rec->flag_block_full_backref != 1)
7454                 rec->bad_full_backref = 1;
7455         return 0;
7456 }
7457
7458 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7459 {
7460         fprintf(stderr, "Invalid key type(");
7461         print_key_type(stderr, 0, key_type);
7462         fprintf(stderr, ") found in root(");
7463         print_objectid(stderr, rootid, 0);
7464         fprintf(stderr, ")\n");
7465 }
7466
7467 /*
7468  * Check if the key is valid with its extent buffer.
7469  *
7470  * This is a early check in case invalid key exists in a extent buffer
7471  * This is not comprehensive yet, but should prevent wrong key/item passed
7472  * further
7473  */
7474 static int check_type_with_root(u64 rootid, u8 key_type)
7475 {
7476         switch (key_type) {
7477         /* Only valid in chunk tree */
7478         case BTRFS_DEV_ITEM_KEY:
7479         case BTRFS_CHUNK_ITEM_KEY:
7480                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7481                         goto err;
7482                 break;
7483         /* valid in csum and log tree */
7484         case BTRFS_CSUM_TREE_OBJECTID:
7485                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7486                       is_fstree(rootid)))
7487                         goto err;
7488                 break;
7489         case BTRFS_EXTENT_ITEM_KEY:
7490         case BTRFS_METADATA_ITEM_KEY:
7491         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7492                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7493                         goto err;
7494                 break;
7495         case BTRFS_ROOT_ITEM_KEY:
7496                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7497                         goto err;
7498                 break;
7499         case BTRFS_DEV_EXTENT_KEY:
7500                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7501                         goto err;
7502                 break;
7503         }
7504         return 0;
7505 err:
7506         report_mismatch_key_root(key_type, rootid);
7507         return -EINVAL;
7508 }
7509
7510 static int run_next_block(struct btrfs_root *root,
7511                           struct block_info *bits,
7512                           int bits_nr,
7513                           u64 *last,
7514                           struct cache_tree *pending,
7515                           struct cache_tree *seen,
7516                           struct cache_tree *reada,
7517                           struct cache_tree *nodes,
7518                           struct cache_tree *extent_cache,
7519                           struct cache_tree *chunk_cache,
7520                           struct rb_root *dev_cache,
7521                           struct block_group_tree *block_group_cache,
7522                           struct device_extent_tree *dev_extent_cache,
7523                           struct root_item_record *ri)
7524 {
7525         struct extent_buffer *buf;
7526         struct extent_record *rec = NULL;
7527         u64 bytenr;
7528         u32 size;
7529         u64 parent;
7530         u64 owner;
7531         u64 flags;
7532         u64 ptr;
7533         u64 gen = 0;
7534         int ret = 0;
7535         int i;
7536         int nritems;
7537         struct btrfs_key key;
7538         struct cache_extent *cache;
7539         int reada_bits;
7540
7541         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7542                                     bits_nr, &reada_bits);
7543         if (nritems == 0)
7544                 return 1;
7545
7546         if (!reada_bits) {
7547                 for(i = 0; i < nritems; i++) {
7548                         ret = add_cache_extent(reada, bits[i].start,
7549                                                bits[i].size);
7550                         if (ret == -EEXIST)
7551                                 continue;
7552
7553                         /* fixme, get the parent transid */
7554                         readahead_tree_block(root, bits[i].start,
7555                                              bits[i].size, 0);
7556                 }
7557         }
7558         *last = bits[0].start;
7559         bytenr = bits[0].start;
7560         size = bits[0].size;
7561
7562         cache = lookup_cache_extent(pending, bytenr, size);
7563         if (cache) {
7564                 remove_cache_extent(pending, cache);
7565                 free(cache);
7566         }
7567         cache = lookup_cache_extent(reada, bytenr, size);
7568         if (cache) {
7569                 remove_cache_extent(reada, cache);
7570                 free(cache);
7571         }
7572         cache = lookup_cache_extent(nodes, bytenr, size);
7573         if (cache) {
7574                 remove_cache_extent(nodes, cache);
7575                 free(cache);
7576         }
7577         cache = lookup_cache_extent(extent_cache, bytenr, size);
7578         if (cache) {
7579                 rec = container_of(cache, struct extent_record, cache);
7580                 gen = rec->parent_generation;
7581         }
7582
7583         /* fixme, get the real parent transid */
7584         buf = read_tree_block(root, bytenr, size, gen);
7585         if (!extent_buffer_uptodate(buf)) {
7586                 record_bad_block_io(root->fs_info,
7587                                     extent_cache, bytenr, size);
7588                 goto out;
7589         }
7590
7591         nritems = btrfs_header_nritems(buf);
7592
7593         flags = 0;
7594         if (!init_extent_tree) {
7595                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7596                                        btrfs_header_level(buf), 1, NULL,
7597                                        &flags);
7598                 if (ret < 0) {
7599                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7600                         if (ret < 0) {
7601                                 fprintf(stderr, "Couldn't calc extent flags\n");
7602                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7603                         }
7604                 }
7605         } else {
7606                 flags = 0;
7607                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7608                 if (ret < 0) {
7609                         fprintf(stderr, "Couldn't calc extent flags\n");
7610                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7611                 }
7612         }
7613
7614         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7615                 if (ri != NULL &&
7616                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7617                     ri->objectid == btrfs_header_owner(buf)) {
7618                         /*
7619                          * Ok we got to this block from it's original owner and
7620                          * we have FULL_BACKREF set.  Relocation can leave
7621                          * converted blocks over so this is altogether possible,
7622                          * however it's not possible if the generation > the
7623                          * last snapshot, so check for this case.
7624                          */
7625                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7626                             btrfs_header_generation(buf) > ri->last_snapshot) {
7627                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7628                                 rec->bad_full_backref = 1;
7629                         }
7630                 }
7631         } else {
7632                 if (ri != NULL &&
7633                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7634                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7635                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7636                         rec->bad_full_backref = 1;
7637                 }
7638         }
7639
7640         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7641                 rec->flag_block_full_backref = 1;
7642                 parent = bytenr;
7643                 owner = 0;
7644         } else {
7645                 rec->flag_block_full_backref = 0;
7646                 parent = 0;
7647                 owner = btrfs_header_owner(buf);
7648         }
7649
7650         ret = check_block(root, extent_cache, buf, flags);
7651         if (ret)
7652                 goto out;
7653
7654         if (btrfs_is_leaf(buf)) {
7655                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7656                 for (i = 0; i < nritems; i++) {
7657                         struct btrfs_file_extent_item *fi;
7658                         btrfs_item_key_to_cpu(buf, &key, i);
7659                         /*
7660                          * Check key type against the leaf owner.
7661                          * Could filter quite a lot of early error if
7662                          * owner is correct
7663                          */
7664                         if (check_type_with_root(btrfs_header_owner(buf),
7665                                                  key.type)) {
7666                                 fprintf(stderr, "ignoring invalid key\n");
7667                                 continue;
7668                         }
7669                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7670                                 process_extent_item(root, extent_cache, buf,
7671                                                     i);
7672                                 continue;
7673                         }
7674                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7675                                 process_extent_item(root, extent_cache, buf,
7676                                                     i);
7677                                 continue;
7678                         }
7679                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7680                                 total_csum_bytes +=
7681                                         btrfs_item_size_nr(buf, i);
7682                                 continue;
7683                         }
7684                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7685                                 process_chunk_item(chunk_cache, &key, buf, i);
7686                                 continue;
7687                         }
7688                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7689                                 process_device_item(dev_cache, &key, buf, i);
7690                                 continue;
7691                         }
7692                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7693                                 process_block_group_item(block_group_cache,
7694                                         &key, buf, i);
7695                                 continue;
7696                         }
7697                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7698                                 process_device_extent_item(dev_extent_cache,
7699                                         &key, buf, i);
7700                                 continue;
7701
7702                         }
7703                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7704 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7705                                 process_extent_ref_v0(extent_cache, buf, i);
7706 #else
7707                                 BUG();
7708 #endif
7709                                 continue;
7710                         }
7711
7712                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7713                                 ret = add_tree_backref(extent_cache,
7714                                                 key.objectid, 0, key.offset, 0);
7715                                 if (ret < 0)
7716                                         error("add_tree_backref failed: %s",
7717                                               strerror(-ret));
7718                                 continue;
7719                         }
7720                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7721                                 ret = add_tree_backref(extent_cache,
7722                                                 key.objectid, key.offset, 0, 0);
7723                                 if (ret < 0)
7724                                         error("add_tree_backref failed: %s",
7725                                               strerror(-ret));
7726                                 continue;
7727                         }
7728                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7729                                 struct btrfs_extent_data_ref *ref;
7730                                 ref = btrfs_item_ptr(buf, i,
7731                                                 struct btrfs_extent_data_ref);
7732                                 add_data_backref(extent_cache,
7733                                         key.objectid, 0,
7734                                         btrfs_extent_data_ref_root(buf, ref),
7735                                         btrfs_extent_data_ref_objectid(buf,
7736                                                                        ref),
7737                                         btrfs_extent_data_ref_offset(buf, ref),
7738                                         btrfs_extent_data_ref_count(buf, ref),
7739                                         0, root->sectorsize);
7740                                 continue;
7741                         }
7742                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7743                                 struct btrfs_shared_data_ref *ref;
7744                                 ref = btrfs_item_ptr(buf, i,
7745                                                 struct btrfs_shared_data_ref);
7746                                 add_data_backref(extent_cache,
7747                                         key.objectid, key.offset, 0, 0, 0,
7748                                         btrfs_shared_data_ref_count(buf, ref),
7749                                         0, root->sectorsize);
7750                                 continue;
7751                         }
7752                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7753                                 struct bad_item *bad;
7754
7755                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7756                                         continue;
7757                                 if (!owner)
7758                                         continue;
7759                                 bad = malloc(sizeof(struct bad_item));
7760                                 if (!bad)
7761                                         continue;
7762                                 INIT_LIST_HEAD(&bad->list);
7763                                 memcpy(&bad->key, &key,
7764                                        sizeof(struct btrfs_key));
7765                                 bad->root_id = owner;
7766                                 list_add_tail(&bad->list, &delete_items);
7767                                 continue;
7768                         }
7769                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7770                                 continue;
7771                         fi = btrfs_item_ptr(buf, i,
7772                                             struct btrfs_file_extent_item);
7773                         if (btrfs_file_extent_type(buf, fi) ==
7774                             BTRFS_FILE_EXTENT_INLINE)
7775                                 continue;
7776                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7777                                 continue;
7778
7779                         data_bytes_allocated +=
7780                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7781                         if (data_bytes_allocated < root->sectorsize) {
7782                                 abort();
7783                         }
7784                         data_bytes_referenced +=
7785                                 btrfs_file_extent_num_bytes(buf, fi);
7786                         add_data_backref(extent_cache,
7787                                 btrfs_file_extent_disk_bytenr(buf, fi),
7788                                 parent, owner, key.objectid, key.offset -
7789                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7790                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7791                 }
7792         } else {
7793                 int level;
7794                 struct btrfs_key first_key;
7795
7796                 first_key.objectid = 0;
7797
7798                 if (nritems > 0)
7799                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7800                 level = btrfs_header_level(buf);
7801                 for (i = 0; i < nritems; i++) {
7802                         struct extent_record tmpl;
7803
7804                         ptr = btrfs_node_blockptr(buf, i);
7805                         size = root->nodesize;
7806                         btrfs_node_key_to_cpu(buf, &key, i);
7807                         if (ri != NULL) {
7808                                 if ((level == ri->drop_level)
7809                                     && is_dropped_key(&key, &ri->drop_key)) {
7810                                         continue;
7811                                 }
7812                         }
7813
7814                         memset(&tmpl, 0, sizeof(tmpl));
7815                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7816                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7817                         tmpl.start = ptr;
7818                         tmpl.nr = size;
7819                         tmpl.refs = 1;
7820                         tmpl.metadata = 1;
7821                         tmpl.max_size = size;
7822                         ret = add_extent_rec(extent_cache, &tmpl);
7823                         if (ret < 0)
7824                                 goto out;
7825
7826                         ret = add_tree_backref(extent_cache, ptr, parent,
7827                                         owner, 1);
7828                         if (ret < 0) {
7829                                 error("add_tree_backref failed: %s",
7830                                       strerror(-ret));
7831                                 continue;
7832                         }
7833
7834                         if (level > 1) {
7835                                 add_pending(nodes, seen, ptr, size);
7836                         } else {
7837                                 add_pending(pending, seen, ptr, size);
7838                         }
7839                 }
7840                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7841                                       nritems) * sizeof(struct btrfs_key_ptr);
7842         }
7843         total_btree_bytes += buf->len;
7844         if (fs_root_objectid(btrfs_header_owner(buf)))
7845                 total_fs_tree_bytes += buf->len;
7846         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7847                 total_extent_tree_bytes += buf->len;
7848         if (!found_old_backref &&
7849             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7850             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7851             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7852                 found_old_backref = 1;
7853 out:
7854         free_extent_buffer(buf);
7855         return ret;
7856 }
7857
7858 static int add_root_to_pending(struct extent_buffer *buf,
7859                                struct cache_tree *extent_cache,
7860                                struct cache_tree *pending,
7861                                struct cache_tree *seen,
7862                                struct cache_tree *nodes,
7863                                u64 objectid)
7864 {
7865         struct extent_record tmpl;
7866         int ret;
7867
7868         if (btrfs_header_level(buf) > 0)
7869                 add_pending(nodes, seen, buf->start, buf->len);
7870         else
7871                 add_pending(pending, seen, buf->start, buf->len);
7872
7873         memset(&tmpl, 0, sizeof(tmpl));
7874         tmpl.start = buf->start;
7875         tmpl.nr = buf->len;
7876         tmpl.is_root = 1;
7877         tmpl.refs = 1;
7878         tmpl.metadata = 1;
7879         tmpl.max_size = buf->len;
7880         add_extent_rec(extent_cache, &tmpl);
7881
7882         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7883             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7884                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7885                                 0, 1);
7886         else
7887                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7888                                 1);
7889         return ret;
7890 }
7891
7892 /* as we fix the tree, we might be deleting blocks that
7893  * we're tracking for repair.  This hook makes sure we
7894  * remove any backrefs for blocks as we are fixing them.
7895  */
7896 static int free_extent_hook(struct btrfs_trans_handle *trans,
7897                             struct btrfs_root *root,
7898                             u64 bytenr, u64 num_bytes, u64 parent,
7899                             u64 root_objectid, u64 owner, u64 offset,
7900                             int refs_to_drop)
7901 {
7902         struct extent_record *rec;
7903         struct cache_extent *cache;
7904         int is_data;
7905         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7906
7907         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7908         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7909         if (!cache)
7910                 return 0;
7911
7912         rec = container_of(cache, struct extent_record, cache);
7913         if (is_data) {
7914                 struct data_backref *back;
7915                 back = find_data_backref(rec, parent, root_objectid, owner,
7916                                          offset, 1, bytenr, num_bytes);
7917                 if (!back)
7918                         goto out;
7919                 if (back->node.found_ref) {
7920                         back->found_ref -= refs_to_drop;
7921                         if (rec->refs)
7922                                 rec->refs -= refs_to_drop;
7923                 }
7924                 if (back->node.found_extent_tree) {
7925                         back->num_refs -= refs_to_drop;
7926                         if (rec->extent_item_refs)
7927                                 rec->extent_item_refs -= refs_to_drop;
7928                 }
7929                 if (back->found_ref == 0)
7930                         back->node.found_ref = 0;
7931                 if (back->num_refs == 0)
7932                         back->node.found_extent_tree = 0;
7933
7934                 if (!back->node.found_extent_tree && back->node.found_ref) {
7935                         list_del(&back->node.list);
7936                         free(back);
7937                 }
7938         } else {
7939                 struct tree_backref *back;
7940                 back = find_tree_backref(rec, parent, root_objectid);
7941                 if (!back)
7942                         goto out;
7943                 if (back->node.found_ref) {
7944                         if (rec->refs)
7945                                 rec->refs--;
7946                         back->node.found_ref = 0;
7947                 }
7948                 if (back->node.found_extent_tree) {
7949                         if (rec->extent_item_refs)
7950                                 rec->extent_item_refs--;
7951                         back->node.found_extent_tree = 0;
7952                 }
7953                 if (!back->node.found_extent_tree && back->node.found_ref) {
7954                         list_del(&back->node.list);
7955                         free(back);
7956                 }
7957         }
7958         maybe_free_extent_rec(extent_cache, rec);
7959 out:
7960         return 0;
7961 }
7962
7963 static int delete_extent_records(struct btrfs_trans_handle *trans,
7964                                  struct btrfs_root *root,
7965                                  struct btrfs_path *path,
7966                                  u64 bytenr)
7967 {
7968         struct btrfs_key key;
7969         struct btrfs_key found_key;
7970         struct extent_buffer *leaf;
7971         int ret;
7972         int slot;
7973
7974
7975         key.objectid = bytenr;
7976         key.type = (u8)-1;
7977         key.offset = (u64)-1;
7978
7979         while(1) {
7980                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7981                                         &key, path, 0, 1);
7982                 if (ret < 0)
7983                         break;
7984
7985                 if (ret > 0) {
7986                         ret = 0;
7987                         if (path->slots[0] == 0)
7988                                 break;
7989                         path->slots[0]--;
7990                 }
7991                 ret = 0;
7992
7993                 leaf = path->nodes[0];
7994                 slot = path->slots[0];
7995
7996                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7997                 if (found_key.objectid != bytenr)
7998                         break;
7999
8000                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8001                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8002                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8003                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8004                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8005                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8006                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8007                         btrfs_release_path(path);
8008                         if (found_key.type == 0) {
8009                                 if (found_key.offset == 0)
8010                                         break;
8011                                 key.offset = found_key.offset - 1;
8012                                 key.type = found_key.type;
8013                         }
8014                         key.type = found_key.type - 1;
8015                         key.offset = (u64)-1;
8016                         continue;
8017                 }
8018
8019                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8020                         found_key.objectid, found_key.type, found_key.offset);
8021
8022                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8023                 if (ret)
8024                         break;
8025                 btrfs_release_path(path);
8026
8027                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8028                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8029                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8030                                 found_key.offset : root->nodesize;
8031
8032                         ret = btrfs_update_block_group(trans, root, bytenr,
8033                                                        bytes, 0, 0);
8034                         if (ret)
8035                                 break;
8036                 }
8037         }
8038
8039         btrfs_release_path(path);
8040         return ret;
8041 }
8042
8043 /*
8044  * for a single backref, this will allocate a new extent
8045  * and add the backref to it.
8046  */
8047 static int record_extent(struct btrfs_trans_handle *trans,
8048                          struct btrfs_fs_info *info,
8049                          struct btrfs_path *path,
8050                          struct extent_record *rec,
8051                          struct extent_backref *back,
8052                          int allocated, u64 flags)
8053 {
8054         int ret = 0;
8055         struct btrfs_root *extent_root = info->extent_root;
8056         struct extent_buffer *leaf;
8057         struct btrfs_key ins_key;
8058         struct btrfs_extent_item *ei;
8059         struct data_backref *dback;
8060         struct btrfs_tree_block_info *bi;
8061
8062         if (!back->is_data)
8063                 rec->max_size = max_t(u64, rec->max_size,
8064                                     info->extent_root->nodesize);
8065
8066         if (!allocated) {
8067                 u32 item_size = sizeof(*ei);
8068
8069                 if (!back->is_data)
8070                         item_size += sizeof(*bi);
8071
8072                 ins_key.objectid = rec->start;
8073                 ins_key.offset = rec->max_size;
8074                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8075
8076                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8077                                         &ins_key, item_size);
8078                 if (ret)
8079                         goto fail;
8080
8081                 leaf = path->nodes[0];
8082                 ei = btrfs_item_ptr(leaf, path->slots[0],
8083                                     struct btrfs_extent_item);
8084
8085                 btrfs_set_extent_refs(leaf, ei, 0);
8086                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8087
8088                 if (back->is_data) {
8089                         btrfs_set_extent_flags(leaf, ei,
8090                                                BTRFS_EXTENT_FLAG_DATA);
8091                 } else {
8092                         struct btrfs_disk_key copy_key;;
8093
8094                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8095                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8096                                              sizeof(*bi));
8097
8098                         btrfs_set_disk_key_objectid(&copy_key,
8099                                                     rec->info_objectid);
8100                         btrfs_set_disk_key_type(&copy_key, 0);
8101                         btrfs_set_disk_key_offset(&copy_key, 0);
8102
8103                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8104                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8105
8106                         btrfs_set_extent_flags(leaf, ei,
8107                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8108                 }
8109
8110                 btrfs_mark_buffer_dirty(leaf);
8111                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8112                                                rec->max_size, 1, 0);
8113                 if (ret)
8114                         goto fail;
8115                 btrfs_release_path(path);
8116         }
8117
8118         if (back->is_data) {
8119                 u64 parent;
8120                 int i;
8121
8122                 dback = to_data_backref(back);
8123                 if (back->full_backref)
8124                         parent = dback->parent;
8125                 else
8126                         parent = 0;
8127
8128                 for (i = 0; i < dback->found_ref; i++) {
8129                         /* if parent != 0, we're doing a full backref
8130                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8131                          * just makes the backref allocator create a data
8132                          * backref
8133                          */
8134                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8135                                                    rec->start, rec->max_size,
8136                                                    parent,
8137                                                    dback->root,
8138                                                    parent ?
8139                                                    BTRFS_FIRST_FREE_OBJECTID :
8140                                                    dback->owner,
8141                                                    dback->offset);
8142                         if (ret)
8143                                 break;
8144                 }
8145                 fprintf(stderr, "adding new data backref"
8146                                 " on %llu %s %llu owner %llu"
8147                                 " offset %llu found %d\n",
8148                                 (unsigned long long)rec->start,
8149                                 back->full_backref ?
8150                                 "parent" : "root",
8151                                 back->full_backref ?
8152                                 (unsigned long long)parent :
8153                                 (unsigned long long)dback->root,
8154                                 (unsigned long long)dback->owner,
8155                                 (unsigned long long)dback->offset,
8156                                 dback->found_ref);
8157         } else {
8158                 u64 parent;
8159                 struct tree_backref *tback;
8160
8161                 tback = to_tree_backref(back);
8162                 if (back->full_backref)
8163                         parent = tback->parent;
8164                 else
8165                         parent = 0;
8166
8167                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8168                                            rec->start, rec->max_size,
8169                                            parent, tback->root, 0, 0);
8170                 fprintf(stderr, "adding new tree backref on "
8171                         "start %llu len %llu parent %llu root %llu\n",
8172                         rec->start, rec->max_size, parent, tback->root);
8173         }
8174 fail:
8175         btrfs_release_path(path);
8176         return ret;
8177 }
8178
8179 static struct extent_entry *find_entry(struct list_head *entries,
8180                                        u64 bytenr, u64 bytes)
8181 {
8182         struct extent_entry *entry = NULL;
8183
8184         list_for_each_entry(entry, entries, list) {
8185                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8186                         return entry;
8187         }
8188
8189         return NULL;
8190 }
8191
8192 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8193 {
8194         struct extent_entry *entry, *best = NULL, *prev = NULL;
8195
8196         list_for_each_entry(entry, entries, list) {
8197                 /*
8198                  * If there are as many broken entries as entries then we know
8199                  * not to trust this particular entry.
8200                  */
8201                 if (entry->broken == entry->count)
8202                         continue;
8203
8204                 /*
8205                  * Special case, when there are only two entries and 'best' is
8206                  * the first one
8207                  */
8208                 if (!prev) {
8209                         best = entry;
8210                         prev = entry;
8211                         continue;
8212                 }
8213
8214                 /*
8215                  * If our current entry == best then we can't be sure our best
8216                  * is really the best, so we need to keep searching.
8217                  */
8218                 if (best && best->count == entry->count) {
8219                         prev = entry;
8220                         best = NULL;
8221                         continue;
8222                 }
8223
8224                 /* Prev == entry, not good enough, have to keep searching */
8225                 if (!prev->broken && prev->count == entry->count)
8226                         continue;
8227
8228                 if (!best)
8229                         best = (prev->count > entry->count) ? prev : entry;
8230                 else if (best->count < entry->count)
8231                         best = entry;
8232                 prev = entry;
8233         }
8234
8235         return best;
8236 }
8237
8238 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8239                       struct data_backref *dback, struct extent_entry *entry)
8240 {
8241         struct btrfs_trans_handle *trans;
8242         struct btrfs_root *root;
8243         struct btrfs_file_extent_item *fi;
8244         struct extent_buffer *leaf;
8245         struct btrfs_key key;
8246         u64 bytenr, bytes;
8247         int ret, err;
8248
8249         key.objectid = dback->root;
8250         key.type = BTRFS_ROOT_ITEM_KEY;
8251         key.offset = (u64)-1;
8252         root = btrfs_read_fs_root(info, &key);
8253         if (IS_ERR(root)) {
8254                 fprintf(stderr, "Couldn't find root for our ref\n");
8255                 return -EINVAL;
8256         }
8257
8258         /*
8259          * The backref points to the original offset of the extent if it was
8260          * split, so we need to search down to the offset we have and then walk
8261          * forward until we find the backref we're looking for.
8262          */
8263         key.objectid = dback->owner;
8264         key.type = BTRFS_EXTENT_DATA_KEY;
8265         key.offset = dback->offset;
8266         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8267         if (ret < 0) {
8268                 fprintf(stderr, "Error looking up ref %d\n", ret);
8269                 return ret;
8270         }
8271
8272         while (1) {
8273                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8274                         ret = btrfs_next_leaf(root, path);
8275                         if (ret) {
8276                                 fprintf(stderr, "Couldn't find our ref, next\n");
8277                                 return -EINVAL;
8278                         }
8279                 }
8280                 leaf = path->nodes[0];
8281                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8282                 if (key.objectid != dback->owner ||
8283                     key.type != BTRFS_EXTENT_DATA_KEY) {
8284                         fprintf(stderr, "Couldn't find our ref, search\n");
8285                         return -EINVAL;
8286                 }
8287                 fi = btrfs_item_ptr(leaf, path->slots[0],
8288                                     struct btrfs_file_extent_item);
8289                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8290                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8291
8292                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8293                         break;
8294                 path->slots[0]++;
8295         }
8296
8297         btrfs_release_path(path);
8298
8299         trans = btrfs_start_transaction(root, 1);
8300         if (IS_ERR(trans))
8301                 return PTR_ERR(trans);
8302
8303         /*
8304          * Ok we have the key of the file extent we want to fix, now we can cow
8305          * down to the thing and fix it.
8306          */
8307         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8308         if (ret < 0) {
8309                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8310                         key.objectid, key.type, key.offset, ret);
8311                 goto out;
8312         }
8313         if (ret > 0) {
8314                 fprintf(stderr, "Well that's odd, we just found this key "
8315                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8316                         key.offset);
8317                 ret = -EINVAL;
8318                 goto out;
8319         }
8320         leaf = path->nodes[0];
8321         fi = btrfs_item_ptr(leaf, path->slots[0],
8322                             struct btrfs_file_extent_item);
8323
8324         if (btrfs_file_extent_compression(leaf, fi) &&
8325             dback->disk_bytenr != entry->bytenr) {
8326                 fprintf(stderr, "Ref doesn't match the record start and is "
8327                         "compressed, please take a btrfs-image of this file "
8328                         "system and send it to a btrfs developer so they can "
8329                         "complete this functionality for bytenr %Lu\n",
8330                         dback->disk_bytenr);
8331                 ret = -EINVAL;
8332                 goto out;
8333         }
8334
8335         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8336                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8337         } else if (dback->disk_bytenr > entry->bytenr) {
8338                 u64 off_diff, offset;
8339
8340                 off_diff = dback->disk_bytenr - entry->bytenr;
8341                 offset = btrfs_file_extent_offset(leaf, fi);
8342                 if (dback->disk_bytenr + offset +
8343                     btrfs_file_extent_num_bytes(leaf, fi) >
8344                     entry->bytenr + entry->bytes) {
8345                         fprintf(stderr, "Ref is past the entry end, please "
8346                                 "take a btrfs-image of this file system and "
8347                                 "send it to a btrfs developer, ref %Lu\n",
8348                                 dback->disk_bytenr);
8349                         ret = -EINVAL;
8350                         goto out;
8351                 }
8352                 offset += off_diff;
8353                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8354                 btrfs_set_file_extent_offset(leaf, fi, offset);
8355         } else if (dback->disk_bytenr < entry->bytenr) {
8356                 u64 offset;
8357
8358                 offset = btrfs_file_extent_offset(leaf, fi);
8359                 if (dback->disk_bytenr + offset < entry->bytenr) {
8360                         fprintf(stderr, "Ref is before the entry start, please"
8361                                 " take a btrfs-image of this file system and "
8362                                 "send it to a btrfs developer, ref %Lu\n",
8363                                 dback->disk_bytenr);
8364                         ret = -EINVAL;
8365                         goto out;
8366                 }
8367
8368                 offset += dback->disk_bytenr;
8369                 offset -= entry->bytenr;
8370                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8371                 btrfs_set_file_extent_offset(leaf, fi, offset);
8372         }
8373
8374         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8375
8376         /*
8377          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8378          * only do this if we aren't using compression, otherwise it's a
8379          * trickier case.
8380          */
8381         if (!btrfs_file_extent_compression(leaf, fi))
8382                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8383         else
8384                 printf("ram bytes may be wrong?\n");
8385         btrfs_mark_buffer_dirty(leaf);
8386 out:
8387         err = btrfs_commit_transaction(trans, root);
8388         btrfs_release_path(path);
8389         return ret ? ret : err;
8390 }
8391
8392 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8393                            struct extent_record *rec)
8394 {
8395         struct extent_backref *back;
8396         struct data_backref *dback;
8397         struct extent_entry *entry, *best = NULL;
8398         LIST_HEAD(entries);
8399         int nr_entries = 0;
8400         int broken_entries = 0;
8401         int ret = 0;
8402         short mismatch = 0;
8403
8404         /*
8405          * Metadata is easy and the backrefs should always agree on bytenr and
8406          * size, if not we've got bigger issues.
8407          */
8408         if (rec->metadata)
8409                 return 0;
8410
8411         list_for_each_entry(back, &rec->backrefs, list) {
8412                 if (back->full_backref || !back->is_data)
8413                         continue;
8414
8415                 dback = to_data_backref(back);
8416
8417                 /*
8418                  * We only pay attention to backrefs that we found a real
8419                  * backref for.
8420                  */
8421                 if (dback->found_ref == 0)
8422                         continue;
8423
8424                 /*
8425                  * For now we only catch when the bytes don't match, not the
8426                  * bytenr.  We can easily do this at the same time, but I want
8427                  * to have a fs image to test on before we just add repair
8428                  * functionality willy-nilly so we know we won't screw up the
8429                  * repair.
8430                  */
8431
8432                 entry = find_entry(&entries, dback->disk_bytenr,
8433                                    dback->bytes);
8434                 if (!entry) {
8435                         entry = malloc(sizeof(struct extent_entry));
8436                         if (!entry) {
8437                                 ret = -ENOMEM;
8438                                 goto out;
8439                         }
8440                         memset(entry, 0, sizeof(*entry));
8441                         entry->bytenr = dback->disk_bytenr;
8442                         entry->bytes = dback->bytes;
8443                         list_add_tail(&entry->list, &entries);
8444                         nr_entries++;
8445                 }
8446
8447                 /*
8448                  * If we only have on entry we may think the entries agree when
8449                  * in reality they don't so we have to do some extra checking.
8450                  */
8451                 if (dback->disk_bytenr != rec->start ||
8452                     dback->bytes != rec->nr || back->broken)
8453                         mismatch = 1;
8454
8455                 if (back->broken) {
8456                         entry->broken++;
8457                         broken_entries++;
8458                 }
8459
8460                 entry->count++;
8461         }
8462
8463         /* Yay all the backrefs agree, carry on good sir */
8464         if (nr_entries <= 1 && !mismatch)
8465                 goto out;
8466
8467         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8468                 "%Lu\n", rec->start);
8469
8470         /*
8471          * First we want to see if the backrefs can agree amongst themselves who
8472          * is right, so figure out which one of the entries has the highest
8473          * count.
8474          */
8475         best = find_most_right_entry(&entries);
8476
8477         /*
8478          * Ok so we may have an even split between what the backrefs think, so
8479          * this is where we use the extent ref to see what it thinks.
8480          */
8481         if (!best) {
8482                 entry = find_entry(&entries, rec->start, rec->nr);
8483                 if (!entry && (!broken_entries || !rec->found_rec)) {
8484                         fprintf(stderr, "Backrefs don't agree with each other "
8485                                 "and extent record doesn't agree with anybody,"
8486                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8487                                 rec->start, rec->nr);
8488                         ret = -EINVAL;
8489                         goto out;
8490                 } else if (!entry) {
8491                         /*
8492                          * Ok our backrefs were broken, we'll assume this is the
8493                          * correct value and add an entry for this range.
8494                          */
8495                         entry = malloc(sizeof(struct extent_entry));
8496                         if (!entry) {
8497                                 ret = -ENOMEM;
8498                                 goto out;
8499                         }
8500                         memset(entry, 0, sizeof(*entry));
8501                         entry->bytenr = rec->start;
8502                         entry->bytes = rec->nr;
8503                         list_add_tail(&entry->list, &entries);
8504                         nr_entries++;
8505                 }
8506                 entry->count++;
8507                 best = find_most_right_entry(&entries);
8508                 if (!best) {
8509                         fprintf(stderr, "Backrefs and extent record evenly "
8510                                 "split on who is right, this is going to "
8511                                 "require user input to fix bytenr %Lu bytes "
8512                                 "%Lu\n", rec->start, rec->nr);
8513                         ret = -EINVAL;
8514                         goto out;
8515                 }
8516         }
8517
8518         /*
8519          * I don't think this can happen currently as we'll abort() if we catch
8520          * this case higher up, but in case somebody removes that we still can't
8521          * deal with it properly here yet, so just bail out of that's the case.
8522          */
8523         if (best->bytenr != rec->start) {
8524                 fprintf(stderr, "Extent start and backref starts don't match, "
8525                         "please use btrfs-image on this file system and send "
8526                         "it to a btrfs developer so they can make fsck fix "
8527                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8528                         rec->start, rec->nr);
8529                 ret = -EINVAL;
8530                 goto out;
8531         }
8532
8533         /*
8534          * Ok great we all agreed on an extent record, let's go find the real
8535          * references and fix up the ones that don't match.
8536          */
8537         list_for_each_entry(back, &rec->backrefs, list) {
8538                 if (back->full_backref || !back->is_data)
8539                         continue;
8540
8541                 dback = to_data_backref(back);
8542
8543                 /*
8544                  * Still ignoring backrefs that don't have a real ref attached
8545                  * to them.
8546                  */
8547                 if (dback->found_ref == 0)
8548                         continue;
8549
8550                 if (dback->bytes == best->bytes &&
8551                     dback->disk_bytenr == best->bytenr)
8552                         continue;
8553
8554                 ret = repair_ref(info, path, dback, best);
8555                 if (ret)
8556                         goto out;
8557         }
8558
8559         /*
8560          * Ok we messed with the actual refs, which means we need to drop our
8561          * entire cache and go back and rescan.  I know this is a huge pain and
8562          * adds a lot of extra work, but it's the only way to be safe.  Once all
8563          * the backrefs agree we may not need to do anything to the extent
8564          * record itself.
8565          */
8566         ret = -EAGAIN;
8567 out:
8568         while (!list_empty(&entries)) {
8569                 entry = list_entry(entries.next, struct extent_entry, list);
8570                 list_del_init(&entry->list);
8571                 free(entry);
8572         }
8573         return ret;
8574 }
8575
8576 static int process_duplicates(struct btrfs_root *root,
8577                               struct cache_tree *extent_cache,
8578                               struct extent_record *rec)
8579 {
8580         struct extent_record *good, *tmp;
8581         struct cache_extent *cache;
8582         int ret;
8583
8584         /*
8585          * If we found a extent record for this extent then return, or if we
8586          * have more than one duplicate we are likely going to need to delete
8587          * something.
8588          */
8589         if (rec->found_rec || rec->num_duplicates > 1)
8590                 return 0;
8591
8592         /* Shouldn't happen but just in case */
8593         BUG_ON(!rec->num_duplicates);
8594
8595         /*
8596          * So this happens if we end up with a backref that doesn't match the
8597          * actual extent entry.  So either the backref is bad or the extent
8598          * entry is bad.  Either way we want to have the extent_record actually
8599          * reflect what we found in the extent_tree, so we need to take the
8600          * duplicate out and use that as the extent_record since the only way we
8601          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8602          */
8603         remove_cache_extent(extent_cache, &rec->cache);
8604
8605         good = to_extent_record(rec->dups.next);
8606         list_del_init(&good->list);
8607         INIT_LIST_HEAD(&good->backrefs);
8608         INIT_LIST_HEAD(&good->dups);
8609         good->cache.start = good->start;
8610         good->cache.size = good->nr;
8611         good->content_checked = 0;
8612         good->owner_ref_checked = 0;
8613         good->num_duplicates = 0;
8614         good->refs = rec->refs;
8615         list_splice_init(&rec->backrefs, &good->backrefs);
8616         while (1) {
8617                 cache = lookup_cache_extent(extent_cache, good->start,
8618                                             good->nr);
8619                 if (!cache)
8620                         break;
8621                 tmp = container_of(cache, struct extent_record, cache);
8622
8623                 /*
8624                  * If we find another overlapping extent and it's found_rec is
8625                  * set then it's a duplicate and we need to try and delete
8626                  * something.
8627                  */
8628                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8629                         if (list_empty(&good->list))
8630                                 list_add_tail(&good->list,
8631                                               &duplicate_extents);
8632                         good->num_duplicates += tmp->num_duplicates + 1;
8633                         list_splice_init(&tmp->dups, &good->dups);
8634                         list_del_init(&tmp->list);
8635                         list_add_tail(&tmp->list, &good->dups);
8636                         remove_cache_extent(extent_cache, &tmp->cache);
8637                         continue;
8638                 }
8639
8640                 /*
8641                  * Ok we have another non extent item backed extent rec, so lets
8642                  * just add it to this extent and carry on like we did above.
8643                  */
8644                 good->refs += tmp->refs;
8645                 list_splice_init(&tmp->backrefs, &good->backrefs);
8646                 remove_cache_extent(extent_cache, &tmp->cache);
8647                 free(tmp);
8648         }
8649         ret = insert_cache_extent(extent_cache, &good->cache);
8650         BUG_ON(ret);
8651         free(rec);
8652         return good->num_duplicates ? 0 : 1;
8653 }
8654
8655 static int delete_duplicate_records(struct btrfs_root *root,
8656                                     struct extent_record *rec)
8657 {
8658         struct btrfs_trans_handle *trans;
8659         LIST_HEAD(delete_list);
8660         struct btrfs_path path;
8661         struct extent_record *tmp, *good, *n;
8662         int nr_del = 0;
8663         int ret = 0, err;
8664         struct btrfs_key key;
8665
8666         btrfs_init_path(&path);
8667
8668         good = rec;
8669         /* Find the record that covers all of the duplicates. */
8670         list_for_each_entry(tmp, &rec->dups, list) {
8671                 if (good->start < tmp->start)
8672                         continue;
8673                 if (good->nr > tmp->nr)
8674                         continue;
8675
8676                 if (tmp->start + tmp->nr < good->start + good->nr) {
8677                         fprintf(stderr, "Ok we have overlapping extents that "
8678                                 "aren't completely covered by each other, this "
8679                                 "is going to require more careful thought.  "
8680                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8681                                 tmp->start, tmp->nr, good->start, good->nr);
8682                         abort();
8683                 }
8684                 good = tmp;
8685         }
8686
8687         if (good != rec)
8688                 list_add_tail(&rec->list, &delete_list);
8689
8690         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8691                 if (tmp == good)
8692                         continue;
8693                 list_move_tail(&tmp->list, &delete_list);
8694         }
8695
8696         root = root->fs_info->extent_root;
8697         trans = btrfs_start_transaction(root, 1);
8698         if (IS_ERR(trans)) {
8699                 ret = PTR_ERR(trans);
8700                 goto out;
8701         }
8702
8703         list_for_each_entry(tmp, &delete_list, list) {
8704                 if (tmp->found_rec == 0)
8705                         continue;
8706                 key.objectid = tmp->start;
8707                 key.type = BTRFS_EXTENT_ITEM_KEY;
8708                 key.offset = tmp->nr;
8709
8710                 /* Shouldn't happen but just in case */
8711                 if (tmp->metadata) {
8712                         fprintf(stderr, "Well this shouldn't happen, extent "
8713                                 "record overlaps but is metadata? "
8714                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8715                         abort();
8716                 }
8717
8718                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8719                 if (ret) {
8720                         if (ret > 0)
8721                                 ret = -EINVAL;
8722                         break;
8723                 }
8724                 ret = btrfs_del_item(trans, root, &path);
8725                 if (ret)
8726                         break;
8727                 btrfs_release_path(&path);
8728                 nr_del++;
8729         }
8730         err = btrfs_commit_transaction(trans, root);
8731         if (err && !ret)
8732                 ret = err;
8733 out:
8734         while (!list_empty(&delete_list)) {
8735                 tmp = to_extent_record(delete_list.next);
8736                 list_del_init(&tmp->list);
8737                 if (tmp == rec)
8738                         continue;
8739                 free(tmp);
8740         }
8741
8742         while (!list_empty(&rec->dups)) {
8743                 tmp = to_extent_record(rec->dups.next);
8744                 list_del_init(&tmp->list);
8745                 free(tmp);
8746         }
8747
8748         btrfs_release_path(&path);
8749
8750         if (!ret && !nr_del)
8751                 rec->num_duplicates = 0;
8752
8753         return ret ? ret : nr_del;
8754 }
8755
8756 static int find_possible_backrefs(struct btrfs_fs_info *info,
8757                                   struct btrfs_path *path,
8758                                   struct cache_tree *extent_cache,
8759                                   struct extent_record *rec)
8760 {
8761         struct btrfs_root *root;
8762         struct extent_backref *back;
8763         struct data_backref *dback;
8764         struct cache_extent *cache;
8765         struct btrfs_file_extent_item *fi;
8766         struct btrfs_key key;
8767         u64 bytenr, bytes;
8768         int ret;
8769
8770         list_for_each_entry(back, &rec->backrefs, list) {
8771                 /* Don't care about full backrefs (poor unloved backrefs) */
8772                 if (back->full_backref || !back->is_data)
8773                         continue;
8774
8775                 dback = to_data_backref(back);
8776
8777                 /* We found this one, we don't need to do a lookup */
8778                 if (dback->found_ref)
8779                         continue;
8780
8781                 key.objectid = dback->root;
8782                 key.type = BTRFS_ROOT_ITEM_KEY;
8783                 key.offset = (u64)-1;
8784
8785                 root = btrfs_read_fs_root(info, &key);
8786
8787                 /* No root, definitely a bad ref, skip */
8788                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8789                         continue;
8790                 /* Other err, exit */
8791                 if (IS_ERR(root))
8792                         return PTR_ERR(root);
8793
8794                 key.objectid = dback->owner;
8795                 key.type = BTRFS_EXTENT_DATA_KEY;
8796                 key.offset = dback->offset;
8797                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8798                 if (ret) {
8799                         btrfs_release_path(path);
8800                         if (ret < 0)
8801                                 return ret;
8802                         /* Didn't find it, we can carry on */
8803                         ret = 0;
8804                         continue;
8805                 }
8806
8807                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8808                                     struct btrfs_file_extent_item);
8809                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8810                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8811                 btrfs_release_path(path);
8812                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8813                 if (cache) {
8814                         struct extent_record *tmp;
8815                         tmp = container_of(cache, struct extent_record, cache);
8816
8817                         /*
8818                          * If we found an extent record for the bytenr for this
8819                          * particular backref then we can't add it to our
8820                          * current extent record.  We only want to add backrefs
8821                          * that don't have a corresponding extent item in the
8822                          * extent tree since they likely belong to this record
8823                          * and we need to fix it if it doesn't match bytenrs.
8824                          */
8825                         if  (tmp->found_rec)
8826                                 continue;
8827                 }
8828
8829                 dback->found_ref += 1;
8830                 dback->disk_bytenr = bytenr;
8831                 dback->bytes = bytes;
8832
8833                 /*
8834                  * Set this so the verify backref code knows not to trust the
8835                  * values in this backref.
8836                  */
8837                 back->broken = 1;
8838         }
8839
8840         return 0;
8841 }
8842
8843 /*
8844  * Record orphan data ref into corresponding root.
8845  *
8846  * Return 0 if the extent item contains data ref and recorded.
8847  * Return 1 if the extent item contains no useful data ref
8848  *   On that case, it may contains only shared_dataref or metadata backref
8849  *   or the file extent exists(this should be handled by the extent bytenr
8850  *   recovery routine)
8851  * Return <0 if something goes wrong.
8852  */
8853 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8854                                       struct extent_record *rec)
8855 {
8856         struct btrfs_key key;
8857         struct btrfs_root *dest_root;
8858         struct extent_backref *back;
8859         struct data_backref *dback;
8860         struct orphan_data_extent *orphan;
8861         struct btrfs_path path;
8862         int recorded_data_ref = 0;
8863         int ret = 0;
8864
8865         if (rec->metadata)
8866                 return 1;
8867         btrfs_init_path(&path);
8868         list_for_each_entry(back, &rec->backrefs, list) {
8869                 if (back->full_backref || !back->is_data ||
8870                     !back->found_extent_tree)
8871                         continue;
8872                 dback = to_data_backref(back);
8873                 if (dback->found_ref)
8874                         continue;
8875                 key.objectid = dback->root;
8876                 key.type = BTRFS_ROOT_ITEM_KEY;
8877                 key.offset = (u64)-1;
8878
8879                 dest_root = btrfs_read_fs_root(fs_info, &key);
8880
8881                 /* For non-exist root we just skip it */
8882                 if (IS_ERR(dest_root) || !dest_root)
8883                         continue;
8884
8885                 key.objectid = dback->owner;
8886                 key.type = BTRFS_EXTENT_DATA_KEY;
8887                 key.offset = dback->offset;
8888
8889                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8890                 btrfs_release_path(&path);
8891                 /*
8892                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8893                  * we need to record it for inode/file extent rebuild.
8894                  * For ret > 0, we record it only for file extent rebuild.
8895                  * For ret == 0, the file extent exists but only bytenr
8896                  * mismatch, let the original bytenr fix routine to handle,
8897                  * don't record it.
8898                  */
8899                 if (ret == 0)
8900                         continue;
8901                 ret = 0;
8902                 orphan = malloc(sizeof(*orphan));
8903                 if (!orphan) {
8904                         ret = -ENOMEM;
8905                         goto out;
8906                 }
8907                 INIT_LIST_HEAD(&orphan->list);
8908                 orphan->root = dback->root;
8909                 orphan->objectid = dback->owner;
8910                 orphan->offset = dback->offset;
8911                 orphan->disk_bytenr = rec->cache.start;
8912                 orphan->disk_len = rec->cache.size;
8913                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8914                 recorded_data_ref = 1;
8915         }
8916 out:
8917         btrfs_release_path(&path);
8918         if (!ret)
8919                 return !recorded_data_ref;
8920         else
8921                 return ret;
8922 }
8923
8924 /*
8925  * when an incorrect extent item is found, this will delete
8926  * all of the existing entries for it and recreate them
8927  * based on what the tree scan found.
8928  */
8929 static int fixup_extent_refs(struct btrfs_fs_info *info,
8930                              struct cache_tree *extent_cache,
8931                              struct extent_record *rec)
8932 {
8933         struct btrfs_trans_handle *trans = NULL;
8934         int ret;
8935         struct btrfs_path path;
8936         struct list_head *cur = rec->backrefs.next;
8937         struct cache_extent *cache;
8938         struct extent_backref *back;
8939         int allocated = 0;
8940         u64 flags = 0;
8941
8942         if (rec->flag_block_full_backref)
8943                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8944
8945         btrfs_init_path(&path);
8946         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8947                 /*
8948                  * Sometimes the backrefs themselves are so broken they don't
8949                  * get attached to any meaningful rec, so first go back and
8950                  * check any of our backrefs that we couldn't find and throw
8951                  * them into the list if we find the backref so that
8952                  * verify_backrefs can figure out what to do.
8953                  */
8954                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8955                 if (ret < 0)
8956                         goto out;
8957         }
8958
8959         /* step one, make sure all of the backrefs agree */
8960         ret = verify_backrefs(info, &path, rec);
8961         if (ret < 0)
8962                 goto out;
8963
8964         trans = btrfs_start_transaction(info->extent_root, 1);
8965         if (IS_ERR(trans)) {
8966                 ret = PTR_ERR(trans);
8967                 goto out;
8968         }
8969
8970         /* step two, delete all the existing records */
8971         ret = delete_extent_records(trans, info->extent_root, &path,
8972                                     rec->start);
8973
8974         if (ret < 0)
8975                 goto out;
8976
8977         /* was this block corrupt?  If so, don't add references to it */
8978         cache = lookup_cache_extent(info->corrupt_blocks,
8979                                     rec->start, rec->max_size);
8980         if (cache) {
8981                 ret = 0;
8982                 goto out;
8983         }
8984
8985         /* step three, recreate all the refs we did find */
8986         while(cur != &rec->backrefs) {
8987                 back = to_extent_backref(cur);
8988                 cur = cur->next;
8989
8990                 /*
8991                  * if we didn't find any references, don't create a
8992                  * new extent record
8993                  */
8994                 if (!back->found_ref)
8995                         continue;
8996
8997                 rec->bad_full_backref = 0;
8998                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8999                 allocated = 1;
9000
9001                 if (ret)
9002                         goto out;
9003         }
9004 out:
9005         if (trans) {
9006                 int err = btrfs_commit_transaction(trans, info->extent_root);
9007                 if (!ret)
9008                         ret = err;
9009         }
9010
9011         if (!ret)
9012                 fprintf(stderr, "Repaired extent references for %llu\n",
9013                                 (unsigned long long)rec->start);
9014
9015         btrfs_release_path(&path);
9016         return ret;
9017 }
9018
9019 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9020                               struct extent_record *rec)
9021 {
9022         struct btrfs_trans_handle *trans;
9023         struct btrfs_root *root = fs_info->extent_root;
9024         struct btrfs_path path;
9025         struct btrfs_extent_item *ei;
9026         struct btrfs_key key;
9027         u64 flags;
9028         int ret = 0;
9029
9030         key.objectid = rec->start;
9031         if (rec->metadata) {
9032                 key.type = BTRFS_METADATA_ITEM_KEY;
9033                 key.offset = rec->info_level;
9034         } else {
9035                 key.type = BTRFS_EXTENT_ITEM_KEY;
9036                 key.offset = rec->max_size;
9037         }
9038
9039         trans = btrfs_start_transaction(root, 0);
9040         if (IS_ERR(trans))
9041                 return PTR_ERR(trans);
9042
9043         btrfs_init_path(&path);
9044         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9045         if (ret < 0) {
9046                 btrfs_release_path(&path);
9047                 btrfs_commit_transaction(trans, root);
9048                 return ret;
9049         } else if (ret) {
9050                 fprintf(stderr, "Didn't find extent for %llu\n",
9051                         (unsigned long long)rec->start);
9052                 btrfs_release_path(&path);
9053                 btrfs_commit_transaction(trans, root);
9054                 return -ENOENT;
9055         }
9056
9057         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9058                             struct btrfs_extent_item);
9059         flags = btrfs_extent_flags(path.nodes[0], ei);
9060         if (rec->flag_block_full_backref) {
9061                 fprintf(stderr, "setting full backref on %llu\n",
9062                         (unsigned long long)key.objectid);
9063                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9064         } else {
9065                 fprintf(stderr, "clearing full backref on %llu\n",
9066                         (unsigned long long)key.objectid);
9067                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9068         }
9069         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9070         btrfs_mark_buffer_dirty(path.nodes[0]);
9071         btrfs_release_path(&path);
9072         ret = btrfs_commit_transaction(trans, root);
9073         if (!ret)
9074                 fprintf(stderr, "Repaired extent flags for %llu\n",
9075                                 (unsigned long long)rec->start);
9076
9077         return ret;
9078 }
9079
9080 /* right now we only prune from the extent allocation tree */
9081 static int prune_one_block(struct btrfs_trans_handle *trans,
9082                            struct btrfs_fs_info *info,
9083                            struct btrfs_corrupt_block *corrupt)
9084 {
9085         int ret;
9086         struct btrfs_path path;
9087         struct extent_buffer *eb;
9088         u64 found;
9089         int slot;
9090         int nritems;
9091         int level = corrupt->level + 1;
9092
9093         btrfs_init_path(&path);
9094 again:
9095         /* we want to stop at the parent to our busted block */
9096         path.lowest_level = level;
9097
9098         ret = btrfs_search_slot(trans, info->extent_root,
9099                                 &corrupt->key, &path, -1, 1);
9100
9101         if (ret < 0)
9102                 goto out;
9103
9104         eb = path.nodes[level];
9105         if (!eb) {
9106                 ret = -ENOENT;
9107                 goto out;
9108         }
9109
9110         /*
9111          * hopefully the search gave us the block we want to prune,
9112          * lets try that first
9113          */
9114         slot = path.slots[level];
9115         found =  btrfs_node_blockptr(eb, slot);
9116         if (found == corrupt->cache.start)
9117                 goto del_ptr;
9118
9119         nritems = btrfs_header_nritems(eb);
9120
9121         /* the search failed, lets scan this node and hope we find it */
9122         for (slot = 0; slot < nritems; slot++) {
9123                 found =  btrfs_node_blockptr(eb, slot);
9124                 if (found == corrupt->cache.start)
9125                         goto del_ptr;
9126         }
9127         /*
9128          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9129          * to this block
9130          */
9131         if (eb == info->extent_root->node) {
9132                 ret = -ENOENT;
9133                 goto out;
9134         } else {
9135                 level++;
9136                 btrfs_release_path(&path);
9137                 goto again;
9138         }
9139
9140 del_ptr:
9141         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9142         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9143
9144 out:
9145         btrfs_release_path(&path);
9146         return ret;
9147 }
9148
9149 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9150 {
9151         struct btrfs_trans_handle *trans = NULL;
9152         struct cache_extent *cache;
9153         struct btrfs_corrupt_block *corrupt;
9154
9155         while (1) {
9156                 cache = search_cache_extent(info->corrupt_blocks, 0);
9157                 if (!cache)
9158                         break;
9159                 if (!trans) {
9160                         trans = btrfs_start_transaction(info->extent_root, 1);
9161                         if (IS_ERR(trans))
9162                                 return PTR_ERR(trans);
9163                 }
9164                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9165                 prune_one_block(trans, info, corrupt);
9166                 remove_cache_extent(info->corrupt_blocks, cache);
9167         }
9168         if (trans)
9169                 return btrfs_commit_transaction(trans, info->extent_root);
9170         return 0;
9171 }
9172
9173 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9174 {
9175         struct btrfs_block_group_cache *cache;
9176         u64 start, end;
9177         int ret;
9178
9179         while (1) {
9180                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9181                                             &start, &end, EXTENT_DIRTY);
9182                 if (ret)
9183                         break;
9184                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9185         }
9186
9187         start = 0;
9188         while (1) {
9189                 cache = btrfs_lookup_first_block_group(fs_info, start);
9190                 if (!cache)
9191                         break;
9192                 if (cache->cached)
9193                         cache->cached = 0;
9194                 start = cache->key.objectid + cache->key.offset;
9195         }
9196 }
9197
9198 static int check_extent_refs(struct btrfs_root *root,
9199                              struct cache_tree *extent_cache)
9200 {
9201         struct extent_record *rec;
9202         struct cache_extent *cache;
9203         int ret = 0;
9204         int had_dups = 0;
9205
9206         if (repair) {
9207                 /*
9208                  * if we're doing a repair, we have to make sure
9209                  * we don't allocate from the problem extents.
9210                  * In the worst case, this will be all the
9211                  * extents in the FS
9212                  */
9213                 cache = search_cache_extent(extent_cache, 0);
9214                 while(cache) {
9215                         rec = container_of(cache, struct extent_record, cache);
9216                         set_extent_dirty(root->fs_info->excluded_extents,
9217                                          rec->start,
9218                                          rec->start + rec->max_size - 1);
9219                         cache = next_cache_extent(cache);
9220                 }
9221
9222                 /* pin down all the corrupted blocks too */
9223                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9224                 while(cache) {
9225                         set_extent_dirty(root->fs_info->excluded_extents,
9226                                          cache->start,
9227                                          cache->start + cache->size - 1);
9228                         cache = next_cache_extent(cache);
9229                 }
9230                 prune_corrupt_blocks(root->fs_info);
9231                 reset_cached_block_groups(root->fs_info);
9232         }
9233
9234         reset_cached_block_groups(root->fs_info);
9235
9236         /*
9237          * We need to delete any duplicate entries we find first otherwise we
9238          * could mess up the extent tree when we have backrefs that actually
9239          * belong to a different extent item and not the weird duplicate one.
9240          */
9241         while (repair && !list_empty(&duplicate_extents)) {
9242                 rec = to_extent_record(duplicate_extents.next);
9243                 list_del_init(&rec->list);
9244
9245                 /* Sometimes we can find a backref before we find an actual
9246                  * extent, so we need to process it a little bit to see if there
9247                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9248                  * if this is a backref screwup.  If we need to delete stuff
9249                  * process_duplicates() will return 0, otherwise it will return
9250                  * 1 and we
9251                  */
9252                 if (process_duplicates(root, extent_cache, rec))
9253                         continue;
9254                 ret = delete_duplicate_records(root, rec);
9255                 if (ret < 0)
9256                         return ret;
9257                 /*
9258                  * delete_duplicate_records will return the number of entries
9259                  * deleted, so if it's greater than 0 then we know we actually
9260                  * did something and we need to remove.
9261                  */
9262                 if (ret)
9263                         had_dups = 1;
9264         }
9265
9266         if (had_dups)
9267                 return -EAGAIN;
9268
9269         while(1) {
9270                 int cur_err = 0;
9271                 int fix = 0;
9272
9273                 cache = search_cache_extent(extent_cache, 0);
9274                 if (!cache)
9275                         break;
9276                 rec = container_of(cache, struct extent_record, cache);
9277                 if (rec->num_duplicates) {
9278                         fprintf(stderr, "extent item %llu has multiple extent "
9279                                 "items\n", (unsigned long long)rec->start);
9280                         cur_err = 1;
9281                 }
9282
9283                 if (rec->refs != rec->extent_item_refs) {
9284                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9285                                 (unsigned long long)rec->start,
9286                                 (unsigned long long)rec->nr);
9287                         fprintf(stderr, "extent item %llu, found %llu\n",
9288                                 (unsigned long long)rec->extent_item_refs,
9289                                 (unsigned long long)rec->refs);
9290                         ret = record_orphan_data_extents(root->fs_info, rec);
9291                         if (ret < 0)
9292                                 goto repair_abort;
9293                         fix = ret;
9294                         cur_err = 1;
9295                 }
9296                 if (all_backpointers_checked(rec, 1)) {
9297                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9298                                 (unsigned long long)rec->start,
9299                                 (unsigned long long)rec->nr);
9300                         fix = 1;
9301                         cur_err = 1;
9302                 }
9303                 if (!rec->owner_ref_checked) {
9304                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9305                                 (unsigned long long)rec->start,
9306                                 (unsigned long long)rec->nr);
9307                         fix = 1;
9308                         cur_err = 1;
9309                 }
9310
9311                 if (repair && fix) {
9312                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9313                         if (ret)
9314                                 goto repair_abort;
9315                 }
9316
9317
9318                 if (rec->bad_full_backref) {
9319                         fprintf(stderr, "bad full backref, on [%llu]\n",
9320                                 (unsigned long long)rec->start);
9321                         if (repair) {
9322                                 ret = fixup_extent_flags(root->fs_info, rec);
9323                                 if (ret)
9324                                         goto repair_abort;
9325                                 fix = 1;
9326                         }
9327                         cur_err = 1;
9328                 }
9329                 /*
9330                  * Although it's not a extent ref's problem, we reuse this
9331                  * routine for error reporting.
9332                  * No repair function yet.
9333                  */
9334                 if (rec->crossing_stripes) {
9335                         fprintf(stderr,
9336                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9337                                 rec->start, rec->start + rec->max_size);
9338                         cur_err = 1;
9339                 }
9340
9341                 if (rec->wrong_chunk_type) {
9342                         fprintf(stderr,
9343                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9344                                 rec->start, rec->start + rec->max_size);
9345                         cur_err = 1;
9346                 }
9347
9348                 remove_cache_extent(extent_cache, cache);
9349                 free_all_extent_backrefs(rec);
9350                 if (!init_extent_tree && repair && (!cur_err || fix))
9351                         clear_extent_dirty(root->fs_info->excluded_extents,
9352                                            rec->start,
9353                                            rec->start + rec->max_size - 1);
9354                 free(rec);
9355         }
9356 repair_abort:
9357         if (repair) {
9358                 if (ret && ret != -EAGAIN) {
9359                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9360                         exit(1);
9361                 } else if (!ret) {
9362                         struct btrfs_trans_handle *trans;
9363
9364                         root = root->fs_info->extent_root;
9365                         trans = btrfs_start_transaction(root, 1);
9366                         if (IS_ERR(trans)) {
9367                                 ret = PTR_ERR(trans);
9368                                 goto repair_abort;
9369                         }
9370
9371                         btrfs_fix_block_accounting(trans, root);
9372                         ret = btrfs_commit_transaction(trans, root);
9373                         if (ret)
9374                                 goto repair_abort;
9375                 }
9376                 return ret;
9377         }
9378         return 0;
9379 }
9380
9381 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9382 {
9383         u64 stripe_size;
9384
9385         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9386                 stripe_size = length;
9387                 stripe_size /= num_stripes;
9388         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9389                 stripe_size = length * 2;
9390                 stripe_size /= num_stripes;
9391         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9392                 stripe_size = length;
9393                 stripe_size /= (num_stripes - 1);
9394         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9395                 stripe_size = length;
9396                 stripe_size /= (num_stripes - 2);
9397         } else {
9398                 stripe_size = length;
9399         }
9400         return stripe_size;
9401 }
9402
9403 /*
9404  * Check the chunk with its block group/dev list ref:
9405  * Return 0 if all refs seems valid.
9406  * Return 1 if part of refs seems valid, need later check for rebuild ref
9407  * like missing block group and needs to search extent tree to rebuild them.
9408  * Return -1 if essential refs are missing and unable to rebuild.
9409  */
9410 static int check_chunk_refs(struct chunk_record *chunk_rec,
9411                             struct block_group_tree *block_group_cache,
9412                             struct device_extent_tree *dev_extent_cache,
9413                             int silent)
9414 {
9415         struct cache_extent *block_group_item;
9416         struct block_group_record *block_group_rec;
9417         struct cache_extent *dev_extent_item;
9418         struct device_extent_record *dev_extent_rec;
9419         u64 devid;
9420         u64 offset;
9421         u64 length;
9422         int metadump_v2 = 0;
9423         int i;
9424         int ret = 0;
9425
9426         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9427                                                chunk_rec->offset,
9428                                                chunk_rec->length);
9429         if (block_group_item) {
9430                 block_group_rec = container_of(block_group_item,
9431                                                struct block_group_record,
9432                                                cache);
9433                 if (chunk_rec->length != block_group_rec->offset ||
9434                     chunk_rec->offset != block_group_rec->objectid ||
9435                     (!metadump_v2 &&
9436                      chunk_rec->type_flags != block_group_rec->flags)) {
9437                         if (!silent)
9438                                 fprintf(stderr,
9439                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9440                                         chunk_rec->objectid,
9441                                         chunk_rec->type,
9442                                         chunk_rec->offset,
9443                                         chunk_rec->length,
9444                                         chunk_rec->offset,
9445                                         chunk_rec->type_flags,
9446                                         block_group_rec->objectid,
9447                                         block_group_rec->type,
9448                                         block_group_rec->offset,
9449                                         block_group_rec->offset,
9450                                         block_group_rec->objectid,
9451                                         block_group_rec->flags);
9452                         ret = -1;
9453                 } else {
9454                         list_del_init(&block_group_rec->list);
9455                         chunk_rec->bg_rec = block_group_rec;
9456                 }
9457         } else {
9458                 if (!silent)
9459                         fprintf(stderr,
9460                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9461                                 chunk_rec->objectid,
9462                                 chunk_rec->type,
9463                                 chunk_rec->offset,
9464                                 chunk_rec->length,
9465                                 chunk_rec->offset,
9466                                 chunk_rec->type_flags);
9467                 ret = 1;
9468         }
9469
9470         if (metadump_v2)
9471                 return ret;
9472
9473         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9474                                     chunk_rec->num_stripes);
9475         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9476                 devid = chunk_rec->stripes[i].devid;
9477                 offset = chunk_rec->stripes[i].offset;
9478                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9479                                                        devid, offset, length);
9480                 if (dev_extent_item) {
9481                         dev_extent_rec = container_of(dev_extent_item,
9482                                                 struct device_extent_record,
9483                                                 cache);
9484                         if (dev_extent_rec->objectid != devid ||
9485                             dev_extent_rec->offset != offset ||
9486                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9487                             dev_extent_rec->length != length) {
9488                                 if (!silent)
9489                                         fprintf(stderr,
9490                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9491                                                 chunk_rec->objectid,
9492                                                 chunk_rec->type,
9493                                                 chunk_rec->offset,
9494                                                 chunk_rec->stripes[i].devid,
9495                                                 chunk_rec->stripes[i].offset,
9496                                                 dev_extent_rec->objectid,
9497                                                 dev_extent_rec->offset,
9498                                                 dev_extent_rec->length);
9499                                 ret = -1;
9500                         } else {
9501                                 list_move(&dev_extent_rec->chunk_list,
9502                                           &chunk_rec->dextents);
9503                         }
9504                 } else {
9505                         if (!silent)
9506                                 fprintf(stderr,
9507                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9508                                         chunk_rec->objectid,
9509                                         chunk_rec->type,
9510                                         chunk_rec->offset,
9511                                         chunk_rec->stripes[i].devid,
9512                                         chunk_rec->stripes[i].offset);
9513                         ret = -1;
9514                 }
9515         }
9516         return ret;
9517 }
9518
9519 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9520 int check_chunks(struct cache_tree *chunk_cache,
9521                  struct block_group_tree *block_group_cache,
9522                  struct device_extent_tree *dev_extent_cache,
9523                  struct list_head *good, struct list_head *bad,
9524                  struct list_head *rebuild, int silent)
9525 {
9526         struct cache_extent *chunk_item;
9527         struct chunk_record *chunk_rec;
9528         struct block_group_record *bg_rec;
9529         struct device_extent_record *dext_rec;
9530         int err;
9531         int ret = 0;
9532
9533         chunk_item = first_cache_extent(chunk_cache);
9534         while (chunk_item) {
9535                 chunk_rec = container_of(chunk_item, struct chunk_record,
9536                                          cache);
9537                 err = check_chunk_refs(chunk_rec, block_group_cache,
9538                                        dev_extent_cache, silent);
9539                 if (err < 0)
9540                         ret = err;
9541                 if (err == 0 && good)
9542                         list_add_tail(&chunk_rec->list, good);
9543                 if (err > 0 && rebuild)
9544                         list_add_tail(&chunk_rec->list, rebuild);
9545                 if (err < 0 && bad)
9546                         list_add_tail(&chunk_rec->list, bad);
9547                 chunk_item = next_cache_extent(chunk_item);
9548         }
9549
9550         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9551                 if (!silent)
9552                         fprintf(stderr,
9553                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9554                                 bg_rec->objectid,
9555                                 bg_rec->offset,
9556                                 bg_rec->flags);
9557                 if (!ret)
9558                         ret = 1;
9559         }
9560
9561         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9562                             chunk_list) {
9563                 if (!silent)
9564                         fprintf(stderr,
9565                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9566                                 dext_rec->objectid,
9567                                 dext_rec->offset,
9568                                 dext_rec->length);
9569                 if (!ret)
9570                         ret = 1;
9571         }
9572         return ret;
9573 }
9574
9575
9576 static int check_device_used(struct device_record *dev_rec,
9577                              struct device_extent_tree *dext_cache)
9578 {
9579         struct cache_extent *cache;
9580         struct device_extent_record *dev_extent_rec;
9581         u64 total_byte = 0;
9582
9583         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9584         while (cache) {
9585                 dev_extent_rec = container_of(cache,
9586                                               struct device_extent_record,
9587                                               cache);
9588                 if (dev_extent_rec->objectid != dev_rec->devid)
9589                         break;
9590
9591                 list_del_init(&dev_extent_rec->device_list);
9592                 total_byte += dev_extent_rec->length;
9593                 cache = next_cache_extent(cache);
9594         }
9595
9596         if (total_byte != dev_rec->byte_used) {
9597                 fprintf(stderr,
9598                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9599                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9600                         dev_rec->type, dev_rec->offset);
9601                 return -1;
9602         } else {
9603                 return 0;
9604         }
9605 }
9606
9607 /* check btrfs_dev_item -> btrfs_dev_extent */
9608 static int check_devices(struct rb_root *dev_cache,
9609                          struct device_extent_tree *dev_extent_cache)
9610 {
9611         struct rb_node *dev_node;
9612         struct device_record *dev_rec;
9613         struct device_extent_record *dext_rec;
9614         int err;
9615         int ret = 0;
9616
9617         dev_node = rb_first(dev_cache);
9618         while (dev_node) {
9619                 dev_rec = container_of(dev_node, struct device_record, node);
9620                 err = check_device_used(dev_rec, dev_extent_cache);
9621                 if (err)
9622                         ret = err;
9623
9624                 dev_node = rb_next(dev_node);
9625         }
9626         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9627                             device_list) {
9628                 fprintf(stderr,
9629                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9630                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9631                 if (!ret)
9632                         ret = 1;
9633         }
9634         return ret;
9635 }
9636
9637 static int add_root_item_to_list(struct list_head *head,
9638                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9639                                   u8 level, u8 drop_level,
9640                                   int level_size, struct btrfs_key *drop_key)
9641 {
9642
9643         struct root_item_record *ri_rec;
9644         ri_rec = malloc(sizeof(*ri_rec));
9645         if (!ri_rec)
9646                 return -ENOMEM;
9647         ri_rec->bytenr = bytenr;
9648         ri_rec->objectid = objectid;
9649         ri_rec->level = level;
9650         ri_rec->level_size = level_size;
9651         ri_rec->drop_level = drop_level;
9652         ri_rec->last_snapshot = last_snapshot;
9653         if (drop_key)
9654                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9655         list_add_tail(&ri_rec->list, head);
9656
9657         return 0;
9658 }
9659
9660 static void free_root_item_list(struct list_head *list)
9661 {
9662         struct root_item_record *ri_rec;
9663
9664         while (!list_empty(list)) {
9665                 ri_rec = list_first_entry(list, struct root_item_record,
9666                                           list);
9667                 list_del_init(&ri_rec->list);
9668                 free(ri_rec);
9669         }
9670 }
9671
9672 static int deal_root_from_list(struct list_head *list,
9673                                struct btrfs_root *root,
9674                                struct block_info *bits,
9675                                int bits_nr,
9676                                struct cache_tree *pending,
9677                                struct cache_tree *seen,
9678                                struct cache_tree *reada,
9679                                struct cache_tree *nodes,
9680                                struct cache_tree *extent_cache,
9681                                struct cache_tree *chunk_cache,
9682                                struct rb_root *dev_cache,
9683                                struct block_group_tree *block_group_cache,
9684                                struct device_extent_tree *dev_extent_cache)
9685 {
9686         int ret = 0;
9687         u64 last;
9688
9689         while (!list_empty(list)) {
9690                 struct root_item_record *rec;
9691                 struct extent_buffer *buf;
9692                 rec = list_entry(list->next,
9693                                  struct root_item_record, list);
9694                 last = 0;
9695                 buf = read_tree_block(root->fs_info->tree_root,
9696                                       rec->bytenr, rec->level_size, 0);
9697                 if (!extent_buffer_uptodate(buf)) {
9698                         free_extent_buffer(buf);
9699                         ret = -EIO;
9700                         break;
9701                 }
9702                 ret = add_root_to_pending(buf, extent_cache, pending,
9703                                     seen, nodes, rec->objectid);
9704                 if (ret < 0)
9705                         break;
9706                 /*
9707                  * To rebuild extent tree, we need deal with snapshot
9708                  * one by one, otherwise we deal with node firstly which
9709                  * can maximize readahead.
9710                  */
9711                 while (1) {
9712                         ret = run_next_block(root, bits, bits_nr, &last,
9713                                              pending, seen, reada, nodes,
9714                                              extent_cache, chunk_cache,
9715                                              dev_cache, block_group_cache,
9716                                              dev_extent_cache, rec);
9717                         if (ret != 0)
9718                                 break;
9719                 }
9720                 free_extent_buffer(buf);
9721                 list_del(&rec->list);
9722                 free(rec);
9723                 if (ret < 0)
9724                         break;
9725         }
9726         while (ret >= 0) {
9727                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9728                                      reada, nodes, extent_cache, chunk_cache,
9729                                      dev_cache, block_group_cache,
9730                                      dev_extent_cache, NULL);
9731                 if (ret != 0) {
9732                         if (ret > 0)
9733                                 ret = 0;
9734                         break;
9735                 }
9736         }
9737         return ret;
9738 }
9739
9740 static int check_chunks_and_extents(struct btrfs_root *root)
9741 {
9742         struct rb_root dev_cache;
9743         struct cache_tree chunk_cache;
9744         struct block_group_tree block_group_cache;
9745         struct device_extent_tree dev_extent_cache;
9746         struct cache_tree extent_cache;
9747         struct cache_tree seen;
9748         struct cache_tree pending;
9749         struct cache_tree reada;
9750         struct cache_tree nodes;
9751         struct extent_io_tree excluded_extents;
9752         struct cache_tree corrupt_blocks;
9753         struct btrfs_path path;
9754         struct btrfs_key key;
9755         struct btrfs_key found_key;
9756         int ret, err = 0;
9757         struct block_info *bits;
9758         int bits_nr;
9759         struct extent_buffer *leaf;
9760         int slot;
9761         struct btrfs_root_item ri;
9762         struct list_head dropping_trees;
9763         struct list_head normal_trees;
9764         struct btrfs_root *root1;
9765         u64 objectid;
9766         u32 level_size;
9767         u8 level;
9768
9769         dev_cache = RB_ROOT;
9770         cache_tree_init(&chunk_cache);
9771         block_group_tree_init(&block_group_cache);
9772         device_extent_tree_init(&dev_extent_cache);
9773
9774         cache_tree_init(&extent_cache);
9775         cache_tree_init(&seen);
9776         cache_tree_init(&pending);
9777         cache_tree_init(&nodes);
9778         cache_tree_init(&reada);
9779         cache_tree_init(&corrupt_blocks);
9780         extent_io_tree_init(&excluded_extents);
9781         INIT_LIST_HEAD(&dropping_trees);
9782         INIT_LIST_HEAD(&normal_trees);
9783
9784         if (repair) {
9785                 root->fs_info->excluded_extents = &excluded_extents;
9786                 root->fs_info->fsck_extent_cache = &extent_cache;
9787                 root->fs_info->free_extent_hook = free_extent_hook;
9788                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9789         }
9790
9791         bits_nr = 1024;
9792         bits = malloc(bits_nr * sizeof(struct block_info));
9793         if (!bits) {
9794                 perror("malloc");
9795                 exit(1);
9796         }
9797
9798         if (ctx.progress_enabled) {
9799                 ctx.tp = TASK_EXTENTS;
9800                 task_start(ctx.info);
9801         }
9802
9803 again:
9804         root1 = root->fs_info->tree_root;
9805         level = btrfs_header_level(root1->node);
9806         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9807                                     root1->node->start, 0, level, 0,
9808                                     root1->nodesize, NULL);
9809         if (ret < 0)
9810                 goto out;
9811         root1 = root->fs_info->chunk_root;
9812         level = btrfs_header_level(root1->node);
9813         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9814                                     root1->node->start, 0, level, 0,
9815                                     root1->nodesize, NULL);
9816         if (ret < 0)
9817                 goto out;
9818         btrfs_init_path(&path);
9819         key.offset = 0;
9820         key.objectid = 0;
9821         key.type = BTRFS_ROOT_ITEM_KEY;
9822         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9823                                         &key, &path, 0, 0);
9824         if (ret < 0)
9825                 goto out;
9826         while(1) {
9827                 leaf = path.nodes[0];
9828                 slot = path.slots[0];
9829                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9830                         ret = btrfs_next_leaf(root, &path);
9831                         if (ret != 0)
9832                                 break;
9833                         leaf = path.nodes[0];
9834                         slot = path.slots[0];
9835                 }
9836                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9837                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9838                         unsigned long offset;
9839                         u64 last_snapshot;
9840
9841                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9842                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9843                         last_snapshot = btrfs_root_last_snapshot(&ri);
9844                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9845                                 level = btrfs_root_level(&ri);
9846                                 level_size = root->nodesize;
9847                                 ret = add_root_item_to_list(&normal_trees,
9848                                                 found_key.objectid,
9849                                                 btrfs_root_bytenr(&ri),
9850                                                 last_snapshot, level,
9851                                                 0, level_size, NULL);
9852                                 if (ret < 0)
9853                                         goto out;
9854                         } else {
9855                                 level = btrfs_root_level(&ri);
9856                                 level_size = root->nodesize;
9857                                 objectid = found_key.objectid;
9858                                 btrfs_disk_key_to_cpu(&found_key,
9859                                                       &ri.drop_progress);
9860                                 ret = add_root_item_to_list(&dropping_trees,
9861                                                 objectid,
9862                                                 btrfs_root_bytenr(&ri),
9863                                                 last_snapshot, level,
9864                                                 ri.drop_level,
9865                                                 level_size, &found_key);
9866                                 if (ret < 0)
9867                                         goto out;
9868                         }
9869                 }
9870                 path.slots[0]++;
9871         }
9872         btrfs_release_path(&path);
9873
9874         /*
9875          * check_block can return -EAGAIN if it fixes something, please keep
9876          * this in mind when dealing with return values from these functions, if
9877          * we get -EAGAIN we want to fall through and restart the loop.
9878          */
9879         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9880                                   &seen, &reada, &nodes, &extent_cache,
9881                                   &chunk_cache, &dev_cache, &block_group_cache,
9882                                   &dev_extent_cache);
9883         if (ret < 0) {
9884                 if (ret == -EAGAIN)
9885                         goto loop;
9886                 goto out;
9887         }
9888         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9889                                   &pending, &seen, &reada, &nodes,
9890                                   &extent_cache, &chunk_cache, &dev_cache,
9891                                   &block_group_cache, &dev_extent_cache);
9892         if (ret < 0) {
9893                 if (ret == -EAGAIN)
9894                         goto loop;
9895                 goto out;
9896         }
9897
9898         ret = check_chunks(&chunk_cache, &block_group_cache,
9899                            &dev_extent_cache, NULL, NULL, NULL, 0);
9900         if (ret) {
9901                 if (ret == -EAGAIN)
9902                         goto loop;
9903                 err = ret;
9904         }
9905
9906         ret = check_extent_refs(root, &extent_cache);
9907         if (ret < 0) {
9908                 if (ret == -EAGAIN)
9909                         goto loop;
9910                 goto out;
9911         }
9912
9913         ret = check_devices(&dev_cache, &dev_extent_cache);
9914         if (ret && err)
9915                 ret = err;
9916
9917 out:
9918         task_stop(ctx.info);
9919         if (repair) {
9920                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9921                 extent_io_tree_cleanup(&excluded_extents);
9922                 root->fs_info->fsck_extent_cache = NULL;
9923                 root->fs_info->free_extent_hook = NULL;
9924                 root->fs_info->corrupt_blocks = NULL;
9925                 root->fs_info->excluded_extents = NULL;
9926         }
9927         free(bits);
9928         free_chunk_cache_tree(&chunk_cache);
9929         free_device_cache_tree(&dev_cache);
9930         free_block_group_tree(&block_group_cache);
9931         free_device_extent_tree(&dev_extent_cache);
9932         free_extent_cache_tree(&seen);
9933         free_extent_cache_tree(&pending);
9934         free_extent_cache_tree(&reada);
9935         free_extent_cache_tree(&nodes);
9936         return ret;
9937 loop:
9938         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9939         free_extent_cache_tree(&seen);
9940         free_extent_cache_tree(&pending);
9941         free_extent_cache_tree(&reada);
9942         free_extent_cache_tree(&nodes);
9943         free_chunk_cache_tree(&chunk_cache);
9944         free_block_group_tree(&block_group_cache);
9945         free_device_cache_tree(&dev_cache);
9946         free_device_extent_tree(&dev_extent_cache);
9947         free_extent_record_cache(&extent_cache);
9948         free_root_item_list(&normal_trees);
9949         free_root_item_list(&dropping_trees);
9950         extent_io_tree_cleanup(&excluded_extents);
9951         goto again;
9952 }
9953
9954 /*
9955  * Check backrefs of a tree block given by @bytenr or @eb.
9956  *
9957  * @root:       the root containing the @bytenr or @eb
9958  * @eb:         tree block extent buffer, can be NULL
9959  * @bytenr:     bytenr of the tree block to search
9960  * @level:      tree level of the tree block
9961  * @owner:      owner of the tree block
9962  *
9963  * Return >0 for any error found and output error message
9964  * Return 0 for no error found
9965  */
9966 static int check_tree_block_ref(struct btrfs_root *root,
9967                                 struct extent_buffer *eb, u64 bytenr,
9968                                 int level, u64 owner)
9969 {
9970         struct btrfs_key key;
9971         struct btrfs_root *extent_root = root->fs_info->extent_root;
9972         struct btrfs_path path;
9973         struct btrfs_extent_item *ei;
9974         struct btrfs_extent_inline_ref *iref;
9975         struct extent_buffer *leaf;
9976         unsigned long end;
9977         unsigned long ptr;
9978         int slot;
9979         int skinny_level;
9980         int type;
9981         u32 nodesize = root->nodesize;
9982         u32 item_size;
9983         u64 offset;
9984         int tree_reloc_root = 0;
9985         int found_ref = 0;
9986         int err = 0;
9987         int ret;
9988
9989         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9990             btrfs_header_bytenr(root->node) == bytenr)
9991                 tree_reloc_root = 1;
9992
9993         btrfs_init_path(&path);
9994         key.objectid = bytenr;
9995         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9996                 key.type = BTRFS_METADATA_ITEM_KEY;
9997         else
9998                 key.type = BTRFS_EXTENT_ITEM_KEY;
9999         key.offset = (u64)-1;
10000
10001         /* Search for the backref in extent tree */
10002         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10003         if (ret < 0) {
10004                 err |= BACKREF_MISSING;
10005                 goto out;
10006         }
10007         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10008         if (ret) {
10009                 err |= BACKREF_MISSING;
10010                 goto out;
10011         }
10012
10013         leaf = path.nodes[0];
10014         slot = path.slots[0];
10015         btrfs_item_key_to_cpu(leaf, &key, slot);
10016
10017         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10018
10019         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10020                 skinny_level = (int)key.offset;
10021                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10022         } else {
10023                 struct btrfs_tree_block_info *info;
10024
10025                 info = (struct btrfs_tree_block_info *)(ei + 1);
10026                 skinny_level = btrfs_tree_block_level(leaf, info);
10027                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10028         }
10029
10030         if (eb) {
10031                 u64 header_gen;
10032                 u64 extent_gen;
10033
10034                 if (!(btrfs_extent_flags(leaf, ei) &
10035                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10036                         error(
10037                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10038                                 key.objectid, nodesize,
10039                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10040                         err = BACKREF_MISMATCH;
10041                 }
10042                 header_gen = btrfs_header_generation(eb);
10043                 extent_gen = btrfs_extent_generation(leaf, ei);
10044                 if (header_gen != extent_gen) {
10045                         error(
10046         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10047                                 key.objectid, nodesize, header_gen,
10048                                 extent_gen);
10049                         err = BACKREF_MISMATCH;
10050                 }
10051                 if (level != skinny_level) {
10052                         error(
10053                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10054                                 key.objectid, nodesize, level, skinny_level);
10055                         err = BACKREF_MISMATCH;
10056                 }
10057                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10058                         error(
10059                         "extent[%llu %u] is referred by other roots than %llu",
10060                                 key.objectid, nodesize, root->objectid);
10061                         err = BACKREF_MISMATCH;
10062                 }
10063         }
10064
10065         /*
10066          * Iterate the extent/metadata item to find the exact backref
10067          */
10068         item_size = btrfs_item_size_nr(leaf, slot);
10069         ptr = (unsigned long)iref;
10070         end = (unsigned long)ei + item_size;
10071         while (ptr < end) {
10072                 iref = (struct btrfs_extent_inline_ref *)ptr;
10073                 type = btrfs_extent_inline_ref_type(leaf, iref);
10074                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10075
10076                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10077                         (offset == root->objectid || offset == owner)) {
10078                         found_ref = 1;
10079                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10080                         /*
10081                          * Backref of tree reloc root points to itself, no need
10082                          * to check backref any more.
10083                          */
10084                         if (tree_reloc_root)
10085                                 found_ref = 1;
10086                         else
10087                         /* Check if the backref points to valid referencer */
10088                                 found_ref = !check_tree_block_ref(root, NULL,
10089                                                 offset, level + 1, owner);
10090                 }
10091
10092                 if (found_ref)
10093                         break;
10094                 ptr += btrfs_extent_inline_ref_size(type);
10095         }
10096
10097         /*
10098          * Inlined extent item doesn't have what we need, check
10099          * TREE_BLOCK_REF_KEY
10100          */
10101         if (!found_ref) {
10102                 btrfs_release_path(&path);
10103                 key.objectid = bytenr;
10104                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10105                 key.offset = root->objectid;
10106
10107                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10108                 if (!ret)
10109                         found_ref = 1;
10110         }
10111         if (!found_ref)
10112                 err |= BACKREF_MISSING;
10113 out:
10114         btrfs_release_path(&path);
10115         if (eb && (err & BACKREF_MISSING))
10116                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10117                         bytenr, nodesize, owner, level);
10118         return err;
10119 }
10120
10121 /*
10122  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10123  *
10124  * Return >0 any error found and output error message
10125  * Return 0 for no error found
10126  */
10127 static int check_extent_data_item(struct btrfs_root *root,
10128                                   struct extent_buffer *eb, int slot)
10129 {
10130         struct btrfs_file_extent_item *fi;
10131         struct btrfs_path path;
10132         struct btrfs_root *extent_root = root->fs_info->extent_root;
10133         struct btrfs_key fi_key;
10134         struct btrfs_key dbref_key;
10135         struct extent_buffer *leaf;
10136         struct btrfs_extent_item *ei;
10137         struct btrfs_extent_inline_ref *iref;
10138         struct btrfs_extent_data_ref *dref;
10139         u64 owner;
10140         u64 disk_bytenr;
10141         u64 disk_num_bytes;
10142         u64 extent_num_bytes;
10143         u64 extent_flags;
10144         u32 item_size;
10145         unsigned long end;
10146         unsigned long ptr;
10147         int type;
10148         u64 ref_root;
10149         int found_dbackref = 0;
10150         int err = 0;
10151         int ret;
10152
10153         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10154         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10155
10156         /* Nothing to check for hole and inline data extents */
10157         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10158             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10159                 return 0;
10160
10161         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10162         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10163         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10164
10165         /* Check unaligned disk_num_bytes and num_bytes */
10166         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10167                 error(
10168 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10169                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10170                         root->sectorsize);
10171                 err |= BYTES_UNALIGNED;
10172         } else {
10173                 data_bytes_allocated += disk_num_bytes;
10174         }
10175         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10176                 error(
10177 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10178                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10179                         root->sectorsize);
10180                 err |= BYTES_UNALIGNED;
10181         } else {
10182                 data_bytes_referenced += extent_num_bytes;
10183         }
10184         owner = btrfs_header_owner(eb);
10185
10186         /* Check the extent item of the file extent in extent tree */
10187         btrfs_init_path(&path);
10188         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10189         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10190         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10191
10192         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10193         if (ret) {
10194                 err |= BACKREF_MISSING;
10195                 goto error;
10196         }
10197
10198         leaf = path.nodes[0];
10199         slot = path.slots[0];
10200         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10201
10202         extent_flags = btrfs_extent_flags(leaf, ei);
10203
10204         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10205                 error(
10206                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10207                     disk_bytenr, disk_num_bytes,
10208                     BTRFS_EXTENT_FLAG_DATA);
10209                 err |= BACKREF_MISMATCH;
10210         }
10211
10212         /* Check data backref inside that extent item */
10213         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10214         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10215         ptr = (unsigned long)iref;
10216         end = (unsigned long)ei + item_size;
10217         while (ptr < end) {
10218                 iref = (struct btrfs_extent_inline_ref *)ptr;
10219                 type = btrfs_extent_inline_ref_type(leaf, iref);
10220                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10221
10222                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10223                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10224                         if (ref_root == owner || ref_root == root->objectid)
10225                                 found_dbackref = 1;
10226                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10227                         found_dbackref = !check_tree_block_ref(root, NULL,
10228                                 btrfs_extent_inline_ref_offset(leaf, iref),
10229                                 0, owner);
10230                 }
10231
10232                 if (found_dbackref)
10233                         break;
10234                 ptr += btrfs_extent_inline_ref_size(type);
10235         }
10236
10237         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10238         if (!found_dbackref) {
10239                 btrfs_release_path(&path);
10240
10241                 btrfs_init_path(&path);
10242                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10243                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10244                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10245                                 fi_key.objectid, fi_key.offset);
10246
10247                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10248                                         &dbref_key, &path, 0, 0);
10249                 if (!ret)
10250                         found_dbackref = 1;
10251         }
10252
10253         if (!found_dbackref)
10254                 err |= BACKREF_MISSING;
10255 error:
10256         btrfs_release_path(&path);
10257         if (err & BACKREF_MISSING) {
10258                 error("data extent[%llu %llu] backref lost",
10259                       disk_bytenr, disk_num_bytes);
10260         }
10261         return err;
10262 }
10263
10264 /*
10265  * Get real tree block level for the case like shared block
10266  * Return >= 0 as tree level
10267  * Return <0 for error
10268  */
10269 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10270 {
10271         struct extent_buffer *eb;
10272         struct btrfs_path path;
10273         struct btrfs_key key;
10274         struct btrfs_extent_item *ei;
10275         u64 flags;
10276         u64 transid;
10277         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10278         u8 backref_level;
10279         u8 header_level;
10280         int ret;
10281
10282         /* Search extent tree for extent generation and level */
10283         key.objectid = bytenr;
10284         key.type = BTRFS_METADATA_ITEM_KEY;
10285         key.offset = (u64)-1;
10286
10287         btrfs_init_path(&path);
10288         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10289         if (ret < 0)
10290                 goto release_out;
10291         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10292         if (ret < 0)
10293                 goto release_out;
10294         if (ret > 0) {
10295                 ret = -ENOENT;
10296                 goto release_out;
10297         }
10298
10299         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10300         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10301                             struct btrfs_extent_item);
10302         flags = btrfs_extent_flags(path.nodes[0], ei);
10303         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10304                 ret = -ENOENT;
10305                 goto release_out;
10306         }
10307
10308         /* Get transid for later read_tree_block() check */
10309         transid = btrfs_extent_generation(path.nodes[0], ei);
10310
10311         /* Get backref level as one source */
10312         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10313                 backref_level = key.offset;
10314         } else {
10315                 struct btrfs_tree_block_info *info;
10316
10317                 info = (struct btrfs_tree_block_info *)(ei + 1);
10318                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10319         }
10320         btrfs_release_path(&path);
10321
10322         /* Get level from tree block as an alternative source */
10323         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10324         if (!extent_buffer_uptodate(eb)) {
10325                 free_extent_buffer(eb);
10326                 return -EIO;
10327         }
10328         header_level = btrfs_header_level(eb);
10329         free_extent_buffer(eb);
10330
10331         if (header_level != backref_level)
10332                 return -EIO;
10333         return header_level;
10334
10335 release_out:
10336         btrfs_release_path(&path);
10337         return ret;
10338 }
10339
10340 /*
10341  * Check if a tree block backref is valid (points to a valid tree block)
10342  * if level == -1, level will be resolved
10343  * Return >0 for any error found and print error message
10344  */
10345 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10346                                     u64 bytenr, int level)
10347 {
10348         struct btrfs_root *root;
10349         struct btrfs_key key;
10350         struct btrfs_path path;
10351         struct extent_buffer *eb;
10352         struct extent_buffer *node;
10353         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10354         int err = 0;
10355         int ret;
10356
10357         /* Query level for level == -1 special case */
10358         if (level == -1)
10359                 level = query_tree_block_level(fs_info, bytenr);
10360         if (level < 0) {
10361                 err |= REFERENCER_MISSING;
10362                 goto out;
10363         }
10364
10365         key.objectid = root_id;
10366         key.type = BTRFS_ROOT_ITEM_KEY;
10367         key.offset = (u64)-1;
10368
10369         root = btrfs_read_fs_root(fs_info, &key);
10370         if (IS_ERR(root)) {
10371                 err |= REFERENCER_MISSING;
10372                 goto out;
10373         }
10374
10375         /* Read out the tree block to get item/node key */
10376         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10377         if (!extent_buffer_uptodate(eb)) {
10378                 err |= REFERENCER_MISSING;
10379                 free_extent_buffer(eb);
10380                 goto out;
10381         }
10382
10383         /* Empty tree, no need to check key */
10384         if (!btrfs_header_nritems(eb) && !level) {
10385                 free_extent_buffer(eb);
10386                 goto out;
10387         }
10388
10389         if (level)
10390                 btrfs_node_key_to_cpu(eb, &key, 0);
10391         else
10392                 btrfs_item_key_to_cpu(eb, &key, 0);
10393
10394         free_extent_buffer(eb);
10395
10396         btrfs_init_path(&path);
10397         path.lowest_level = level;
10398         /* Search with the first key, to ensure we can reach it */
10399         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10400         if (ret < 0) {
10401                 err |= REFERENCER_MISSING;
10402                 goto release_out;
10403         }
10404
10405         node = path.nodes[level];
10406         if (btrfs_header_bytenr(node) != bytenr) {
10407                 error(
10408         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10409                         bytenr, nodesize, bytenr,
10410                         btrfs_header_bytenr(node));
10411                 err |= REFERENCER_MISMATCH;
10412         }
10413         if (btrfs_header_level(node) != level) {
10414                 error(
10415         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10416                         bytenr, nodesize, level,
10417                         btrfs_header_level(node));
10418                 err |= REFERENCER_MISMATCH;
10419         }
10420
10421 release_out:
10422         btrfs_release_path(&path);
10423 out:
10424         if (err & REFERENCER_MISSING) {
10425                 if (level < 0)
10426                         error("extent [%llu %d] lost referencer (owner: %llu)",
10427                                 bytenr, nodesize, root_id);
10428                 else
10429                         error(
10430                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10431                                 bytenr, nodesize, root_id, level);
10432         }
10433
10434         return err;
10435 }
10436
10437 /*
10438  * Check if tree block @eb is tree reloc root.
10439  * Return 0 if it's not or any problem happens
10440  * Return 1 if it's a tree reloc root
10441  */
10442 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10443                                  struct extent_buffer *eb)
10444 {
10445         struct btrfs_root *tree_reloc_root;
10446         struct btrfs_key key;
10447         u64 bytenr = btrfs_header_bytenr(eb);
10448         u64 owner = btrfs_header_owner(eb);
10449         int ret = 0;
10450
10451         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10452         key.offset = owner;
10453         key.type = BTRFS_ROOT_ITEM_KEY;
10454
10455         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10456         if (IS_ERR(tree_reloc_root))
10457                 return 0;
10458
10459         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10460                 ret = 1;
10461         btrfs_free_fs_root(tree_reloc_root);
10462         return ret;
10463 }
10464
10465 /*
10466  * Check referencer for shared block backref
10467  * If level == -1, this function will resolve the level.
10468  */
10469 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10470                                      u64 parent, u64 bytenr, int level)
10471 {
10472         struct extent_buffer *eb;
10473         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10474         u32 nr;
10475         int found_parent = 0;
10476         int i;
10477
10478         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10479         if (!extent_buffer_uptodate(eb))
10480                 goto out;
10481
10482         if (level == -1)
10483                 level = query_tree_block_level(fs_info, bytenr);
10484         if (level < 0)
10485                 goto out;
10486
10487         /* It's possible it's a tree reloc root */
10488         if (parent == bytenr) {
10489                 if (is_tree_reloc_root(fs_info, eb))
10490                         found_parent = 1;
10491                 goto out;
10492         }
10493
10494         if (level + 1 != btrfs_header_level(eb))
10495                 goto out;
10496
10497         nr = btrfs_header_nritems(eb);
10498         for (i = 0; i < nr; i++) {
10499                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10500                         found_parent = 1;
10501                         break;
10502                 }
10503         }
10504 out:
10505         free_extent_buffer(eb);
10506         if (!found_parent) {
10507                 error(
10508         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10509                         bytenr, nodesize, parent, level);
10510                 return REFERENCER_MISSING;
10511         }
10512         return 0;
10513 }
10514
10515 /*
10516  * Check referencer for normal (inlined) data ref
10517  * If len == 0, it will be resolved by searching in extent tree
10518  */
10519 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10520                                      u64 root_id, u64 objectid, u64 offset,
10521                                      u64 bytenr, u64 len, u32 count)
10522 {
10523         struct btrfs_root *root;
10524         struct btrfs_root *extent_root = fs_info->extent_root;
10525         struct btrfs_key key;
10526         struct btrfs_path path;
10527         struct extent_buffer *leaf;
10528         struct btrfs_file_extent_item *fi;
10529         u32 found_count = 0;
10530         int slot;
10531         int ret = 0;
10532
10533         if (!len) {
10534                 key.objectid = bytenr;
10535                 key.type = BTRFS_EXTENT_ITEM_KEY;
10536                 key.offset = (u64)-1;
10537
10538                 btrfs_init_path(&path);
10539                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10540                 if (ret < 0)
10541                         goto out;
10542                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10543                 if (ret)
10544                         goto out;
10545                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10546                 if (key.objectid != bytenr ||
10547                     key.type != BTRFS_EXTENT_ITEM_KEY)
10548                         goto out;
10549                 len = key.offset;
10550                 btrfs_release_path(&path);
10551         }
10552         key.objectid = root_id;
10553         key.type = BTRFS_ROOT_ITEM_KEY;
10554         key.offset = (u64)-1;
10555         btrfs_init_path(&path);
10556
10557         root = btrfs_read_fs_root(fs_info, &key);
10558         if (IS_ERR(root))
10559                 goto out;
10560
10561         key.objectid = objectid;
10562         key.type = BTRFS_EXTENT_DATA_KEY;
10563         /*
10564          * It can be nasty as data backref offset is
10565          * file offset - file extent offset, which is smaller or
10566          * equal to original backref offset.  The only special case is
10567          * overflow.  So we need to special check and do further search.
10568          */
10569         key.offset = offset & (1ULL << 63) ? 0 : offset;
10570
10571         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10572         if (ret < 0)
10573                 goto out;
10574
10575         /*
10576          * Search afterwards to get correct one
10577          * NOTE: As we must do a comprehensive check on the data backref to
10578          * make sure the dref count also matches, we must iterate all file
10579          * extents for that inode.
10580          */
10581         while (1) {
10582                 leaf = path.nodes[0];
10583                 slot = path.slots[0];
10584
10585                 btrfs_item_key_to_cpu(leaf, &key, slot);
10586                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10587                         break;
10588                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10589                 /*
10590                  * Except normal disk bytenr and disk num bytes, we still
10591                  * need to do extra check on dbackref offset as
10592                  * dbackref offset = file_offset - file_extent_offset
10593                  */
10594                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10595                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10596                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10597                     offset)
10598                         found_count++;
10599
10600                 ret = btrfs_next_item(root, &path);
10601                 if (ret)
10602                         break;
10603         }
10604 out:
10605         btrfs_release_path(&path);
10606         if (found_count != count) {
10607                 error(
10608 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10609                         bytenr, len, root_id, objectid, offset, count, found_count);
10610                 return REFERENCER_MISSING;
10611         }
10612         return 0;
10613 }
10614
10615 /*
10616  * Check if the referencer of a shared data backref exists
10617  */
10618 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10619                                      u64 parent, u64 bytenr)
10620 {
10621         struct extent_buffer *eb;
10622         struct btrfs_key key;
10623         struct btrfs_file_extent_item *fi;
10624         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10625         u32 nr;
10626         int found_parent = 0;
10627         int i;
10628
10629         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10630         if (!extent_buffer_uptodate(eb))
10631                 goto out;
10632
10633         nr = btrfs_header_nritems(eb);
10634         for (i = 0; i < nr; i++) {
10635                 btrfs_item_key_to_cpu(eb, &key, i);
10636                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10637                         continue;
10638
10639                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10640                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10641                         continue;
10642
10643                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10644                         found_parent = 1;
10645                         break;
10646                 }
10647         }
10648
10649 out:
10650         free_extent_buffer(eb);
10651         if (!found_parent) {
10652                 error("shared extent %llu referencer lost (parent: %llu)",
10653                         bytenr, parent);
10654                 return REFERENCER_MISSING;
10655         }
10656         return 0;
10657 }
10658
10659 /*
10660  * This function will check a given extent item, including its backref and
10661  * itself (like crossing stripe boundary and type)
10662  *
10663  * Since we don't use extent_record anymore, introduce new error bit
10664  */
10665 static int check_extent_item(struct btrfs_fs_info *fs_info,
10666                              struct extent_buffer *eb, int slot)
10667 {
10668         struct btrfs_extent_item *ei;
10669         struct btrfs_extent_inline_ref *iref;
10670         struct btrfs_extent_data_ref *dref;
10671         unsigned long end;
10672         unsigned long ptr;
10673         int type;
10674         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10675         u32 item_size = btrfs_item_size_nr(eb, slot);
10676         u64 flags;
10677         u64 offset;
10678         int metadata = 0;
10679         int level;
10680         struct btrfs_key key;
10681         int ret;
10682         int err = 0;
10683
10684         btrfs_item_key_to_cpu(eb, &key, slot);
10685         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10686                 bytes_used += key.offset;
10687         else
10688                 bytes_used += nodesize;
10689
10690         if (item_size < sizeof(*ei)) {
10691                 /*
10692                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10693                  * old thing when on disk format is still un-determined.
10694                  * No need to care about it anymore
10695                  */
10696                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10697                 return -ENOTTY;
10698         }
10699
10700         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10701         flags = btrfs_extent_flags(eb, ei);
10702
10703         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10704                 metadata = 1;
10705         if (metadata && check_crossing_stripes(global_info, key.objectid,
10706                                                eb->len)) {
10707                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10708                       key.objectid, key.objectid + nodesize);
10709                 err |= CROSSING_STRIPE_BOUNDARY;
10710         }
10711
10712         ptr = (unsigned long)(ei + 1);
10713
10714         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10715                 /* Old EXTENT_ITEM metadata */
10716                 struct btrfs_tree_block_info *info;
10717
10718                 info = (struct btrfs_tree_block_info *)ptr;
10719                 level = btrfs_tree_block_level(eb, info);
10720                 ptr += sizeof(struct btrfs_tree_block_info);
10721         } else {
10722                 /* New METADATA_ITEM */
10723                 level = key.offset;
10724         }
10725         end = (unsigned long)ei + item_size;
10726
10727         if (ptr >= end) {
10728                 err |= ITEM_SIZE_MISMATCH;
10729                 goto out;
10730         }
10731
10732         /* Now check every backref in this extent item */
10733 next:
10734         iref = (struct btrfs_extent_inline_ref *)ptr;
10735         type = btrfs_extent_inline_ref_type(eb, iref);
10736         offset = btrfs_extent_inline_ref_offset(eb, iref);
10737         switch (type) {
10738         case BTRFS_TREE_BLOCK_REF_KEY:
10739                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10740                                                level);
10741                 err |= ret;
10742                 break;
10743         case BTRFS_SHARED_BLOCK_REF_KEY:
10744                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10745                                                  level);
10746                 err |= ret;
10747                 break;
10748         case BTRFS_EXTENT_DATA_REF_KEY:
10749                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10750                 ret = check_extent_data_backref(fs_info,
10751                                 btrfs_extent_data_ref_root(eb, dref),
10752                                 btrfs_extent_data_ref_objectid(eb, dref),
10753                                 btrfs_extent_data_ref_offset(eb, dref),
10754                                 key.objectid, key.offset,
10755                                 btrfs_extent_data_ref_count(eb, dref));
10756                 err |= ret;
10757                 break;
10758         case BTRFS_SHARED_DATA_REF_KEY:
10759                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10760                 err |= ret;
10761                 break;
10762         default:
10763                 error("extent[%llu %d %llu] has unknown ref type: %d",
10764                         key.objectid, key.type, key.offset, type);
10765                 err |= UNKNOWN_TYPE;
10766                 goto out;
10767         }
10768
10769         ptr += btrfs_extent_inline_ref_size(type);
10770         if (ptr < end)
10771                 goto next;
10772
10773 out:
10774         return err;
10775 }
10776
10777 /*
10778  * Check if a dev extent item is referred correctly by its chunk
10779  */
10780 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10781                                  struct extent_buffer *eb, int slot)
10782 {
10783         struct btrfs_root *chunk_root = fs_info->chunk_root;
10784         struct btrfs_dev_extent *ptr;
10785         struct btrfs_path path;
10786         struct btrfs_key chunk_key;
10787         struct btrfs_key devext_key;
10788         struct btrfs_chunk *chunk;
10789         struct extent_buffer *l;
10790         int num_stripes;
10791         u64 length;
10792         int i;
10793         int found_chunk = 0;
10794         int ret;
10795
10796         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10797         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10798         length = btrfs_dev_extent_length(eb, ptr);
10799
10800         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10801         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10802         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10803
10804         btrfs_init_path(&path);
10805         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10806         if (ret)
10807                 goto out;
10808
10809         l = path.nodes[0];
10810         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10811         if (btrfs_chunk_length(l, chunk) != length)
10812                 goto out;
10813
10814         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10815         for (i = 0; i < num_stripes; i++) {
10816                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10817                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10818
10819                 if (devid == devext_key.objectid &&
10820                     offset == devext_key.offset) {
10821                         found_chunk = 1;
10822                         break;
10823                 }
10824         }
10825 out:
10826         btrfs_release_path(&path);
10827         if (!found_chunk) {
10828                 error(
10829                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10830                         devext_key.objectid, devext_key.offset, length);
10831                 return REFERENCER_MISSING;
10832         }
10833         return 0;
10834 }
10835
10836 /*
10837  * Check if the used space is correct with the dev item
10838  */
10839 static int check_dev_item(struct btrfs_fs_info *fs_info,
10840                           struct extent_buffer *eb, int slot)
10841 {
10842         struct btrfs_root *dev_root = fs_info->dev_root;
10843         struct btrfs_dev_item *dev_item;
10844         struct btrfs_path path;
10845         struct btrfs_key key;
10846         struct btrfs_dev_extent *ptr;
10847         u64 dev_id;
10848         u64 used;
10849         u64 total = 0;
10850         int ret;
10851
10852         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10853         dev_id = btrfs_device_id(eb, dev_item);
10854         used = btrfs_device_bytes_used(eb, dev_item);
10855
10856         key.objectid = dev_id;
10857         key.type = BTRFS_DEV_EXTENT_KEY;
10858         key.offset = 0;
10859
10860         btrfs_init_path(&path);
10861         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10862         if (ret < 0) {
10863                 btrfs_item_key_to_cpu(eb, &key, slot);
10864                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10865                         key.objectid, key.type, key.offset);
10866                 btrfs_release_path(&path);
10867                 return REFERENCER_MISSING;
10868         }
10869
10870         /* Iterate dev_extents to calculate the used space of a device */
10871         while (1) {
10872                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10873
10874                 if (key.objectid > dev_id)
10875                         break;
10876                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10877                         goto next;
10878
10879                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10880                                      struct btrfs_dev_extent);
10881                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10882 next:
10883                 ret = btrfs_next_item(dev_root, &path);
10884                 if (ret)
10885                         break;
10886         }
10887         btrfs_release_path(&path);
10888
10889         if (used != total) {
10890                 btrfs_item_key_to_cpu(eb, &key, slot);
10891                 error(
10892 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10893                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10894                         BTRFS_DEV_EXTENT_KEY, dev_id);
10895                 return ACCOUNTING_MISMATCH;
10896         }
10897         return 0;
10898 }
10899
10900 /*
10901  * Check a block group item with its referener (chunk) and its used space
10902  * with extent/metadata item
10903  */
10904 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10905                                   struct extent_buffer *eb, int slot)
10906 {
10907         struct btrfs_root *extent_root = fs_info->extent_root;
10908         struct btrfs_root *chunk_root = fs_info->chunk_root;
10909         struct btrfs_block_group_item *bi;
10910         struct btrfs_block_group_item bg_item;
10911         struct btrfs_path path;
10912         struct btrfs_key bg_key;
10913         struct btrfs_key chunk_key;
10914         struct btrfs_key extent_key;
10915         struct btrfs_chunk *chunk;
10916         struct extent_buffer *leaf;
10917         struct btrfs_extent_item *ei;
10918         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10919         u64 flags;
10920         u64 bg_flags;
10921         u64 used;
10922         u64 total = 0;
10923         int ret;
10924         int err = 0;
10925
10926         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10927         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10928         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10929         used = btrfs_block_group_used(&bg_item);
10930         bg_flags = btrfs_block_group_flags(&bg_item);
10931
10932         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10933         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10934         chunk_key.offset = bg_key.objectid;
10935
10936         btrfs_init_path(&path);
10937         /* Search for the referencer chunk */
10938         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10939         if (ret) {
10940                 error(
10941                 "block group[%llu %llu] did not find the related chunk item",
10942                         bg_key.objectid, bg_key.offset);
10943                 err |= REFERENCER_MISSING;
10944         } else {
10945                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10946                                         struct btrfs_chunk);
10947                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10948                                                 bg_key.offset) {
10949                         error(
10950         "block group[%llu %llu] related chunk item length does not match",
10951                                 bg_key.objectid, bg_key.offset);
10952                         err |= REFERENCER_MISMATCH;
10953                 }
10954         }
10955         btrfs_release_path(&path);
10956
10957         /* Search from the block group bytenr */
10958         extent_key.objectid = bg_key.objectid;
10959         extent_key.type = 0;
10960         extent_key.offset = 0;
10961
10962         btrfs_init_path(&path);
10963         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10964         if (ret < 0)
10965                 goto out;
10966
10967         /* Iterate extent tree to account used space */
10968         while (1) {
10969                 leaf = path.nodes[0];
10970                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10971                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10972                         break;
10973
10974                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10975                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10976                         goto next;
10977                 if (extent_key.objectid < bg_key.objectid)
10978                         goto next;
10979
10980                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10981                         total += nodesize;
10982                 else
10983                         total += extent_key.offset;
10984
10985                 ei = btrfs_item_ptr(leaf, path.slots[0],
10986                                     struct btrfs_extent_item);
10987                 flags = btrfs_extent_flags(leaf, ei);
10988                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10989                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10990                                 error(
10991                         "bad extent[%llu, %llu) type mismatch with chunk",
10992                                         extent_key.objectid,
10993                                         extent_key.objectid + extent_key.offset);
10994                                 err |= CHUNK_TYPE_MISMATCH;
10995                         }
10996                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10997                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10998                                     BTRFS_BLOCK_GROUP_METADATA))) {
10999                                 error(
11000                         "bad extent[%llu, %llu) type mismatch with chunk",
11001                                         extent_key.objectid,
11002                                         extent_key.objectid + nodesize);
11003                                 err |= CHUNK_TYPE_MISMATCH;
11004                         }
11005                 }
11006 next:
11007                 ret = btrfs_next_item(extent_root, &path);
11008                 if (ret)
11009                         break;
11010         }
11011
11012 out:
11013         btrfs_release_path(&path);
11014
11015         if (total != used) {
11016                 error(
11017                 "block group[%llu %llu] used %llu but extent items used %llu",
11018                         bg_key.objectid, bg_key.offset, used, total);
11019                 err |= ACCOUNTING_MISMATCH;
11020         }
11021         return err;
11022 }
11023
11024 /*
11025  * Check a chunk item.
11026  * Including checking all referred dev_extents and block group
11027  */
11028 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11029                             struct extent_buffer *eb, int slot)
11030 {
11031         struct btrfs_root *extent_root = fs_info->extent_root;
11032         struct btrfs_root *dev_root = fs_info->dev_root;
11033         struct btrfs_path path;
11034         struct btrfs_key chunk_key;
11035         struct btrfs_key bg_key;
11036         struct btrfs_key devext_key;
11037         struct btrfs_chunk *chunk;
11038         struct extent_buffer *leaf;
11039         struct btrfs_block_group_item *bi;
11040         struct btrfs_block_group_item bg_item;
11041         struct btrfs_dev_extent *ptr;
11042         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11043         u64 length;
11044         u64 chunk_end;
11045         u64 type;
11046         u64 profile;
11047         int num_stripes;
11048         u64 offset;
11049         u64 objectid;
11050         int i;
11051         int ret;
11052         int err = 0;
11053
11054         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11055         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11056         length = btrfs_chunk_length(eb, chunk);
11057         chunk_end = chunk_key.offset + length;
11058         if (!IS_ALIGNED(length, sectorsize)) {
11059                 error("chunk[%llu %llu) not aligned to %u",
11060                         chunk_key.offset, chunk_end, sectorsize);
11061                 err |= BYTES_UNALIGNED;
11062                 goto out;
11063         }
11064
11065         type = btrfs_chunk_type(eb, chunk);
11066         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11067         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11068                 error("chunk[%llu %llu) has no chunk type",
11069                         chunk_key.offset, chunk_end);
11070                 err |= UNKNOWN_TYPE;
11071         }
11072         if (profile && (profile & (profile - 1))) {
11073                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11074                         chunk_key.offset, chunk_end, profile);
11075                 err |= UNKNOWN_TYPE;
11076         }
11077
11078         bg_key.objectid = chunk_key.offset;
11079         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11080         bg_key.offset = length;
11081
11082         btrfs_init_path(&path);
11083         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11084         if (ret) {
11085                 error(
11086                 "chunk[%llu %llu) did not find the related block group item",
11087                         chunk_key.offset, chunk_end);
11088                 err |= REFERENCER_MISSING;
11089         } else{
11090                 leaf = path.nodes[0];
11091                 bi = btrfs_item_ptr(leaf, path.slots[0],
11092                                     struct btrfs_block_group_item);
11093                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11094                                    sizeof(bg_item));
11095                 if (btrfs_block_group_flags(&bg_item) != type) {
11096                         error(
11097 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11098                                 chunk_key.offset, chunk_end, type,
11099                                 btrfs_block_group_flags(&bg_item));
11100                         err |= REFERENCER_MISSING;
11101                 }
11102         }
11103
11104         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11105         for (i = 0; i < num_stripes; i++) {
11106                 btrfs_release_path(&path);
11107                 btrfs_init_path(&path);
11108                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11109                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11110                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11111
11112                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11113                                         0, 0);
11114                 if (ret)
11115                         goto not_match_dev;
11116
11117                 leaf = path.nodes[0];
11118                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11119                                      struct btrfs_dev_extent);
11120                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11121                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11122                 if (objectid != chunk_key.objectid ||
11123                     offset != chunk_key.offset ||
11124                     btrfs_dev_extent_length(leaf, ptr) != length)
11125                         goto not_match_dev;
11126                 continue;
11127 not_match_dev:
11128                 err |= BACKREF_MISSING;
11129                 error(
11130                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11131                         chunk_key.objectid, chunk_end, i);
11132                 continue;
11133         }
11134         btrfs_release_path(&path);
11135 out:
11136         return err;
11137 }
11138
11139 /*
11140  * Main entry function to check known items and update related accounting info
11141  */
11142 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11143 {
11144         struct btrfs_fs_info *fs_info = root->fs_info;
11145         struct btrfs_key key;
11146         int slot = 0;
11147         int type;
11148         struct btrfs_extent_data_ref *dref;
11149         int ret;
11150         int err = 0;
11151
11152 next:
11153         btrfs_item_key_to_cpu(eb, &key, slot);
11154         type = key.type;
11155
11156         switch (type) {
11157         case BTRFS_EXTENT_DATA_KEY:
11158                 ret = check_extent_data_item(root, eb, slot);
11159                 err |= ret;
11160                 break;
11161         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11162                 ret = check_block_group_item(fs_info, eb, slot);
11163                 err |= ret;
11164                 break;
11165         case BTRFS_DEV_ITEM_KEY:
11166                 ret = check_dev_item(fs_info, eb, slot);
11167                 err |= ret;
11168                 break;
11169         case BTRFS_CHUNK_ITEM_KEY:
11170                 ret = check_chunk_item(fs_info, eb, slot);
11171                 err |= ret;
11172                 break;
11173         case BTRFS_DEV_EXTENT_KEY:
11174                 ret = check_dev_extent_item(fs_info, eb, slot);
11175                 err |= ret;
11176                 break;
11177         case BTRFS_EXTENT_ITEM_KEY:
11178         case BTRFS_METADATA_ITEM_KEY:
11179                 ret = check_extent_item(fs_info, eb, slot);
11180                 err |= ret;
11181                 break;
11182         case BTRFS_EXTENT_CSUM_KEY:
11183                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11184                 break;
11185         case BTRFS_TREE_BLOCK_REF_KEY:
11186                 ret = check_tree_block_backref(fs_info, key.offset,
11187                                                key.objectid, -1);
11188                 err |= ret;
11189                 break;
11190         case BTRFS_EXTENT_DATA_REF_KEY:
11191                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11192                 ret = check_extent_data_backref(fs_info,
11193                                 btrfs_extent_data_ref_root(eb, dref),
11194                                 btrfs_extent_data_ref_objectid(eb, dref),
11195                                 btrfs_extent_data_ref_offset(eb, dref),
11196                                 key.objectid, 0,
11197                                 btrfs_extent_data_ref_count(eb, dref));
11198                 err |= ret;
11199                 break;
11200         case BTRFS_SHARED_BLOCK_REF_KEY:
11201                 ret = check_shared_block_backref(fs_info, key.offset,
11202                                                  key.objectid, -1);
11203                 err |= ret;
11204                 break;
11205         case BTRFS_SHARED_DATA_REF_KEY:
11206                 ret = check_shared_data_backref(fs_info, key.offset,
11207                                                 key.objectid);
11208                 err |= ret;
11209                 break;
11210         default:
11211                 break;
11212         }
11213
11214         if (++slot < btrfs_header_nritems(eb))
11215                 goto next;
11216
11217         return err;
11218 }
11219
11220 /*
11221  * Helper function for later fs/subvol tree check.  To determine if a tree
11222  * block should be checked.
11223  * This function will ensure only the direct referencer with lowest rootid to
11224  * check a fs/subvolume tree block.
11225  *
11226  * Backref check at extent tree would detect errors like missing subvolume
11227  * tree, so we can do aggressive check to reduce duplicated checks.
11228  */
11229 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11230 {
11231         struct btrfs_root *extent_root = root->fs_info->extent_root;
11232         struct btrfs_key key;
11233         struct btrfs_path path;
11234         struct extent_buffer *leaf;
11235         int slot;
11236         struct btrfs_extent_item *ei;
11237         unsigned long ptr;
11238         unsigned long end;
11239         int type;
11240         u32 item_size;
11241         u64 offset;
11242         struct btrfs_extent_inline_ref *iref;
11243         int ret;
11244
11245         btrfs_init_path(&path);
11246         key.objectid = btrfs_header_bytenr(eb);
11247         key.type = BTRFS_METADATA_ITEM_KEY;
11248         key.offset = (u64)-1;
11249
11250         /*
11251          * Any failure in backref resolving means we can't determine
11252          * whom the tree block belongs to.
11253          * So in that case, we need to check that tree block
11254          */
11255         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11256         if (ret < 0)
11257                 goto need_check;
11258
11259         ret = btrfs_previous_extent_item(extent_root, &path,
11260                                          btrfs_header_bytenr(eb));
11261         if (ret)
11262                 goto need_check;
11263
11264         leaf = path.nodes[0];
11265         slot = path.slots[0];
11266         btrfs_item_key_to_cpu(leaf, &key, slot);
11267         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11268
11269         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11270                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11271         } else {
11272                 struct btrfs_tree_block_info *info;
11273
11274                 info = (struct btrfs_tree_block_info *)(ei + 1);
11275                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11276         }
11277
11278         item_size = btrfs_item_size_nr(leaf, slot);
11279         ptr = (unsigned long)iref;
11280         end = (unsigned long)ei + item_size;
11281         while (ptr < end) {
11282                 iref = (struct btrfs_extent_inline_ref *)ptr;
11283                 type = btrfs_extent_inline_ref_type(leaf, iref);
11284                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11285
11286                 /*
11287                  * We only check the tree block if current root is
11288                  * the lowest referencer of it.
11289                  */
11290                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11291                     offset < root->objectid) {
11292                         btrfs_release_path(&path);
11293                         return 0;
11294                 }
11295
11296                 ptr += btrfs_extent_inline_ref_size(type);
11297         }
11298         /*
11299          * Normally we should also check keyed tree block ref, but that may be
11300          * very time consuming.  Inlined ref should already make us skip a lot
11301          * of refs now.  So skip search keyed tree block ref.
11302          */
11303
11304 need_check:
11305         btrfs_release_path(&path);
11306         return 1;
11307 }
11308
11309 /*
11310  * Traversal function for tree block. We will do:
11311  * 1) Skip shared fs/subvolume tree blocks
11312  * 2) Update related bytes accounting
11313  * 3) Pre-order traversal
11314  */
11315 static int traverse_tree_block(struct btrfs_root *root,
11316                                 struct extent_buffer *node)
11317 {
11318         struct extent_buffer *eb;
11319         struct btrfs_key key;
11320         struct btrfs_key drop_key;
11321         int level;
11322         u64 nr;
11323         int i;
11324         int err = 0;
11325         int ret;
11326
11327         /*
11328          * Skip shared fs/subvolume tree block, in that case they will
11329          * be checked by referencer with lowest rootid
11330          */
11331         if (is_fstree(root->objectid) && !should_check(root, node))
11332                 return 0;
11333
11334         /* Update bytes accounting */
11335         total_btree_bytes += node->len;
11336         if (fs_root_objectid(btrfs_header_owner(node)))
11337                 total_fs_tree_bytes += node->len;
11338         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11339                 total_extent_tree_bytes += node->len;
11340         if (!found_old_backref &&
11341             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11342             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11343             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11344                 found_old_backref = 1;
11345
11346         /* pre-order tranversal, check itself first */
11347         level = btrfs_header_level(node);
11348         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11349                                    btrfs_header_level(node),
11350                                    btrfs_header_owner(node));
11351         err |= ret;
11352         if (err)
11353                 error(
11354         "check %s failed root %llu bytenr %llu level %d, force continue check",
11355                         level ? "node":"leaf", root->objectid,
11356                         btrfs_header_bytenr(node), btrfs_header_level(node));
11357
11358         if (!level) {
11359                 btree_space_waste += btrfs_leaf_free_space(root, node);
11360                 ret = check_leaf_items(root, node);
11361                 err |= ret;
11362                 return err;
11363         }
11364
11365         nr = btrfs_header_nritems(node);
11366         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11367         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11368                 sizeof(struct btrfs_key_ptr);
11369
11370         /* Then check all its children */
11371         for (i = 0; i < nr; i++) {
11372                 u64 blocknr = btrfs_node_blockptr(node, i);
11373
11374                 btrfs_node_key_to_cpu(node, &key, i);
11375                 if (level == root->root_item.drop_level &&
11376                     is_dropped_key(&key, &drop_key))
11377                         continue;
11378
11379                 /*
11380                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11381                  * to call the function itself.
11382                  */
11383                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11384                 if (extent_buffer_uptodate(eb)) {
11385                         ret = traverse_tree_block(root, eb);
11386                         err |= ret;
11387                 }
11388                 free_extent_buffer(eb);
11389         }
11390
11391         return err;
11392 }
11393
11394 /*
11395  * Low memory usage version check_chunks_and_extents.
11396  */
11397 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11398 {
11399         struct btrfs_path path;
11400         struct btrfs_key key;
11401         struct btrfs_root *root1;
11402         struct btrfs_root *cur_root;
11403         int err = 0;
11404         int ret;
11405
11406         root1 = root->fs_info->chunk_root;
11407         ret = traverse_tree_block(root1, root1->node);
11408         err |= ret;
11409
11410         root1 = root->fs_info->tree_root;
11411         ret = traverse_tree_block(root1, root1->node);
11412         err |= ret;
11413
11414         btrfs_init_path(&path);
11415         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11416         key.offset = 0;
11417         key.type = BTRFS_ROOT_ITEM_KEY;
11418
11419         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11420         if (ret) {
11421                 error("cannot find extent treet in tree_root");
11422                 goto out;
11423         }
11424
11425         while (1) {
11426                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11427                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11428                         goto next;
11429                 key.offset = (u64)-1;
11430
11431                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11432                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11433                                         &key);
11434                 else
11435                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11436                 if (IS_ERR(cur_root) || !cur_root) {
11437                         error("failed to read tree: %lld", key.objectid);
11438                         goto next;
11439                 }
11440
11441                 ret = traverse_tree_block(cur_root, cur_root->node);
11442                 err |= ret;
11443
11444                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11445                         btrfs_free_fs_root(cur_root);
11446 next:
11447                 ret = btrfs_next_item(root1, &path);
11448                 if (ret)
11449                         goto out;
11450         }
11451
11452 out:
11453         btrfs_release_path(&path);
11454         return err;
11455 }
11456
11457 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11458                            struct btrfs_root *root, int overwrite)
11459 {
11460         struct extent_buffer *c;
11461         struct extent_buffer *old = root->node;
11462         int level;
11463         int ret;
11464         struct btrfs_disk_key disk_key = {0,0,0};
11465
11466         level = 0;
11467
11468         if (overwrite) {
11469                 c = old;
11470                 extent_buffer_get(c);
11471                 goto init;
11472         }
11473         c = btrfs_alloc_free_block(trans, root,
11474                                    root->nodesize,
11475                                    root->root_key.objectid,
11476                                    &disk_key, level, 0, 0);
11477         if (IS_ERR(c)) {
11478                 c = old;
11479                 extent_buffer_get(c);
11480                 overwrite = 1;
11481         }
11482 init:
11483         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11484         btrfs_set_header_level(c, level);
11485         btrfs_set_header_bytenr(c, c->start);
11486         btrfs_set_header_generation(c, trans->transid);
11487         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11488         btrfs_set_header_owner(c, root->root_key.objectid);
11489
11490         write_extent_buffer(c, root->fs_info->fsid,
11491                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11492
11493         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11494                             btrfs_header_chunk_tree_uuid(c),
11495                             BTRFS_UUID_SIZE);
11496
11497         btrfs_mark_buffer_dirty(c);
11498         /*
11499          * this case can happen in the following case:
11500          *
11501          * 1.overwrite previous root.
11502          *
11503          * 2.reinit reloc data root, this is because we skip pin
11504          * down reloc data tree before which means we can allocate
11505          * same block bytenr here.
11506          */
11507         if (old->start == c->start) {
11508                 btrfs_set_root_generation(&root->root_item,
11509                                           trans->transid);
11510                 root->root_item.level = btrfs_header_level(root->node);
11511                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11512                                         &root->root_key, &root->root_item);
11513                 if (ret) {
11514                         free_extent_buffer(c);
11515                         return ret;
11516                 }
11517         }
11518         free_extent_buffer(old);
11519         root->node = c;
11520         add_root_to_dirty_list(root);
11521         return 0;
11522 }
11523
11524 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11525                                 struct extent_buffer *eb, int tree_root)
11526 {
11527         struct extent_buffer *tmp;
11528         struct btrfs_root_item *ri;
11529         struct btrfs_key key;
11530         u64 bytenr;
11531         u32 nodesize;
11532         int level = btrfs_header_level(eb);
11533         int nritems;
11534         int ret;
11535         int i;
11536
11537         /*
11538          * If we have pinned this block before, don't pin it again.
11539          * This can not only avoid forever loop with broken filesystem
11540          * but also give us some speedups.
11541          */
11542         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11543                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11544                 return 0;
11545
11546         btrfs_pin_extent(fs_info, eb->start, eb->len);
11547
11548         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11549         nritems = btrfs_header_nritems(eb);
11550         for (i = 0; i < nritems; i++) {
11551                 if (level == 0) {
11552                         btrfs_item_key_to_cpu(eb, &key, i);
11553                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11554                                 continue;
11555                         /* Skip the extent root and reloc roots */
11556                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11557                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11558                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11559                                 continue;
11560                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11561                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11562
11563                         /*
11564                          * If at any point we start needing the real root we
11565                          * will have to build a stump root for the root we are
11566                          * in, but for now this doesn't actually use the root so
11567                          * just pass in extent_root.
11568                          */
11569                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11570                                               nodesize, 0);
11571                         if (!extent_buffer_uptodate(tmp)) {
11572                                 fprintf(stderr, "Error reading root block\n");
11573                                 return -EIO;
11574                         }
11575                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11576                         free_extent_buffer(tmp);
11577                         if (ret)
11578                                 return ret;
11579                 } else {
11580                         bytenr = btrfs_node_blockptr(eb, i);
11581
11582                         /* If we aren't the tree root don't read the block */
11583                         if (level == 1 && !tree_root) {
11584                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11585                                 continue;
11586                         }
11587
11588                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11589                                               nodesize, 0);
11590                         if (!extent_buffer_uptodate(tmp)) {
11591                                 fprintf(stderr, "Error reading tree block\n");
11592                                 return -EIO;
11593                         }
11594                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11595                         free_extent_buffer(tmp);
11596                         if (ret)
11597                                 return ret;
11598                 }
11599         }
11600
11601         return 0;
11602 }
11603
11604 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11605 {
11606         int ret;
11607
11608         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11609         if (ret)
11610                 return ret;
11611
11612         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11613 }
11614
11615 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11616 {
11617         struct btrfs_block_group_cache *cache;
11618         struct btrfs_path path;
11619         struct extent_buffer *leaf;
11620         struct btrfs_chunk *chunk;
11621         struct btrfs_key key;
11622         int ret;
11623         u64 start;
11624
11625         btrfs_init_path(&path);
11626         key.objectid = 0;
11627         key.type = BTRFS_CHUNK_ITEM_KEY;
11628         key.offset = 0;
11629         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11630         if (ret < 0) {
11631                 btrfs_release_path(&path);
11632                 return ret;
11633         }
11634
11635         /*
11636          * We do this in case the block groups were screwed up and had alloc
11637          * bits that aren't actually set on the chunks.  This happens with
11638          * restored images every time and could happen in real life I guess.
11639          */
11640         fs_info->avail_data_alloc_bits = 0;
11641         fs_info->avail_metadata_alloc_bits = 0;
11642         fs_info->avail_system_alloc_bits = 0;
11643
11644         /* First we need to create the in-memory block groups */
11645         while (1) {
11646                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11647                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11648                         if (ret < 0) {
11649                                 btrfs_release_path(&path);
11650                                 return ret;
11651                         }
11652                         if (ret) {
11653                                 ret = 0;
11654                                 break;
11655                         }
11656                 }
11657                 leaf = path.nodes[0];
11658                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11659                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11660                         path.slots[0]++;
11661                         continue;
11662                 }
11663
11664                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11665                 btrfs_add_block_group(fs_info, 0,
11666                                       btrfs_chunk_type(leaf, chunk),
11667                                       key.objectid, key.offset,
11668                                       btrfs_chunk_length(leaf, chunk));
11669                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11670                                  key.offset + btrfs_chunk_length(leaf, chunk));
11671                 path.slots[0]++;
11672         }
11673         start = 0;
11674         while (1) {
11675                 cache = btrfs_lookup_first_block_group(fs_info, start);
11676                 if (!cache)
11677                         break;
11678                 cache->cached = 1;
11679                 start = cache->key.objectid + cache->key.offset;
11680         }
11681
11682         btrfs_release_path(&path);
11683         return 0;
11684 }
11685
11686 static int reset_balance(struct btrfs_trans_handle *trans,
11687                          struct btrfs_fs_info *fs_info)
11688 {
11689         struct btrfs_root *root = fs_info->tree_root;
11690         struct btrfs_path path;
11691         struct extent_buffer *leaf;
11692         struct btrfs_key key;
11693         int del_slot, del_nr = 0;
11694         int ret;
11695         int found = 0;
11696
11697         btrfs_init_path(&path);
11698         key.objectid = BTRFS_BALANCE_OBJECTID;
11699         key.type = BTRFS_BALANCE_ITEM_KEY;
11700         key.offset = 0;
11701         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11702         if (ret) {
11703                 if (ret > 0)
11704                         ret = 0;
11705                 if (!ret)
11706                         goto reinit_data_reloc;
11707                 else
11708                         goto out;
11709         }
11710
11711         ret = btrfs_del_item(trans, root, &path);
11712         if (ret)
11713                 goto out;
11714         btrfs_release_path(&path);
11715
11716         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11717         key.type = BTRFS_ROOT_ITEM_KEY;
11718         key.offset = 0;
11719         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11720         if (ret < 0)
11721                 goto out;
11722         while (1) {
11723                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11724                         if (!found)
11725                                 break;
11726
11727                         if (del_nr) {
11728                                 ret = btrfs_del_items(trans, root, &path,
11729                                                       del_slot, del_nr);
11730                                 del_nr = 0;
11731                                 if (ret)
11732                                         goto out;
11733                         }
11734                         key.offset++;
11735                         btrfs_release_path(&path);
11736
11737                         found = 0;
11738                         ret = btrfs_search_slot(trans, root, &key, &path,
11739                                                 -1, 1);
11740                         if (ret < 0)
11741                                 goto out;
11742                         continue;
11743                 }
11744                 found = 1;
11745                 leaf = path.nodes[0];
11746                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11747                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11748                         break;
11749                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11750                         path.slots[0]++;
11751                         continue;
11752                 }
11753                 if (!del_nr) {
11754                         del_slot = path.slots[0];
11755                         del_nr = 1;
11756                 } else {
11757                         del_nr++;
11758                 }
11759                 path.slots[0]++;
11760         }
11761
11762         if (del_nr) {
11763                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11764                 if (ret)
11765                         goto out;
11766         }
11767         btrfs_release_path(&path);
11768
11769 reinit_data_reloc:
11770         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11771         key.type = BTRFS_ROOT_ITEM_KEY;
11772         key.offset = (u64)-1;
11773         root = btrfs_read_fs_root(fs_info, &key);
11774         if (IS_ERR(root)) {
11775                 fprintf(stderr, "Error reading data reloc tree\n");
11776                 ret = PTR_ERR(root);
11777                 goto out;
11778         }
11779         record_root_in_trans(trans, root);
11780         ret = btrfs_fsck_reinit_root(trans, root, 0);
11781         if (ret)
11782                 goto out;
11783         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11784 out:
11785         btrfs_release_path(&path);
11786         return ret;
11787 }
11788
11789 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11790                               struct btrfs_fs_info *fs_info)
11791 {
11792         u64 start = 0;
11793         int ret;
11794
11795         /*
11796          * The only reason we don't do this is because right now we're just
11797          * walking the trees we find and pinning down their bytes, we don't look
11798          * at any of the leaves.  In order to do mixed groups we'd have to check
11799          * the leaves of any fs roots and pin down the bytes for any file
11800          * extents we find.  Not hard but why do it if we don't have to?
11801          */
11802         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11803                 fprintf(stderr, "We don't support re-initing the extent tree "
11804                         "for mixed block groups yet, please notify a btrfs "
11805                         "developer you want to do this so they can add this "
11806                         "functionality.\n");
11807                 return -EINVAL;
11808         }
11809
11810         /*
11811          * first we need to walk all of the trees except the extent tree and pin
11812          * down the bytes that are in use so we don't overwrite any existing
11813          * metadata.
11814          */
11815         ret = pin_metadata_blocks(fs_info);
11816         if (ret) {
11817                 fprintf(stderr, "error pinning down used bytes\n");
11818                 return ret;
11819         }
11820
11821         /*
11822          * Need to drop all the block groups since we're going to recreate all
11823          * of them again.
11824          */
11825         btrfs_free_block_groups(fs_info);
11826         ret = reset_block_groups(fs_info);
11827         if (ret) {
11828                 fprintf(stderr, "error resetting the block groups\n");
11829                 return ret;
11830         }
11831
11832         /* Ok we can allocate now, reinit the extent root */
11833         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11834         if (ret) {
11835                 fprintf(stderr, "extent root initialization failed\n");
11836                 /*
11837                  * When the transaction code is updated we should end the
11838                  * transaction, but for now progs only knows about commit so
11839                  * just return an error.
11840                  */
11841                 return ret;
11842         }
11843
11844         /*
11845          * Now we have all the in-memory block groups setup so we can make
11846          * allocations properly, and the metadata we care about is safe since we
11847          * pinned all of it above.
11848          */
11849         while (1) {
11850                 struct btrfs_block_group_cache *cache;
11851
11852                 cache = btrfs_lookup_first_block_group(fs_info, start);
11853                 if (!cache)
11854                         break;
11855                 start = cache->key.objectid + cache->key.offset;
11856                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11857                                         &cache->key, &cache->item,
11858                                         sizeof(cache->item));
11859                 if (ret) {
11860                         fprintf(stderr, "Error adding block group\n");
11861                         return ret;
11862                 }
11863                 btrfs_extent_post_op(trans, fs_info->extent_root);
11864         }
11865
11866         ret = reset_balance(trans, fs_info);
11867         if (ret)
11868                 fprintf(stderr, "error resetting the pending balance\n");
11869
11870         return ret;
11871 }
11872
11873 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11874 {
11875         struct btrfs_path path;
11876         struct btrfs_trans_handle *trans;
11877         struct btrfs_key key;
11878         int ret;
11879
11880         printf("Recowing metadata block %llu\n", eb->start);
11881         key.objectid = btrfs_header_owner(eb);
11882         key.type = BTRFS_ROOT_ITEM_KEY;
11883         key.offset = (u64)-1;
11884
11885         root = btrfs_read_fs_root(root->fs_info, &key);
11886         if (IS_ERR(root)) {
11887                 fprintf(stderr, "Couldn't find owner root %llu\n",
11888                         key.objectid);
11889                 return PTR_ERR(root);
11890         }
11891
11892         trans = btrfs_start_transaction(root, 1);
11893         if (IS_ERR(trans))
11894                 return PTR_ERR(trans);
11895
11896         btrfs_init_path(&path);
11897         path.lowest_level = btrfs_header_level(eb);
11898         if (path.lowest_level)
11899                 btrfs_node_key_to_cpu(eb, &key, 0);
11900         else
11901                 btrfs_item_key_to_cpu(eb, &key, 0);
11902
11903         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11904         btrfs_commit_transaction(trans, root);
11905         btrfs_release_path(&path);
11906         return ret;
11907 }
11908
11909 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11910 {
11911         struct btrfs_path path;
11912         struct btrfs_trans_handle *trans;
11913         struct btrfs_key key;
11914         int ret;
11915
11916         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11917                bad->key.type, bad->key.offset);
11918         key.objectid = bad->root_id;
11919         key.type = BTRFS_ROOT_ITEM_KEY;
11920         key.offset = (u64)-1;
11921
11922         root = btrfs_read_fs_root(root->fs_info, &key);
11923         if (IS_ERR(root)) {
11924                 fprintf(stderr, "Couldn't find owner root %llu\n",
11925                         key.objectid);
11926                 return PTR_ERR(root);
11927         }
11928
11929         trans = btrfs_start_transaction(root, 1);
11930         if (IS_ERR(trans))
11931                 return PTR_ERR(trans);
11932
11933         btrfs_init_path(&path);
11934         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11935         if (ret) {
11936                 if (ret > 0)
11937                         ret = 0;
11938                 goto out;
11939         }
11940         ret = btrfs_del_item(trans, root, &path);
11941 out:
11942         btrfs_commit_transaction(trans, root);
11943         btrfs_release_path(&path);
11944         return ret;
11945 }
11946
11947 static int zero_log_tree(struct btrfs_root *root)
11948 {
11949         struct btrfs_trans_handle *trans;
11950         int ret;
11951
11952         trans = btrfs_start_transaction(root, 1);
11953         if (IS_ERR(trans)) {
11954                 ret = PTR_ERR(trans);
11955                 return ret;
11956         }
11957         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11958         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11959         ret = btrfs_commit_transaction(trans, root);
11960         return ret;
11961 }
11962
11963 static int populate_csum(struct btrfs_trans_handle *trans,
11964                          struct btrfs_root *csum_root, char *buf, u64 start,
11965                          u64 len)
11966 {
11967         u64 offset = 0;
11968         u64 sectorsize;
11969         int ret = 0;
11970
11971         while (offset < len) {
11972                 sectorsize = csum_root->sectorsize;
11973                 ret = read_extent_data(csum_root, buf, start + offset,
11974                                        &sectorsize, 0);
11975                 if (ret)
11976                         break;
11977                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11978                                             start + offset, buf, sectorsize);
11979                 if (ret)
11980                         break;
11981                 offset += sectorsize;
11982         }
11983         return ret;
11984 }
11985
11986 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11987                                       struct btrfs_root *csum_root,
11988                                       struct btrfs_root *cur_root)
11989 {
11990         struct btrfs_path path;
11991         struct btrfs_key key;
11992         struct extent_buffer *node;
11993         struct btrfs_file_extent_item *fi;
11994         char *buf = NULL;
11995         u64 start = 0;
11996         u64 len = 0;
11997         int slot = 0;
11998         int ret = 0;
11999
12000         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12001         if (!buf)
12002                 return -ENOMEM;
12003
12004         btrfs_init_path(&path);
12005         key.objectid = 0;
12006         key.offset = 0;
12007         key.type = 0;
12008         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12009         if (ret < 0)
12010                 goto out;
12011         /* Iterate all regular file extents and fill its csum */
12012         while (1) {
12013                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12014
12015                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12016                         goto next;
12017                 node = path.nodes[0];
12018                 slot = path.slots[0];
12019                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12020                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12021                         goto next;
12022                 start = btrfs_file_extent_disk_bytenr(node, fi);
12023                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12024
12025                 ret = populate_csum(trans, csum_root, buf, start, len);
12026                 if (ret == -EEXIST)
12027                         ret = 0;
12028                 if (ret < 0)
12029                         goto out;
12030 next:
12031                 /*
12032                  * TODO: if next leaf is corrupted, jump to nearest next valid
12033                  * leaf.
12034                  */
12035                 ret = btrfs_next_item(cur_root, &path);
12036                 if (ret < 0)
12037                         goto out;
12038                 if (ret > 0) {
12039                         ret = 0;
12040                         goto out;
12041                 }
12042         }
12043
12044 out:
12045         btrfs_release_path(&path);
12046         free(buf);
12047         return ret;
12048 }
12049
12050 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12051                                   struct btrfs_root *csum_root)
12052 {
12053         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12054         struct btrfs_path path;
12055         struct btrfs_root *tree_root = fs_info->tree_root;
12056         struct btrfs_root *cur_root;
12057         struct extent_buffer *node;
12058         struct btrfs_key key;
12059         int slot = 0;
12060         int ret = 0;
12061
12062         btrfs_init_path(&path);
12063         key.objectid = BTRFS_FS_TREE_OBJECTID;
12064         key.offset = 0;
12065         key.type = BTRFS_ROOT_ITEM_KEY;
12066         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12067         if (ret < 0)
12068                 goto out;
12069         if (ret > 0) {
12070                 ret = -ENOENT;
12071                 goto out;
12072         }
12073
12074         while (1) {
12075                 node = path.nodes[0];
12076                 slot = path.slots[0];
12077                 btrfs_item_key_to_cpu(node, &key, slot);
12078                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12079                         goto out;
12080                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12081                         goto next;
12082                 if (!is_fstree(key.objectid))
12083                         goto next;
12084                 key.offset = (u64)-1;
12085
12086                 cur_root = btrfs_read_fs_root(fs_info, &key);
12087                 if (IS_ERR(cur_root) || !cur_root) {
12088                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12089                                 key.objectid);
12090                         goto out;
12091                 }
12092                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12093                                 cur_root);
12094                 if (ret < 0)
12095                         goto out;
12096 next:
12097                 ret = btrfs_next_item(tree_root, &path);
12098                 if (ret > 0) {
12099                         ret = 0;
12100                         goto out;
12101                 }
12102                 if (ret < 0)
12103                         goto out;
12104         }
12105
12106 out:
12107         btrfs_release_path(&path);
12108         return ret;
12109 }
12110
12111 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12112                                       struct btrfs_root *csum_root)
12113 {
12114         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12115         struct btrfs_path path;
12116         struct btrfs_extent_item *ei;
12117         struct extent_buffer *leaf;
12118         char *buf;
12119         struct btrfs_key key;
12120         int ret;
12121
12122         btrfs_init_path(&path);
12123         key.objectid = 0;
12124         key.type = BTRFS_EXTENT_ITEM_KEY;
12125         key.offset = 0;
12126         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12127         if (ret < 0) {
12128                 btrfs_release_path(&path);
12129                 return ret;
12130         }
12131
12132         buf = malloc(csum_root->sectorsize);
12133         if (!buf) {
12134                 btrfs_release_path(&path);
12135                 return -ENOMEM;
12136         }
12137
12138         while (1) {
12139                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12140                         ret = btrfs_next_leaf(extent_root, &path);
12141                         if (ret < 0)
12142                                 break;
12143                         if (ret) {
12144                                 ret = 0;
12145                                 break;
12146                         }
12147                 }
12148                 leaf = path.nodes[0];
12149
12150                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12151                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12152                         path.slots[0]++;
12153                         continue;
12154                 }
12155
12156                 ei = btrfs_item_ptr(leaf, path.slots[0],
12157                                     struct btrfs_extent_item);
12158                 if (!(btrfs_extent_flags(leaf, ei) &
12159                       BTRFS_EXTENT_FLAG_DATA)) {
12160                         path.slots[0]++;
12161                         continue;
12162                 }
12163
12164                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12165                                     key.offset);
12166                 if (ret)
12167                         break;
12168                 path.slots[0]++;
12169         }
12170
12171         btrfs_release_path(&path);
12172         free(buf);
12173         return ret;
12174 }
12175
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Two sources are available. Normally the extent tree is used, but an extent
 * tree init wipes out all extent info, so in that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead.
 *
 * Returns the result of whichever helper was selected.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12192
12193 static void free_roots_info_cache(void)
12194 {
12195         if (!roots_info_cache)
12196                 return;
12197
12198         while (!cache_tree_empty(roots_info_cache)) {
12199                 struct cache_extent *entry;
12200                 struct root_item_info *rii;
12201
12202                 entry = first_cache_extent(roots_info_cache);
12203                 if (!entry)
12204                         break;
12205                 remove_cache_extent(roots_info_cache, entry);
12206                 rii = container_of(entry, struct root_item_info, cache_extent);
12207                 free(rii);
12208         }
12209
12210         free(roots_info_cache);
12211         roots_info_cache = NULL;
12212 }
12213
12214 static int build_roots_info_cache(struct btrfs_fs_info *info)
12215 {
12216         int ret = 0;
12217         struct btrfs_key key;
12218         struct extent_buffer *leaf;
12219         struct btrfs_path path;
12220
12221         if (!roots_info_cache) {
12222                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12223                 if (!roots_info_cache)
12224                         return -ENOMEM;
12225                 cache_tree_init(roots_info_cache);
12226         }
12227
12228         btrfs_init_path(&path);
12229         key.objectid = 0;
12230         key.type = BTRFS_EXTENT_ITEM_KEY;
12231         key.offset = 0;
12232         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12233         if (ret < 0)
12234                 goto out;
12235         leaf = path.nodes[0];
12236
12237         while (1) {
12238                 struct btrfs_key found_key;
12239                 struct btrfs_extent_item *ei;
12240                 struct btrfs_extent_inline_ref *iref;
12241                 int slot = path.slots[0];
12242                 int type;
12243                 u64 flags;
12244                 u64 root_id;
12245                 u8 level;
12246                 struct cache_extent *entry;
12247                 struct root_item_info *rii;
12248
12249                 if (slot >= btrfs_header_nritems(leaf)) {
12250                         ret = btrfs_next_leaf(info->extent_root, &path);
12251                         if (ret < 0) {
12252                                 break;
12253                         } else if (ret) {
12254                                 ret = 0;
12255                                 break;
12256                         }
12257                         leaf = path.nodes[0];
12258                         slot = path.slots[0];
12259                 }
12260
12261                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12262
12263                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12264                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12265                         goto next;
12266
12267                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12268                 flags = btrfs_extent_flags(leaf, ei);
12269
12270                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12271                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12272                         goto next;
12273
12274                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12275                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12276                         level = found_key.offset;
12277                 } else {
12278                         struct btrfs_tree_block_info *binfo;
12279
12280                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12281                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12282                         level = btrfs_tree_block_level(leaf, binfo);
12283                 }
12284
12285                 /*
12286                  * For a root extent, it must be of the following type and the
12287                  * first (and only one) iref in the item.
12288                  */
12289                 type = btrfs_extent_inline_ref_type(leaf, iref);
12290                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12291                         goto next;
12292
12293                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12294                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12295                 if (!entry) {
12296                         rii = malloc(sizeof(struct root_item_info));
12297                         if (!rii) {
12298                                 ret = -ENOMEM;
12299                                 goto out;
12300                         }
12301                         rii->cache_extent.start = root_id;
12302                         rii->cache_extent.size = 1;
12303                         rii->level = (u8)-1;
12304                         entry = &rii->cache_extent;
12305                         ret = insert_cache_extent(roots_info_cache, entry);
12306                         ASSERT(ret == 0);
12307                 } else {
12308                         rii = container_of(entry, struct root_item_info,
12309                                            cache_extent);
12310                 }
12311
12312                 ASSERT(rii->cache_extent.start == root_id);
12313                 ASSERT(rii->cache_extent.size == 1);
12314
12315                 if (level > rii->level || rii->level == (u8)-1) {
12316                         rii->level = level;
12317                         rii->bytenr = found_key.objectid;
12318                         rii->gen = btrfs_extent_generation(leaf, ei);
12319                         rii->node_count = 1;
12320                 } else if (level == rii->level) {
12321                         rii->node_count++;
12322                 }
12323 next:
12324                 path.slots[0]++;
12325         }
12326
12327 out:
12328         btrfs_release_path(&path);
12329
12330         return ret;
12331 }
12332
12333 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12334                                   struct btrfs_path *path,
12335                                   const struct btrfs_key *root_key,
12336                                   const int read_only_mode)
12337 {
12338         const u64 root_id = root_key->objectid;
12339         struct cache_extent *entry;
12340         struct root_item_info *rii;
12341         struct btrfs_root_item ri;
12342         unsigned long offset;
12343
12344         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12345         if (!entry) {
12346                 fprintf(stderr,
12347                         "Error: could not find extent items for root %llu\n",
12348                         root_key->objectid);
12349                 return -ENOENT;
12350         }
12351
12352         rii = container_of(entry, struct root_item_info, cache_extent);
12353         ASSERT(rii->cache_extent.start == root_id);
12354         ASSERT(rii->cache_extent.size == 1);
12355
12356         if (rii->node_count != 1) {
12357                 fprintf(stderr,
12358                         "Error: could not find btree root extent for root %llu\n",
12359                         root_id);
12360                 return -ENOENT;
12361         }
12362
12363         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12364         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12365
12366         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12367             btrfs_root_level(&ri) != rii->level ||
12368             btrfs_root_generation(&ri) != rii->gen) {
12369
12370                 /*
12371                  * If we're in repair mode but our caller told us to not update
12372                  * the root item, i.e. just check if it needs to be updated, don't
12373                  * print this message, since the caller will call us again shortly
12374                  * for the same root item without read only mode (the caller will
12375                  * open a transaction first).
12376                  */
12377                 if (!(read_only_mode && repair))
12378                         fprintf(stderr,
12379                                 "%sroot item for root %llu,"
12380                                 " current bytenr %llu, current gen %llu, current level %u,"
12381                                 " new bytenr %llu, new gen %llu, new level %u\n",
12382                                 (read_only_mode ? "" : "fixing "),
12383                                 root_id,
12384                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12385                                 btrfs_root_level(&ri),
12386                                 rii->bytenr, rii->gen, rii->level);
12387
12388                 if (btrfs_root_generation(&ri) > rii->gen) {
12389                         fprintf(stderr,
12390                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12391                                 root_id, btrfs_root_generation(&ri), rii->gen);
12392                         return -EINVAL;
12393                 }
12394
12395                 if (!read_only_mode) {
12396                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12397                         btrfs_set_root_level(&ri, rii->level);
12398                         btrfs_set_root_generation(&ri, rii->gen);
12399                         write_extent_buffer(path->nodes[0], &ri,
12400                                             offset, sizeof(ri));
12401                 }
12402
12403                 return 1;
12404         }
12405
12406         return 0;
12407 }
12408
12409 /*
12410  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12411  * caused read-only snapshots to be corrupted if they were created at a moment
12412  * when the source subvolume/snapshot had orphan items. The issue was that the
12413  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12414  * node instead of the post orphan cleanup root node.
12415  * So this function, and its callees, just detects and fixes those cases. Even
12416  * though the regression was for read-only snapshots, this function applies to
12417  * any snapshot/subvolume root.
12418  * This must be run before any other repair code - not doing it so, makes other
12419  * repair code delete or modify backrefs in the extent tree for example, which
12420  * will result in an inconsistent fs after repairing the root items.
12421  */
12422 static int repair_root_items(struct btrfs_fs_info *info)
12423 {
12424         struct btrfs_path path;
12425         struct btrfs_key key;
12426         struct extent_buffer *leaf;
12427         struct btrfs_trans_handle *trans = NULL;
12428         int ret = 0;
12429         int bad_roots = 0;
12430         int need_trans = 0;
12431
12432         btrfs_init_path(&path);
12433
12434         ret = build_roots_info_cache(info);
12435         if (ret)
12436                 goto out;
12437
12438         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12439         key.type = BTRFS_ROOT_ITEM_KEY;
12440         key.offset = 0;
12441
12442 again:
12443         /*
12444          * Avoid opening and committing transactions if a leaf doesn't have
12445          * any root items that need to be fixed, so that we avoid rotating
12446          * backup roots unnecessarily.
12447          */
12448         if (need_trans) {
12449                 trans = btrfs_start_transaction(info->tree_root, 1);
12450                 if (IS_ERR(trans)) {
12451                         ret = PTR_ERR(trans);
12452                         goto out;
12453                 }
12454         }
12455
12456         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12457                                 0, trans ? 1 : 0);
12458         if (ret < 0)
12459                 goto out;
12460         leaf = path.nodes[0];
12461
12462         while (1) {
12463                 struct btrfs_key found_key;
12464
12465                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12466                         int no_more_keys = find_next_key(&path, &key);
12467
12468                         btrfs_release_path(&path);
12469                         if (trans) {
12470                                 ret = btrfs_commit_transaction(trans,
12471                                                                info->tree_root);
12472                                 trans = NULL;
12473                                 if (ret < 0)
12474                                         goto out;
12475                         }
12476                         need_trans = 0;
12477                         if (no_more_keys)
12478                                 break;
12479                         goto again;
12480                 }
12481
12482                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12483
12484                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12485                         goto next;
12486                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12487                         goto next;
12488
12489                 ret = maybe_repair_root_item(info, &path, &found_key,
12490                                              trans ? 0 : 1);
12491                 if (ret < 0)
12492                         goto out;
12493                 if (ret) {
12494                         if (!trans && repair) {
12495                                 need_trans = 1;
12496                                 key = found_key;
12497                                 btrfs_release_path(&path);
12498                                 goto again;
12499                         }
12500                         bad_roots++;
12501                 }
12502 next:
12503                 path.slots[0]++;
12504         }
12505         ret = 0;
12506 out:
12507         free_roots_info_cache();
12508         btrfs_release_path(&path);
12509         if (trans)
12510                 btrfs_commit_transaction(trans, info->tree_root);
12511         if (ret < 0)
12512                 return ret;
12513
12514         return bad_roots;
12515 }
12516
12517 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12518 {
12519         struct btrfs_trans_handle *trans;
12520         struct btrfs_block_group_cache *bg_cache;
12521         u64 current = 0;
12522         int ret = 0;
12523
12524         /* Clear all free space cache inodes and its extent data */
12525         while (1) {
12526                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12527                 if (!bg_cache)
12528                         break;
12529                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12530                 if (ret < 0)
12531                         return ret;
12532                 current = bg_cache->key.objectid + bg_cache->key.offset;
12533         }
12534
12535         /* Don't forget to set cache_generation to -1 */
12536         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12537         if (IS_ERR(trans)) {
12538                 error("failed to update super block cache generation");
12539                 return PTR_ERR(trans);
12540         }
12541         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12542         btrfs_commit_transaction(trans, fs_info->tree_root);
12543
12544         return ret;
12545 }
12546
/*
 * Help text for "btrfs check": a NULL-terminated array of strings that
 * cmd_check() passes to usage() when the command line is invalid.
 *
 * NOTE(review): the layout looks like synopsis, one-line summary, long
 * description, an empty separator, then the option lines -- confirm against
 * the usage() implementation before reordering entries.
 */
12547 const char * const cmd_check_usage[] = {
12548         "btrfs check [options] <device>",
12549         "Check structural integrity of a filesystem (unmounted).",
12550         "Check structural integrity of an unmounted filesystem. Verify internal",
12551         "trees' consistency and item connectivity. In the repair mode try to",
12552         "fix the problems found. ",
12553         "WARNING: the repair mode is considered dangerous",
12554         "",
12555         "-s|--super <superblock>     use this superblock copy",
12556         "-b|--backup                 use the first valid backup root copy",
12557         "--repair                    try to repair the filesystem",
12558         "--readonly                  run in read-only mode (default)",
12559         "--init-csum-tree            create a new CRC tree",
12560         "--init-extent-tree          create a new extent tree",
12561         "--mode <MODE>               allows choice of memory/IO trade-offs",
12562         "                            where MODE is one of:",
12563         "                            original - read inodes and extents to memory (requires",
12564         "                                       more memory, does less IO)",
12565         "                            lowmem   - try to use less memory but read blocks again",
12566         "                                       when needed",
12567         "--check-data-csum           verify checksums of data blocks",
12568         "-Q|--qgroup-report          print a report on qgroup consistency",
12569         "-E|--subvol-extents <subvolid>",
12570         "                            print subvolume extents and sharing state",
12571         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
12572         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
12573         "-p|--progress               indicate progress",
12574         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
12575         NULL
12576 };
12577
12578 int cmd_check(int argc, char **argv)
12579 {
12580         struct cache_tree root_cache;
12581         struct btrfs_root *root;
12582         struct btrfs_fs_info *info;
12583         u64 bytenr = 0;
12584         u64 subvolid = 0;
12585         u64 tree_root_bytenr = 0;
12586         u64 chunk_root_bytenr = 0;
12587         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12588         int ret;
12589         int err = 0;
12590         u64 num;
12591         int init_csum_tree = 0;
12592         int readonly = 0;
12593         int clear_space_cache = 0;
12594         int qgroup_report = 0;
12595         int qgroups_repaired = 0;
12596         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12597
12598         while(1) {
12599                 int c;
12600                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12601                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12602                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12603                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12604                 static const struct option long_options[] = {
12605                         { "super", required_argument, NULL, 's' },
12606                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12607                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12608                         { "init-csum-tree", no_argument, NULL,
12609                                 GETOPT_VAL_INIT_CSUM },
12610                         { "init-extent-tree", no_argument, NULL,
12611                                 GETOPT_VAL_INIT_EXTENT },
12612                         { "check-data-csum", no_argument, NULL,
12613                                 GETOPT_VAL_CHECK_CSUM },
12614                         { "backup", no_argument, NULL, 'b' },
12615                         { "subvol-extents", required_argument, NULL, 'E' },
12616                         { "qgroup-report", no_argument, NULL, 'Q' },
12617                         { "tree-root", required_argument, NULL, 'r' },
12618                         { "chunk-root", required_argument, NULL,
12619                                 GETOPT_VAL_CHUNK_TREE },
12620                         { "progress", no_argument, NULL, 'p' },
12621                         { "mode", required_argument, NULL,
12622                                 GETOPT_VAL_MODE },
12623                         { "clear-space-cache", required_argument, NULL,
12624                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12625                         { NULL, 0, NULL, 0}
12626                 };
12627
12628                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12629                 if (c < 0)
12630                         break;
12631                 switch(c) {
12632                         case 'a': /* ignored */ break;
12633                         case 'b':
12634                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12635                                 break;
12636                         case 's':
12637                                 num = arg_strtou64(optarg);
12638                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12639                                         error(
12640                                         "super mirror should be less than %d",
12641                                                 BTRFS_SUPER_MIRROR_MAX);
12642                                         exit(1);
12643                                 }
12644                                 bytenr = btrfs_sb_offset(((int)num));
12645                                 printf("using SB copy %llu, bytenr %llu\n", num,
12646                                        (unsigned long long)bytenr);
12647                                 break;
12648                         case 'Q':
12649                                 qgroup_report = 1;
12650                                 break;
12651                         case 'E':
12652                                 subvolid = arg_strtou64(optarg);
12653                                 break;
12654                         case 'r':
12655                                 tree_root_bytenr = arg_strtou64(optarg);
12656                                 break;
12657                         case GETOPT_VAL_CHUNK_TREE:
12658                                 chunk_root_bytenr = arg_strtou64(optarg);
12659                                 break;
12660                         case 'p':
12661                                 ctx.progress_enabled = true;
12662                                 break;
12663                         case '?':
12664                         case 'h':
12665                                 usage(cmd_check_usage);
12666                         case GETOPT_VAL_REPAIR:
12667                                 printf("enabling repair mode\n");
12668                                 repair = 1;
12669                                 ctree_flags |= OPEN_CTREE_WRITES;
12670                                 break;
12671                         case GETOPT_VAL_READONLY:
12672                                 readonly = 1;
12673                                 break;
12674                         case GETOPT_VAL_INIT_CSUM:
12675                                 printf("Creating a new CRC tree\n");
12676                                 init_csum_tree = 1;
12677                                 repair = 1;
12678                                 ctree_flags |= OPEN_CTREE_WRITES;
12679                                 break;
12680                         case GETOPT_VAL_INIT_EXTENT:
12681                                 init_extent_tree = 1;
12682                                 ctree_flags |= (OPEN_CTREE_WRITES |
12683                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12684                                 repair = 1;
12685                                 break;
12686                         case GETOPT_VAL_CHECK_CSUM:
12687                                 check_data_csum = 1;
12688                                 break;
12689                         case GETOPT_VAL_MODE:
12690                                 check_mode = parse_check_mode(optarg);
12691                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12692                                         error("unknown mode: %s", optarg);
12693                                         exit(1);
12694                                 }
12695                                 break;
12696                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12697                                 if (strcmp(optarg, "v1") == 0) {
12698                                         clear_space_cache = 1;
12699                                 } else if (strcmp(optarg, "v2") == 0) {
12700                                         clear_space_cache = 2;
12701                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12702                                 } else {
12703                                         error(
12704                 "invalid argument to --clear-space-cache, must be v1 or v2");
12705                                         exit(1);
12706                                 }
12707                                 ctree_flags |= OPEN_CTREE_WRITES;
12708                                 break;
12709                 }
12710         }
12711
12712         if (check_argc_exact(argc - optind, 1))
12713                 usage(cmd_check_usage);
12714
12715         if (ctx.progress_enabled) {
12716                 ctx.tp = TASK_NOTHING;
12717                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12718         }
12719
12720         /* This check is the only reason for --readonly to exist */
12721         if (readonly && repair) {
12722                 error("repair options are not compatible with --readonly");
12723                 exit(1);
12724         }
12725
12726         /*
12727          * Not supported yet
12728          */
12729         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12730                 error("low memory mode doesn't support repair yet");
12731                 exit(1);
12732         }
12733
12734         radix_tree_init();
12735         cache_tree_init(&root_cache);
12736
12737         if((ret = check_mounted(argv[optind])) < 0) {
12738                 error("could not check mount status: %s", strerror(-ret));
12739                 err |= !!ret;
12740                 goto err_out;
12741         } else if(ret) {
12742                 error("%s is currently mounted, aborting", argv[optind]);
12743                 ret = -EBUSY;
12744                 err |= !!ret;
12745                 goto err_out;
12746         }
12747
12748         /* only allow partial opening under repair mode */
12749         if (repair)
12750                 ctree_flags |= OPEN_CTREE_PARTIAL;
12751
12752         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12753                                   chunk_root_bytenr, ctree_flags);
12754         if (!info) {
12755                 error("cannot open file system");
12756                 ret = -EIO;
12757                 err |= !!ret;
12758                 goto err_out;
12759         }
12760
12761         global_info = info;
12762         root = info->fs_root;
12763         if (clear_space_cache == 1) {
12764                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12765                         error(
12766                 "free space cache v2 detected, use --clear-space-cache v2");
12767                         ret = 1;
12768                         goto close_out;
12769                 }
12770                 printf("Clearing free space cache\n");
12771                 ret = clear_free_space_cache(info);
12772                 if (ret) {
12773                         error("failed to clear free space cache");
12774                         ret = 1;
12775                 } else {
12776                         printf("Free space cache cleared\n");
12777                 }
12778                 goto close_out;
12779         } else if (clear_space_cache == 2) {
12780                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12781                         printf("no free space cache v2 to clear\n");
12782                         ret = 0;
12783                         goto close_out;
12784                 }
12785                 printf("Clear free space cache v2\n");
12786                 ret = btrfs_clear_free_space_tree(info);
12787                 if (ret) {
12788                         error("failed to clear free space cache v2: %d", ret);
12789                         ret = 1;
12790                 } else {
12791                         printf("free space cache v2 cleared\n");
12792                 }
12793                 goto close_out;
12794         }
12795
12796         /*
12797          * repair mode will force us to commit transaction which
12798          * will make us fail to load log tree when mounting.
12799          */
12800         if (repair && btrfs_super_log_root(info->super_copy)) {
12801                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12802                 if (!ret) {
12803                         ret = 1;
12804                         err |= !!ret;
12805                         goto close_out;
12806                 }
12807                 ret = zero_log_tree(root);
12808                 err |= !!ret;
12809                 if (ret) {
12810                         error("failed to zero log tree: %d", ret);
12811                         goto close_out;
12812                 }
12813         }
12814
12815         uuid_unparse(info->super_copy->fsid, uuidbuf);
12816         if (qgroup_report) {
12817                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12818                        uuidbuf);
12819                 ret = qgroup_verify_all(info);
12820                 err |= !!ret;
12821                 if (ret == 0)
12822                         report_qgroups(1);
12823                 goto close_out;
12824         }
12825         if (subvolid) {
12826                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12827                        subvolid, argv[optind], uuidbuf);
12828                 ret = print_extent_state(info, subvolid);
12829                 err |= !!ret;
12830                 goto close_out;
12831         }
12832         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12833
12834         if (!extent_buffer_uptodate(info->tree_root->node) ||
12835             !extent_buffer_uptodate(info->dev_root->node) ||
12836             !extent_buffer_uptodate(info->chunk_root->node)) {
12837                 error("critical roots corrupted, unable to check the filesystem");
12838                 err |= !!ret;
12839                 ret = -EIO;
12840                 goto close_out;
12841         }
12842
12843         if (init_extent_tree || init_csum_tree) {
12844                 struct btrfs_trans_handle *trans;
12845
12846                 trans = btrfs_start_transaction(info->extent_root, 0);
12847                 if (IS_ERR(trans)) {
12848                         error("error starting transaction");
12849                         ret = PTR_ERR(trans);
12850                         err |= !!ret;
12851                         goto close_out;
12852                 }
12853
12854                 if (init_extent_tree) {
12855                         printf("Creating a new extent tree\n");
12856                         ret = reinit_extent_tree(trans, info);
12857                         err |= !!ret;
12858                         if (ret)
12859                                 goto close_out;
12860                 }
12861
12862                 if (init_csum_tree) {
12863                         printf("Reinitialize checksum tree\n");
12864                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12865                         if (ret) {
12866                                 error("checksum tree initialization failed: %d",
12867                                                 ret);
12868                                 ret = -EIO;
12869                                 err |= !!ret;
12870                                 goto close_out;
12871                         }
12872
12873                         ret = fill_csum_tree(trans, info->csum_root,
12874                                              init_extent_tree);
12875                         err |= !!ret;
12876                         if (ret) {
12877                                 error("checksum tree refilling failed: %d", ret);
12878                                 return -EIO;
12879                         }
12880                 }
12881                 /*
12882                  * Ok now we commit and run the normal fsck, which will add
12883                  * extent entries for all of the items it finds.
12884                  */
12885                 ret = btrfs_commit_transaction(trans, info->extent_root);
12886                 err |= !!ret;
12887                 if (ret)
12888                         goto close_out;
12889         }
12890         if (!extent_buffer_uptodate(info->extent_root->node)) {
12891                 error("critical: extent_root, unable to check the filesystem");
12892                 ret = -EIO;
12893                 err |= !!ret;
12894                 goto close_out;
12895         }
12896         if (!extent_buffer_uptodate(info->csum_root->node)) {
12897                 error("critical: csum_root, unable to check the filesystem");
12898                 ret = -EIO;
12899                 err |= !!ret;
12900                 goto close_out;
12901         }
12902
12903         if (!ctx.progress_enabled)
12904                 fprintf(stderr, "checking extents\n");
12905         if (check_mode == CHECK_MODE_LOWMEM)
12906                 ret = check_chunks_and_extents_v2(root);
12907         else
12908                 ret = check_chunks_and_extents(root);
12909         err |= !!ret;
12910         if (ret)
12911                 error(
12912                 "errors found in extent allocation tree or chunk allocation");
12913
12914         ret = repair_root_items(info);
12915         err |= !!ret;
12916         if (ret < 0)
12917                 goto close_out;
12918         if (repair) {
12919                 fprintf(stderr, "Fixed %d roots.\n", ret);
12920                 ret = 0;
12921         } else if (ret > 0) {
12922                 fprintf(stderr,
12923                        "Found %d roots with an outdated root item.\n",
12924                        ret);
12925                 fprintf(stderr,
12926                         "Please run a filesystem check with the option --repair to fix them.\n");
12927                 ret = 1;
12928                 err |= !!ret;
12929                 goto close_out;
12930         }
12931
12932         if (!ctx.progress_enabled) {
12933                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12934                         fprintf(stderr, "checking free space tree\n");
12935                 else
12936                         fprintf(stderr, "checking free space cache\n");
12937         }
12938         ret = check_space_cache(root);
12939         err |= !!ret;
12940         if (ret)
12941                 goto out;
12942
12943         /*
12944          * We used to have to have these hole extents in between our real
12945          * extents so if we don't have this flag set we need to make sure there
12946          * are no gaps in the file extents for inodes, otherwise we can just
12947          * ignore it when this happens.
12948          */
12949         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12950         if (!ctx.progress_enabled)
12951                 fprintf(stderr, "checking fs roots\n");
12952         if (check_mode == CHECK_MODE_LOWMEM)
12953                 ret = check_fs_roots_v2(root->fs_info);
12954         else
12955                 ret = check_fs_roots(root, &root_cache);
12956         err |= !!ret;
12957         if (ret)
12958                 goto out;
12959
12960         fprintf(stderr, "checking csums\n");
12961         ret = check_csums(root);
12962         err |= !!ret;
12963         if (ret)
12964                 goto out;
12965
12966         fprintf(stderr, "checking root refs\n");
12967         /* For low memory mode, check_fs_roots_v2 handles root refs */
12968         if (check_mode != CHECK_MODE_LOWMEM) {
12969                 ret = check_root_refs(root, &root_cache);
12970                 err |= !!ret;
12971                 if (ret)
12972                         goto out;
12973         }
12974
12975         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12976                 struct extent_buffer *eb;
12977
12978                 eb = list_first_entry(&root->fs_info->recow_ebs,
12979                                       struct extent_buffer, recow);
12980                 list_del_init(&eb->recow);
12981                 ret = recow_extent_buffer(root, eb);
12982                 err |= !!ret;
12983                 if (ret)
12984                         break;
12985         }
12986
12987         while (!list_empty(&delete_items)) {
12988                 struct bad_item *bad;
12989
12990                 bad = list_first_entry(&delete_items, struct bad_item, list);
12991                 list_del_init(&bad->list);
12992                 if (repair) {
12993                         ret = delete_bad_item(root, bad);
12994                         err |= !!ret;
12995                 }
12996                 free(bad);
12997         }
12998
12999         if (info->quota_enabled) {
13000                 fprintf(stderr, "checking quota groups\n");
13001                 ret = qgroup_verify_all(info);
13002                 err |= !!ret;
13003                 if (ret)
13004                         goto out;
13005                 report_qgroups(0);
13006                 ret = repair_qgroups(info, &qgroups_repaired);
13007                 err |= !!ret;
13008                 if (err)
13009                         goto out;
13010                 ret = 0;
13011         }
13012
13013         if (!list_empty(&root->fs_info->recow_ebs)) {
13014                 error("transid errors in file system");
13015                 ret = 1;
13016                 err |= !!ret;
13017         }
13018 out:
13019         if (found_old_backref) { /*
13020                  * there was a disk format change when mixed
13021                  * backref was in testing tree. The old format
13022                  * existed about one week.
13023                  */
13024                 printf("\n * Found old mixed backref format. "
13025                        "The old format is not supported! *"
13026                        "\n * Please mount the FS in readonly mode, "
13027                        "backup data and re-format the FS. *\n\n");
13028                 err |= 1;
13029         }
13030         printf("found %llu bytes used err is %d\n",
13031                (unsigned long long)bytes_used, ret);
13032         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13033         printf("total tree bytes: %llu\n",
13034                (unsigned long long)total_btree_bytes);
13035         printf("total fs tree bytes: %llu\n",
13036                (unsigned long long)total_fs_tree_bytes);
13037         printf("total extent tree bytes: %llu\n",
13038                (unsigned long long)total_extent_tree_bytes);
13039         printf("btree space waste bytes: %llu\n",
13040                (unsigned long long)btree_space_waste);
13041         printf("file data blocks allocated: %llu\n referenced %llu\n",
13042                 (unsigned long long)data_bytes_allocated,
13043                 (unsigned long long)data_bytes_referenced);
13044
13045         free_qgroup_counts();
13046         free_root_recs_tree(&root_cache);
13047 close_out:
13048         close_ctree(root);
13049 err_out:
13050         if (ctx.progress_enabled)
13051                 task_deinit(ctx.info);
13052
13053         return err;
13054 }