btrfs-progs: check: remove unused argument from fix_key_order
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases shown by the progress indicator.  print_status_check() uses the
 * value as an index into its message table and treats TASK_NOTHING as
 * "nothing to report", so TASK_NOTHING must remain the final enumerator.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* has to be the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Available check implementations.  CHECK_MODE_UNKNOWN is what
 * parse_check_mode() returns for an unrecognized name; the default is an
 * alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect for this run, initialised to the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; each value is a distinct single bit so they can be OR-ed. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not determined yet" value for
 * extent_record::flag_block_full_backref (a two-bit field).
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Back reference error bits, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Inode error bits, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag must be a distinct bit.  CROSSING_STRIPE_BOUNDARY used to
 * share (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now has its own bit while all other values are
 * unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Companion of print_status_check(): emit a newline, flush stdout and
 * return 0.  The argument is not used.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; being static, it is defined in this translation unit. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
/*
 * Instantiate the tree teardown helper for inode record caches.
 * NOTE(review): presumably this expands to the definition of
 * free_inode_recs_tree() (called from enter_shared_node()), releasing each
 * entry through free_inode_ptr() -- confirm against the macro definition.
 */
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of the extent reference information of the tree block most
 * recently looked up at that level, indexed by tree level.
 */
struct node_refs {
        /* start (bytenr) of the tree block the entry at this level describes */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count returned by btrfs_lookup_extent_info() */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero if the block has to be checked from the current root */
        int need_check[BTRFS_MAX_LEVEL];
};
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the INODE_REF/INODE_EXTREF
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 found;
4703         unsigned int extent_type;
4704         unsigned int is_hole;
4705         int ret;
4706         int err = 0;
4707
4708         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4709
4710         extent_type = btrfs_file_extent_type(node, fi);
4711         /* Skip if file extent is inline */
4712         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4713                 struct btrfs_item *e = btrfs_item_nr(slot);
4714                 u32 item_inline_len;
4715
4716                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4717                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4718                 if (extent_num_bytes == 0 ||
4719                     extent_num_bytes != item_inline_len)
4720                         err |= FILE_EXTENT_ERROR;
4721                 *size += extent_num_bytes;
4722                 return err;
4723         }
4724
4725         /* Check extent type */
4726         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4727                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4728                 err |= FILE_EXTENT_ERROR;
4729                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4730                       root->objectid, fkey->objectid, fkey->offset);
4731                 return err;
4732         }
4733
4734         /* Check REG_EXTENT/PREALLOC_EXTENT */
4735         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4736         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4737         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4738         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4739
4740         /* Check EXTENT_DATA datasum */
4741         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4742         if (found > 0 && nodatasum) {
4743                 err |= ODD_CSUM_ITEM;
4744                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4745                       root->objectid, fkey->objectid, fkey->offset);
4746         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4747                    !is_hole &&
4748                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4749                 err |= CSUM_ITEM_MISSING;
4750                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4753                 err |= ODD_CSUM_ITEM;
4754                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         }
4757
4758         /* Check EXTENT_DATA hole */
4759         if (no_holes && is_hole) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         } else if (!no_holes && *end != fkey->offset) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         }
4768
4769         *end += extent_num_bytes;
4770         if (!is_hole)
4771                 *size += extent_num_bytes;
4772
4773         return err;
4774 }
4775
4776 /*
4777  * Check INODE_ITEM and related ITEMs (the same inode number)
4778  * 1. check link count
4779  * 2. check inode ref/extref
4780  * 3. check dir item/index
4781  *
4782  * @ext_ref:    the EXTENDED_IREF feature
4783  *
4784  * Return 0 if no error occurred.
4785  * Return >0 for error or hit the traversal is done(by error bitmap)
4786  */
4787 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4788                             unsigned int ext_ref)
4789 {
4790         struct extent_buffer *node;
4791         struct btrfs_inode_item *ii;
4792         struct btrfs_key key;
4793         u64 inode_id;
4794         u32 mode;
4795         u64 nlink;
4796         u64 nbytes;
4797         u64 isize;
4798         u64 size = 0;
4799         u64 refs = 0;
4800         u64 extent_end = 0;
4801         u64 extent_size = 0;
4802         unsigned int dir;
4803         unsigned int nodatasum;
4804         int slot;
4805         int ret;
4806         int err = 0;
4807
4808         node = path->nodes[0];
4809         slot = path->slots[0];
4810
4811         btrfs_item_key_to_cpu(node, &key, slot);
4812         inode_id = key.objectid;
4813
4814         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4815                 ret = btrfs_next_item(root, path);
4816                 if (ret > 0)
4817                         err |= LAST_ITEM;
4818                 return err;
4819         }
4820
4821         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4822         isize = btrfs_inode_size(node, ii);
4823         nbytes = btrfs_inode_nbytes(node, ii);
4824         mode = btrfs_inode_mode(node, ii);
4825         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4826         nlink = btrfs_inode_nlink(node, ii);
4827         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4828
4829         while (1) {
4830                 ret = btrfs_next_item(root, path);
4831                 if (ret < 0) {
4832                         /* out will fill 'err' rusing current statistics */
4833                         goto out;
4834                 } else if (ret > 0) {
4835                         err |= LAST_ITEM;
4836                         goto out;
4837                 }
4838
4839                 node = path->nodes[0];
4840                 slot = path->slots[0];
4841                 btrfs_item_key_to_cpu(node, &key, slot);
4842                 if (key.objectid != inode_id)
4843                         goto out;
4844
4845                 switch (key.type) {
4846                 case BTRFS_INODE_REF_KEY:
4847                         ret = check_inode_ref(root, &key, node, slot, &refs,
4848                                               mode);
4849                         err |= ret;
4850                         break;
4851                 case BTRFS_INODE_EXTREF_KEY:
4852                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4853                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4854                                         root->objectid, key.objectid,
4855                                         key.offset);
4856                         ret = check_inode_extref(root, &key, node, slot, &refs,
4857                                                  mode);
4858                         err |= ret;
4859                         break;
4860                 case BTRFS_DIR_ITEM_KEY:
4861                 case BTRFS_DIR_INDEX_KEY:
4862                         if (!dir) {
4863                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4864                                         root->objectid, inode_id,
4865                                         imode_to_type(mode), key.objectid,
4866                                         key.offset);
4867                         }
4868                         ret = check_dir_item(root, &key, node, slot, &size,
4869                                              ext_ref);
4870                         err |= ret;
4871                         break;
4872                 case BTRFS_EXTENT_DATA_KEY:
4873                         if (dir) {
4874                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4875                                         root->objectid, inode_id, key.objectid,
4876                                         key.offset);
4877                         }
4878                         ret = check_file_extent(root, &key, node, slot,
4879                                                 nodatasum, &extent_size,
4880                                                 &extent_end);
4881                         err |= ret;
4882                         break;
4883                 case BTRFS_XATTR_ITEM_KEY:
4884                         break;
4885                 default:
4886                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4887                               key.objectid, key.type, key.offset);
4888                 }
4889         }
4890
4891 out:
4892         /* verify INODE_ITEM nlink/isize/nbytes */
4893         if (dir) {
4894                 if (nlink != 1) {
4895                         err |= LINK_COUNT_ERROR;
4896                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4897                               root->objectid, inode_id, nlink);
4898                 }
4899
4900                 /*
4901                  * Just a warning, as dir inode nbytes is just an
4902                  * instructive value.
4903                  */
4904                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4905                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4906                                 root->objectid, inode_id, root->nodesize);
4907                 }
4908
4909                 if (isize != size) {
4910                         err |= ISIZE_ERROR;
4911                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4912                               root->objectid, inode_id, isize, size);
4913                 }
4914         } else {
4915                 if (nlink != refs) {
4916                         err |= LINK_COUNT_ERROR;
4917                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4918                               root->objectid, inode_id, nlink, refs);
4919                 } else if (!nlink) {
4920                         err |= ORPHAN_ITEM;
4921                 }
4922
4923                 if (!nbytes && !no_holes && extent_end < isize) {
4924                         err |= NBYTES_ERROR;
4925                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4926                               root->objectid, inode_id, isize);
4927                 }
4928
4929                 if (nbytes != extent_size) {
4930                         err |= NBYTES_ERROR;
4931                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4932                               root->objectid, inode_id, nbytes, extent_size);
4933                 }
4934         }
4935
4936         return err;
4937 }
4938
4939 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4940 {
4941         struct btrfs_path path;
4942         struct btrfs_key key;
4943         int err = 0;
4944         int ret;
4945
4946         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4947         key.type = BTRFS_INODE_ITEM_KEY;
4948         key.offset = 0;
4949
4950         /* For root being dropped, we don't need to check first inode */
4951         if (btrfs_root_refs(&root->root_item) == 0 &&
4952             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4953             key.objectid)
4954                 return 0;
4955
4956         btrfs_init_path(&path);
4957
4958         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4959         if (ret < 0)
4960                 goto out;
4961         if (ret > 0) {
4962                 ret = 0;
4963                 err |= INODE_ITEM_MISSING;
4964         }
4965
4966         err |= check_inode_item(root, &path, ext_ref);
4967         err &= ~LAST_ITEM;
4968         if (err && !ret)
4969                 ret = -EIO;
4970 out:
4971         btrfs_release_path(&path);
4972         return ret;
4973 }
4974
4975 /*
4976  * Iterate all item on the tree and call check_inode_item() to check.
4977  *
4978  * @root:       the root of the tree to be checked.
4979  * @ext_ref:    the EXTENDED_IREF feature
4980  *
4981  * Return 0 if no error found.
4982  * Return <0 for error.
4983  */
4984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4985 {
4986         struct btrfs_path path;
4987         struct node_refs nrefs;
4988         struct btrfs_root_item *root_item = &root->root_item;
4989         int ret, wret;
4990         int level;
4991
4992         /*
4993          * We need to manually check the first inode item(256)
4994          * As the following traversal function will only start from
4995          * the first inode item in the leaf, if inode item(256) is missing
4996          * we will just skip it forever.
4997          */
4998         ret = check_fs_first_inode(root, ext_ref);
4999         if (ret < 0)
5000                 return ret;
5001
5002         memset(&nrefs, 0, sizeof(nrefs));
5003         level = btrfs_header_level(root->node);
5004         btrfs_init_path(&path);
5005
5006         if (btrfs_root_refs(root_item) > 0 ||
5007             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5008                 path.nodes[level] = root->node;
5009                 path.slots[level] = 0;
5010                 extent_buffer_get(root->node);
5011         } else {
5012                 struct btrfs_key key;
5013
5014                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5015                 level = root_item->drop_level;
5016                 path.lowest_level = level;
5017                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5018                 if (ret < 0)
5019                         goto out;
5020                 ret = 0;
5021         }
5022
5023         while (1) {
5024                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5025                 if (wret < 0)
5026                         ret = wret;
5027                 if (wret != 0)
5028                         break;
5029
5030                 wret = walk_up_tree_v2(root, &path, &level);
5031                 if (wret < 0)
5032                         ret = wret;
5033                 if (wret != 0)
5034                         break;
5035         }
5036
5037 out:
5038         btrfs_release_path(&path);
5039         return ret;
5040 }
5041
5042 /*
5043  * Find the relative ref for root_ref and root_backref.
5044  *
5045  * @root:       the root of the root tree.
5046  * @ref_key:    the key of the root ref.
5047  *
5048  * Return 0 if no error occurred.
5049  */
5050 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5051                           struct extent_buffer *node, int slot)
5052 {
5053         struct btrfs_path path;
5054         struct btrfs_key key;
5055         struct btrfs_root_ref *ref;
5056         struct btrfs_root_ref *backref;
5057         char ref_name[BTRFS_NAME_LEN] = {0};
5058         char backref_name[BTRFS_NAME_LEN] = {0};
5059         u64 ref_dirid;
5060         u64 ref_seq;
5061         u32 ref_namelen;
5062         u64 backref_dirid;
5063         u64 backref_seq;
5064         u32 backref_namelen;
5065         u32 len;
5066         int ret;
5067         int err = 0;
5068
5069         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5070         ref_dirid = btrfs_root_ref_dirid(node, ref);
5071         ref_seq = btrfs_root_ref_sequence(node, ref);
5072         ref_namelen = btrfs_root_ref_name_len(node, ref);
5073
5074         if (ref_namelen <= BTRFS_NAME_LEN) {
5075                 len = ref_namelen;
5076         } else {
5077                 len = BTRFS_NAME_LEN;
5078                 warning("%s[%llu %llu] ref_name too long",
5079                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5080                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5081                         ref_key->offset);
5082         }
5083         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5084
5085         /* Find relative root_ref */
5086         key.objectid = ref_key->offset;
5087         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5088         key.offset = ref_key->objectid;
5089
5090         btrfs_init_path(&path);
5091         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5092         if (ret) {
5093                 err |= ROOT_REF_MISSING;
5094                 error("%s[%llu %llu] couldn't find relative ref",
5095                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5096                       "ROOT_REF" : "ROOT_BACKREF",
5097                       ref_key->objectid, ref_key->offset);
5098                 goto out;
5099         }
5100
5101         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5102                                  struct btrfs_root_ref);
5103         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5104         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5105         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5106
5107         if (backref_namelen <= BTRFS_NAME_LEN) {
5108                 len = backref_namelen;
5109         } else {
5110                 len = BTRFS_NAME_LEN;
5111                 warning("%s[%llu %llu] ref_name too long",
5112                         key.type == BTRFS_ROOT_REF_KEY ?
5113                         "ROOT_REF" : "ROOT_BACKREF",
5114                         key.objectid, key.offset);
5115         }
5116         read_extent_buffer(path.nodes[0], backref_name,
5117                            (unsigned long)(backref + 1), len);
5118
5119         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5120             ref_namelen != backref_namelen ||
5121             strncmp(ref_name, backref_name, len)) {
5122                 err |= ROOT_REF_MISMATCH;
5123                 error("%s[%llu %llu] mismatch relative ref",
5124                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5125                       "ROOT_REF" : "ROOT_BACKREF",
5126                       ref_key->objectid, ref_key->offset);
5127         }
5128 out:
5129         btrfs_release_path(&path);
5130         return err;
5131 }
5132
5133 /*
5134  * Check all fs/file tree in low_memory mode.
5135  *
5136  * 1. for fs tree root item, call check_fs_root_v2()
5137  * 2. for fs tree root ref/backref, call check_root_ref()
5138  *
5139  * Return 0 if no error occurred.
5140  */
5141 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5142 {
5143         struct btrfs_root *tree_root = fs_info->tree_root;
5144         struct btrfs_root *cur_root = NULL;
5145         struct btrfs_path path;
5146         struct btrfs_key key;
5147         struct extent_buffer *node;
5148         unsigned int ext_ref;
5149         int slot;
5150         int ret;
5151         int err = 0;
5152
5153         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5154
5155         btrfs_init_path(&path);
5156         key.objectid = BTRFS_FS_TREE_OBJECTID;
5157         key.offset = 0;
5158         key.type = BTRFS_ROOT_ITEM_KEY;
5159
5160         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5161         if (ret < 0) {
5162                 err = ret;
5163                 goto out;
5164         } else if (ret > 0) {
5165                 err = -ENOENT;
5166                 goto out;
5167         }
5168
5169         while (1) {
5170                 node = path.nodes[0];
5171                 slot = path.slots[0];
5172                 btrfs_item_key_to_cpu(node, &key, slot);
5173                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5174                         goto out;
5175                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5176                     fs_root_objectid(key.objectid)) {
5177                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5178                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5179                                                                        &key);
5180                         } else {
5181                                 key.offset = (u64)-1;
5182                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5183                         }
5184
5185                         if (IS_ERR(cur_root)) {
5186                                 error("Fail to read fs/subvol tree: %lld",
5187                                       key.objectid);
5188                                 err = -EIO;
5189                                 goto next;
5190                         }
5191
5192                         ret = check_fs_root_v2(cur_root, ext_ref);
5193                         err |= ret;
5194
5195                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5196                                 btrfs_free_fs_root(cur_root);
5197                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5198                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5199                         ret = check_root_ref(tree_root, &key, node, slot);
5200                         err |= ret;
5201                 }
5202 next:
5203                 ret = btrfs_next_item(tree_root, &path);
5204                 if (ret > 0)
5205                         goto out;
5206                 if (ret < 0) {
5207                         err = ret;
5208                         goto out;
5209                 }
5210         }
5211
5212 out:
5213         btrfs_release_path(&path);
5214         return err;
5215 }
5216
5217 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5218 {
5219         struct list_head *cur = rec->backrefs.next;
5220         struct extent_backref *back;
5221         struct tree_backref *tback;
5222         struct data_backref *dback;
5223         u64 found = 0;
5224         int err = 0;
5225
5226         while(cur != &rec->backrefs) {
5227                 back = to_extent_backref(cur);
5228                 cur = cur->next;
5229                 if (!back->found_extent_tree) {
5230                         err = 1;
5231                         if (!print_errs)
5232                                 goto out;
5233                         if (back->is_data) {
5234                                 dback = to_data_backref(back);
5235                                 fprintf(stderr, "Backref %llu %s %llu"
5236                                         " owner %llu offset %llu num_refs %lu"
5237                                         " not found in extent tree\n",
5238                                         (unsigned long long)rec->start,
5239                                         back->full_backref ?
5240                                         "parent" : "root",
5241                                         back->full_backref ?
5242                                         (unsigned long long)dback->parent:
5243                                         (unsigned long long)dback->root,
5244                                         (unsigned long long)dback->owner,
5245                                         (unsigned long long)dback->offset,
5246                                         (unsigned long)dback->num_refs);
5247                         } else {
5248                                 tback = to_tree_backref(back);
5249                                 fprintf(stderr, "Backref %llu parent %llu"
5250                                         " root %llu not found in extent tree\n",
5251                                         (unsigned long long)rec->start,
5252                                         (unsigned long long)tback->parent,
5253                                         (unsigned long long)tback->root);
5254                         }
5255                 }
5256                 if (!back->is_data && !back->found_ref) {
5257                         err = 1;
5258                         if (!print_errs)
5259                                 goto out;
5260                         tback = to_tree_backref(back);
5261                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5262                                 (unsigned long long)rec->start,
5263                                 back->full_backref ? "parent" : "root",
5264                                 back->full_backref ?
5265                                 (unsigned long long)tback->parent :
5266                                 (unsigned long long)tback->root, back);
5267                 }
5268                 if (back->is_data) {
5269                         dback = to_data_backref(back);
5270                         if (dback->found_ref != dback->num_refs) {
5271                                 err = 1;
5272                                 if (!print_errs)
5273                                         goto out;
5274                                 fprintf(stderr, "Incorrect local backref count"
5275                                         " on %llu %s %llu owner %llu"
5276                                         " offset %llu found %u wanted %u back %p\n",
5277                                         (unsigned long long)rec->start,
5278                                         back->full_backref ?
5279                                         "parent" : "root",
5280                                         back->full_backref ?
5281                                         (unsigned long long)dback->parent:
5282                                         (unsigned long long)dback->root,
5283                                         (unsigned long long)dback->owner,
5284                                         (unsigned long long)dback->offset,
5285                                         dback->found_ref, dback->num_refs, back);
5286                         }
5287                         if (dback->disk_bytenr != rec->start) {
5288                                 err = 1;
5289                                 if (!print_errs)
5290                                         goto out;
5291                                 fprintf(stderr, "Backref disk bytenr does not"
5292                                         " match extent record, bytenr=%llu, "
5293                                         "ref bytenr=%llu\n",
5294                                         (unsigned long long)rec->start,
5295                                         (unsigned long long)dback->disk_bytenr);
5296                         }
5297
5298                         if (dback->bytes != rec->nr) {
5299                                 err = 1;
5300                                 if (!print_errs)
5301                                         goto out;
5302                                 fprintf(stderr, "Backref bytes do not match "
5303                                         "extent backref, bytenr=%llu, ref "
5304                                         "bytes=%llu, backref bytes=%llu\n",
5305                                         (unsigned long long)rec->start,
5306                                         (unsigned long long)rec->nr,
5307                                         (unsigned long long)dback->bytes);
5308                         }
5309                 }
5310                 if (!back->is_data) {
5311                         found += 1;
5312                 } else {
5313                         dback = to_data_backref(back);
5314                         found += dback->found_ref;
5315                 }
5316         }
5317         if (found != rec->refs) {
5318                 err = 1;
5319                 if (!print_errs)
5320                         goto out;
5321                 fprintf(stderr, "Incorrect global backref count "
5322                         "on %llu found %llu wanted %llu\n",
5323                         (unsigned long long)rec->start,
5324                         (unsigned long long)found,
5325                         (unsigned long long)rec->refs);
5326         }
5327 out:
5328         return err;
5329 }
5330
5331 static int free_all_extent_backrefs(struct extent_record *rec)
5332 {
5333         struct extent_backref *back;
5334         struct list_head *cur;
5335         while (!list_empty(&rec->backrefs)) {
5336                 cur = rec->backrefs.next;
5337                 back = to_extent_backref(cur);
5338                 list_del(cur);
5339                 free(back);
5340         }
5341         return 0;
5342 }
5343
5344 static void free_extent_record_cache(struct cache_tree *extent_cache)
5345 {
5346         struct cache_extent *cache;
5347         struct extent_record *rec;
5348
5349         while (1) {
5350                 cache = first_cache_extent(extent_cache);
5351                 if (!cache)
5352                         break;
5353                 rec = container_of(cache, struct extent_record, cache);
5354                 remove_cache_extent(extent_cache, cache);
5355                 free_all_extent_backrefs(rec);
5356                 free(rec);
5357         }
5358 }
5359
5360 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5361                                  struct extent_record *rec)
5362 {
5363         if (rec->content_checked && rec->owner_ref_checked &&
5364             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5365             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5366             !rec->bad_full_backref && !rec->crossing_stripes &&
5367             !rec->wrong_chunk_type) {
5368                 remove_cache_extent(extent_cache, &rec->cache);
5369                 free_all_extent_backrefs(rec);
5370                 list_del_init(&rec->list);
5371                 free(rec);
5372         }
5373         return 0;
5374 }
5375
5376 static int check_owner_ref(struct btrfs_root *root,
5377                             struct extent_record *rec,
5378                             struct extent_buffer *buf)
5379 {
5380         struct extent_backref *node;
5381         struct tree_backref *back;
5382         struct btrfs_root *ref_root;
5383         struct btrfs_key key;
5384         struct btrfs_path path;
5385         struct extent_buffer *parent;
5386         int level;
5387         int found = 0;
5388         int ret;
5389
5390         list_for_each_entry(node, &rec->backrefs, list) {
5391                 if (node->is_data)
5392                         continue;
5393                 if (!node->found_ref)
5394                         continue;
5395                 if (node->full_backref)
5396                         continue;
5397                 back = to_tree_backref(node);
5398                 if (btrfs_header_owner(buf) == back->root)
5399                         return 0;
5400         }
5401         BUG_ON(rec->is_root);
5402
5403         /* try to find the block by search corresponding fs tree */
5404         key.objectid = btrfs_header_owner(buf);
5405         key.type = BTRFS_ROOT_ITEM_KEY;
5406         key.offset = (u64)-1;
5407
5408         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5409         if (IS_ERR(ref_root))
5410                 return 1;
5411
5412         level = btrfs_header_level(buf);
5413         if (level == 0)
5414                 btrfs_item_key_to_cpu(buf, &key, 0);
5415         else
5416                 btrfs_node_key_to_cpu(buf, &key, 0);
5417
5418         btrfs_init_path(&path);
5419         path.lowest_level = level + 1;
5420         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5421         if (ret < 0)
5422                 return 0;
5423
5424         parent = path.nodes[level + 1];
5425         if (parent && buf->start == btrfs_node_blockptr(parent,
5426                                                         path.slots[level + 1]))
5427                 found = 1;
5428
5429         btrfs_release_path(&path);
5430         return found ? 0 : 1;
5431 }
5432
5433 static int is_extent_tree_record(struct extent_record *rec)
5434 {
5435         struct list_head *cur = rec->backrefs.next;
5436         struct extent_backref *node;
5437         struct tree_backref *back;
5438         int is_extent = 0;
5439
5440         while(cur != &rec->backrefs) {
5441                 node = to_extent_backref(cur);
5442                 cur = cur->next;
5443                 if (node->is_data)
5444                         return 0;
5445                 back = to_tree_backref(node);
5446                 if (node->full_backref)
5447                         return 0;
5448                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5449                         is_extent = 1;
5450         }
5451         return is_extent;
5452 }
5453
5454
5455 static int record_bad_block_io(struct btrfs_fs_info *info,
5456                                struct cache_tree *extent_cache,
5457                                u64 start, u64 len)
5458 {
5459         struct extent_record *rec;
5460         struct cache_extent *cache;
5461         struct btrfs_key key;
5462
5463         cache = lookup_cache_extent(extent_cache, start, len);
5464         if (!cache)
5465                 return 0;
5466
5467         rec = container_of(cache, struct extent_record, cache);
5468         if (!is_extent_tree_record(rec))
5469                 return 0;
5470
5471         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5472         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5473 }
5474
5475 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5476                        struct extent_buffer *buf, int slot)
5477 {
5478         if (btrfs_header_level(buf)) {
5479                 struct btrfs_key_ptr ptr1, ptr2;
5480
5481                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5482                                    sizeof(struct btrfs_key_ptr));
5483                 read_extent_buffer(buf, &ptr2,
5484                                    btrfs_node_key_ptr_offset(slot + 1),
5485                                    sizeof(struct btrfs_key_ptr));
5486                 write_extent_buffer(buf, &ptr1,
5487                                     btrfs_node_key_ptr_offset(slot + 1),
5488                                     sizeof(struct btrfs_key_ptr));
5489                 write_extent_buffer(buf, &ptr2,
5490                                     btrfs_node_key_ptr_offset(slot),
5491                                     sizeof(struct btrfs_key_ptr));
5492                 if (slot == 0) {
5493                         struct btrfs_disk_key key;
5494                         btrfs_node_key(buf, &key, 0);
5495                         btrfs_fixup_low_keys(root, path, &key,
5496                                              btrfs_header_level(buf) + 1);
5497                 }
5498         } else {
5499                 struct btrfs_item *item1, *item2;
5500                 struct btrfs_key k1, k2;
5501                 char *item1_data, *item2_data;
5502                 u32 item1_offset, item2_offset, item1_size, item2_size;
5503
5504                 item1 = btrfs_item_nr(slot);
5505                 item2 = btrfs_item_nr(slot + 1);
5506                 btrfs_item_key_to_cpu(buf, &k1, slot);
5507                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5508                 item1_offset = btrfs_item_offset(buf, item1);
5509                 item2_offset = btrfs_item_offset(buf, item2);
5510                 item1_size = btrfs_item_size(buf, item1);
5511                 item2_size = btrfs_item_size(buf, item2);
5512
5513                 item1_data = malloc(item1_size);
5514                 if (!item1_data)
5515                         return -ENOMEM;
5516                 item2_data = malloc(item2_size);
5517                 if (!item2_data) {
5518                         free(item1_data);
5519                         return -ENOMEM;
5520                 }
5521
5522                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5523                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5524
5525                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5526                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5527                 free(item1_data);
5528                 free(item2_data);
5529
5530                 btrfs_set_item_offset(buf, item1, item2_offset);
5531                 btrfs_set_item_offset(buf, item2, item1_offset);
5532                 btrfs_set_item_size(buf, item1, item2_size);
5533                 btrfs_set_item_size(buf, item2, item1_size);
5534
5535                 path->slots[0] = slot;
5536                 btrfs_set_item_key_unsafe(root, path, &k2);
5537                 path->slots[0] = slot + 1;
5538                 btrfs_set_item_key_unsafe(root, path, &k1);
5539         }
5540         return 0;
5541 }
5542
5543 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5544 {
5545         struct extent_buffer *buf;
5546         struct btrfs_key k1, k2;
5547         int i;
5548         int level = path->lowest_level;
5549         int ret = -EIO;
5550
5551         buf = path->nodes[level];
5552         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5553                 if (level) {
5554                         btrfs_node_key_to_cpu(buf, &k1, i);
5555                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5556                 } else {
5557                         btrfs_item_key_to_cpu(buf, &k1, i);
5558                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5559                 }
5560                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5561                         continue;
5562                 ret = swap_values(root, path, buf, i);
5563                 if (ret)
5564                         break;
5565                 btrfs_mark_buffer_dirty(buf);
5566                 i = 0;
5567         }
5568         return ret;
5569 }
5570
5571 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5572                              struct btrfs_root *root,
5573                              struct btrfs_path *path,
5574                              struct extent_buffer *buf, int slot)
5575 {
5576         struct btrfs_key key;
5577         int nritems = btrfs_header_nritems(buf);
5578
5579         btrfs_item_key_to_cpu(buf, &key, slot);
5580
5581         /* These are all the keys we can deal with missing. */
5582         if (key.type != BTRFS_DIR_INDEX_KEY &&
5583             key.type != BTRFS_EXTENT_ITEM_KEY &&
5584             key.type != BTRFS_METADATA_ITEM_KEY &&
5585             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5586             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5587                 return -1;
5588
5589         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5590                (unsigned long long)key.objectid, key.type,
5591                (unsigned long long)key.offset, slot, buf->start);
5592         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5593                               btrfs_item_nr_offset(slot + 1),
5594                               sizeof(struct btrfs_item) *
5595                               (nritems - slot - 1));
5596         btrfs_set_header_nritems(buf, nritems - 1);
5597         if (slot == 0) {
5598                 struct btrfs_disk_key disk_key;
5599
5600                 btrfs_item_key(buf, &disk_key, 0);
5601                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5602         }
5603         btrfs_mark_buffer_dirty(buf);
5604         return 0;
5605 }
5606
5607 static int fix_item_offset(struct btrfs_trans_handle *trans,
5608                            struct btrfs_root *root,
5609                            struct btrfs_path *path)
5610 {
5611         struct extent_buffer *buf;
5612         int i;
5613         int ret = 0;
5614
5615         /* We should only get this for leaves */
5616         BUG_ON(path->lowest_level);
5617         buf = path->nodes[0];
5618 again:
5619         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5620                 unsigned int shift = 0, offset;
5621
5622                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5623                     BTRFS_LEAF_DATA_SIZE(root)) {
5624                         if (btrfs_item_end_nr(buf, i) >
5625                             BTRFS_LEAF_DATA_SIZE(root)) {
5626                                 ret = delete_bogus_item(trans, root, path,
5627                                                         buf, i);
5628                                 if (!ret)
5629                                         goto again;
5630                                 fprintf(stderr, "item is off the end of the "
5631                                         "leaf, can't fix\n");
5632                                 ret = -EIO;
5633                                 break;
5634                         }
5635                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5636                                 btrfs_item_end_nr(buf, i);
5637                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5638                            btrfs_item_offset_nr(buf, i - 1)) {
5639                         if (btrfs_item_end_nr(buf, i) >
5640                             btrfs_item_offset_nr(buf, i - 1)) {
5641                                 ret = delete_bogus_item(trans, root, path,
5642                                                         buf, i);
5643                                 if (!ret)
5644                                         goto again;
5645                                 fprintf(stderr, "items overlap, can't fix\n");
5646                                 ret = -EIO;
5647                                 break;
5648                         }
5649                         shift = btrfs_item_offset_nr(buf, i - 1) -
5650                                 btrfs_item_end_nr(buf, i);
5651                 }
5652                 if (!shift)
5653                         continue;
5654
5655                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5656                        i, shift, (unsigned long long)buf->start);
5657                 offset = btrfs_item_offset_nr(buf, i);
5658                 memmove_extent_buffer(buf,
5659                                       btrfs_leaf_data(buf) + offset + shift,
5660                                       btrfs_leaf_data(buf) + offset,
5661                                       btrfs_item_size_nr(buf, i));
5662                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5663                                       offset + shift);
5664                 btrfs_mark_buffer_dirty(buf);
5665         }
5666
5667         /*
5668          * We may have moved things, in which case we want to exit so we don't
5669          * write those changes out.  Once we have proper abort functionality in
5670          * progs this can be changed to something nicer.
5671          */
5672         BUG_ON(ret);
5673         return ret;
5674 }
5675
5676 /*
5677  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5678  * then just return -EIO.
5679  */
5680 static int try_to_fix_bad_block(struct btrfs_root *root,
5681                                 struct extent_buffer *buf,
5682                                 enum btrfs_tree_block_status status)
5683 {
5684         struct btrfs_trans_handle *trans;
5685         struct ulist *roots;
5686         struct ulist_node *node;
5687         struct btrfs_root *search_root;
5688         struct btrfs_path path;
5689         struct ulist_iterator iter;
5690         struct btrfs_key root_key, key;
5691         int ret;
5692
5693         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5694             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5695                 return -EIO;
5696
5697         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5698         if (ret)
5699                 return -EIO;
5700
5701         btrfs_init_path(&path);
5702         ULIST_ITER_INIT(&iter);
5703         while ((node = ulist_next(roots, &iter))) {
5704                 root_key.objectid = node->val;
5705                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5706                 root_key.offset = (u64)-1;
5707
5708                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5709                 if (IS_ERR(root)) {
5710                         ret = -EIO;
5711                         break;
5712                 }
5713
5714
5715                 trans = btrfs_start_transaction(search_root, 0);
5716                 if (IS_ERR(trans)) {
5717                         ret = PTR_ERR(trans);
5718                         break;
5719                 }
5720
5721                 path.lowest_level = btrfs_header_level(buf);
5722                 path.skip_check_block = 1;
5723                 if (path.lowest_level)
5724                         btrfs_node_key_to_cpu(buf, &key, 0);
5725                 else
5726                         btrfs_item_key_to_cpu(buf, &key, 0);
5727                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5728                 if (ret) {
5729                         ret = -EIO;
5730                         btrfs_commit_transaction(trans, search_root);
5731                         break;
5732                 }
5733                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5734                         ret = fix_key_order(search_root, &path);
5735                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5736                         ret = fix_item_offset(trans, search_root, &path);
5737                 if (ret) {
5738                         btrfs_commit_transaction(trans, search_root);
5739                         break;
5740                 }
5741                 btrfs_release_path(&path);
5742                 btrfs_commit_transaction(trans, search_root);
5743         }
5744         ulist_free(roots);
5745         btrfs_release_path(&path);
5746         return ret;
5747 }
5748
5749 static int check_block(struct btrfs_root *root,
5750                        struct cache_tree *extent_cache,
5751                        struct extent_buffer *buf, u64 flags)
5752 {
5753         struct extent_record *rec;
5754         struct cache_extent *cache;
5755         struct btrfs_key key;
5756         enum btrfs_tree_block_status status;
5757         int ret = 0;
5758         int level;
5759
5760         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5761         if (!cache)
5762                 return 1;
5763         rec = container_of(cache, struct extent_record, cache);
5764         rec->generation = btrfs_header_generation(buf);
5765
5766         level = btrfs_header_level(buf);
5767         if (btrfs_header_nritems(buf) > 0) {
5768
5769                 if (level == 0)
5770                         btrfs_item_key_to_cpu(buf, &key, 0);
5771                 else
5772                         btrfs_node_key_to_cpu(buf, &key, 0);
5773
5774                 rec->info_objectid = key.objectid;
5775         }
5776         rec->info_level = level;
5777
5778         if (btrfs_is_leaf(buf))
5779                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5780         else
5781                 status = btrfs_check_node(root, &rec->parent_key, buf);
5782
5783         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5784                 if (repair)
5785                         status = try_to_fix_bad_block(root, buf, status);
5786                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5787                         ret = -EIO;
5788                         fprintf(stderr, "bad block %llu\n",
5789                                 (unsigned long long)buf->start);
5790                 } else {
5791                         /*
5792                          * Signal to callers we need to start the scan over
5793                          * again since we'll have cowed blocks.
5794                          */
5795                         ret = -EAGAIN;
5796                 }
5797         } else {
5798                 rec->content_checked = 1;
5799                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5800                         rec->owner_ref_checked = 1;
5801                 else {
5802                         ret = check_owner_ref(root, rec, buf);
5803                         if (!ret)
5804                                 rec->owner_ref_checked = 1;
5805                 }
5806         }
5807         if (!ret)
5808                 maybe_free_extent_rec(extent_cache, rec);
5809         return ret;
5810 }
5811
5812 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5813                                                 u64 parent, u64 root)
5814 {
5815         struct list_head *cur = rec->backrefs.next;
5816         struct extent_backref *node;
5817         struct tree_backref *back;
5818
5819         while(cur != &rec->backrefs) {
5820                 node = to_extent_backref(cur);
5821                 cur = cur->next;
5822                 if (node->is_data)
5823                         continue;
5824                 back = to_tree_backref(node);
5825                 if (parent > 0) {
5826                         if (!node->full_backref)
5827                                 continue;
5828                         if (parent == back->parent)
5829                                 return back;
5830                 } else {
5831                         if (node->full_backref)
5832                                 continue;
5833                         if (back->root == root)
5834                                 return back;
5835                 }
5836         }
5837         return NULL;
5838 }
5839
5840 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5841                                                 u64 parent, u64 root)
5842 {
5843         struct tree_backref *ref = malloc(sizeof(*ref));
5844
5845         if (!ref)
5846                 return NULL;
5847         memset(&ref->node, 0, sizeof(ref->node));
5848         if (parent > 0) {
5849                 ref->parent = parent;
5850                 ref->node.full_backref = 1;
5851         } else {
5852                 ref->root = root;
5853                 ref->node.full_backref = 0;
5854         }
5855         list_add_tail(&ref->node.list, &rec->backrefs);
5856
5857         return ref;
5858 }
5859
5860 static struct data_backref *find_data_backref(struct extent_record *rec,
5861                                                 u64 parent, u64 root,
5862                                                 u64 owner, u64 offset,
5863                                                 int found_ref,
5864                                                 u64 disk_bytenr, u64 bytes)
5865 {
5866         struct list_head *cur = rec->backrefs.next;
5867         struct extent_backref *node;
5868         struct data_backref *back;
5869
5870         while(cur != &rec->backrefs) {
5871                 node = to_extent_backref(cur);
5872                 cur = cur->next;
5873                 if (!node->is_data)
5874                         continue;
5875                 back = to_data_backref(node);
5876                 if (parent > 0) {
5877                         if (!node->full_backref)
5878                                 continue;
5879                         if (parent == back->parent)
5880                                 return back;
5881                 } else {
5882                         if (node->full_backref)
5883                                 continue;
5884                         if (back->root == root && back->owner == owner &&
5885                             back->offset == offset) {
5886                                 if (found_ref && node->found_ref &&
5887                                     (back->bytes != bytes ||
5888                                     back->disk_bytenr != disk_bytenr))
5889                                         continue;
5890                                 return back;
5891                         }
5892                 }
5893         }
5894         return NULL;
5895 }
5896
5897 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5898                                                 u64 parent, u64 root,
5899                                                 u64 owner, u64 offset,
5900                                                 u64 max_size)
5901 {
5902         struct data_backref *ref = malloc(sizeof(*ref));
5903
5904         if (!ref)
5905                 return NULL;
5906         memset(&ref->node, 0, sizeof(ref->node));
5907         ref->node.is_data = 1;
5908
5909         if (parent > 0) {
5910                 ref->parent = parent;
5911                 ref->owner = 0;
5912                 ref->offset = 0;
5913                 ref->node.full_backref = 1;
5914         } else {
5915                 ref->root = root;
5916                 ref->owner = owner;
5917                 ref->offset = offset;
5918                 ref->node.full_backref = 0;
5919         }
5920         ref->bytes = max_size;
5921         ref->found_ref = 0;
5922         ref->num_refs = 0;
5923         list_add_tail(&ref->node.list, &rec->backrefs);
5924         if (max_size > rec->max_size)
5925                 rec->max_size = max_size;
5926         return ref;
5927 }
5928
5929 /* Check if the type of extent matches with its chunk */
5930 static void check_extent_type(struct extent_record *rec)
5931 {
5932         struct btrfs_block_group_cache *bg_cache;
5933
5934         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5935         if (!bg_cache)
5936                 return;
5937
5938         /* data extent, check chunk directly*/
5939         if (!rec->metadata) {
5940                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5941                         rec->wrong_chunk_type = 1;
5942                 return;
5943         }
5944
5945         /* metadata extent, check the obvious case first */
5946         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5947                                  BTRFS_BLOCK_GROUP_METADATA))) {
5948                 rec->wrong_chunk_type = 1;
5949                 return;
5950         }
5951
5952         /*
5953          * Check SYSTEM extent, as it's also marked as metadata, we can only
5954          * make sure it's a SYSTEM extent by its backref
5955          */
5956         if (!list_empty(&rec->backrefs)) {
5957                 struct extent_backref *node;
5958                 struct tree_backref *tback;
5959                 u64 bg_type;
5960
5961                 node = to_extent_backref(rec->backrefs.next);
5962                 if (node->is_data) {
5963                         /* tree block shouldn't have data backref */
5964                         rec->wrong_chunk_type = 1;
5965                         return;
5966                 }
5967                 tback = container_of(node, struct tree_backref, node);
5968
5969                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5970                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5971                 else
5972                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5973                 if (!(bg_cache->flags & bg_type))
5974                         rec->wrong_chunk_type = 1;
5975         }
5976 }
5977
5978 /*
5979  * Allocate a new extent record, fill default values from @tmpl and insert int
5980  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5981  * the cache, otherwise it fails.
5982  */
5983 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5984                 struct extent_record *tmpl)
5985 {
5986         struct extent_record *rec;
5987         int ret = 0;
5988
5989         rec = malloc(sizeof(*rec));
5990         if (!rec)
5991                 return -ENOMEM;
5992         rec->start = tmpl->start;
5993         rec->max_size = tmpl->max_size;
5994         rec->nr = max(tmpl->nr, tmpl->max_size);
5995         rec->found_rec = tmpl->found_rec;
5996         rec->content_checked = tmpl->content_checked;
5997         rec->owner_ref_checked = tmpl->owner_ref_checked;
5998         rec->num_duplicates = 0;
5999         rec->metadata = tmpl->metadata;
6000         rec->flag_block_full_backref = FLAG_UNSET;
6001         rec->bad_full_backref = 0;
6002         rec->crossing_stripes = 0;
6003         rec->wrong_chunk_type = 0;
6004         rec->is_root = tmpl->is_root;
6005         rec->refs = tmpl->refs;
6006         rec->extent_item_refs = tmpl->extent_item_refs;
6007         rec->parent_generation = tmpl->parent_generation;
6008         INIT_LIST_HEAD(&rec->backrefs);
6009         INIT_LIST_HEAD(&rec->dups);
6010         INIT_LIST_HEAD(&rec->list);
6011         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6012         rec->cache.start = tmpl->start;
6013         rec->cache.size = tmpl->nr;
6014         ret = insert_cache_extent(extent_cache, &rec->cache);
6015         if (ret) {
6016                 free(rec);
6017                 return ret;
6018         }
6019         bytes_used += rec->nr;
6020
6021         if (tmpl->metadata)
6022                 rec->crossing_stripes = check_crossing_stripes(global_info,
6023                                 rec->start, global_info->tree_root->nodesize);
6024         check_extent_type(rec);
6025         return ret;
6026 }
6027
6028 /*
6029  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6030  * some are hints:
6031  * - refs              - if found, increase refs
6032  * - is_root           - if found, set
6033  * - content_checked   - if found, set
6034  * - owner_ref_checked - if found, set
6035  *
6036  * If not found, create a new one, initialize and insert.
6037  */
6038 static int add_extent_rec(struct cache_tree *extent_cache,
6039                 struct extent_record *tmpl)
6040 {
6041         struct extent_record *rec;
6042         struct cache_extent *cache;
6043         int ret = 0;
6044         int dup = 0;
6045
6046         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6047         if (cache) {
6048                 rec = container_of(cache, struct extent_record, cache);
6049                 if (tmpl->refs)
6050                         rec->refs++;
6051                 if (rec->nr == 1)
6052                         rec->nr = max(tmpl->nr, tmpl->max_size);
6053
6054                 /*
6055                  * We need to make sure to reset nr to whatever the extent
6056                  * record says was the real size, this way we can compare it to
6057                  * the backrefs.
6058                  */
6059                 if (tmpl->found_rec) {
6060                         if (tmpl->start != rec->start || rec->found_rec) {
6061                                 struct extent_record *tmp;
6062
6063                                 dup = 1;
6064                                 if (list_empty(&rec->list))
6065                                         list_add_tail(&rec->list,
6066                                                       &duplicate_extents);
6067
6068                                 /*
6069                                  * We have to do this song and dance in case we
6070                                  * find an extent record that falls inside of
6071                                  * our current extent record but does not have
6072                                  * the same objectid.
6073                                  */
6074                                 tmp = malloc(sizeof(*tmp));
6075                                 if (!tmp)
6076                                         return -ENOMEM;
6077                                 tmp->start = tmpl->start;
6078                                 tmp->max_size = tmpl->max_size;
6079                                 tmp->nr = tmpl->nr;
6080                                 tmp->found_rec = 1;
6081                                 tmp->metadata = tmpl->metadata;
6082                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6083                                 INIT_LIST_HEAD(&tmp->list);
6084                                 list_add_tail(&tmp->list, &rec->dups);
6085                                 rec->num_duplicates++;
6086                         } else {
6087                                 rec->nr = tmpl->nr;
6088                                 rec->found_rec = 1;
6089                         }
6090                 }
6091
6092                 if (tmpl->extent_item_refs && !dup) {
6093                         if (rec->extent_item_refs) {
6094                                 fprintf(stderr, "block %llu rec "
6095                                         "extent_item_refs %llu, passed %llu\n",
6096                                         (unsigned long long)tmpl->start,
6097                                         (unsigned long long)
6098                                                         rec->extent_item_refs,
6099                                         (unsigned long long)tmpl->extent_item_refs);
6100                         }
6101                         rec->extent_item_refs = tmpl->extent_item_refs;
6102                 }
6103                 if (tmpl->is_root)
6104                         rec->is_root = 1;
6105                 if (tmpl->content_checked)
6106                         rec->content_checked = 1;
6107                 if (tmpl->owner_ref_checked)
6108                         rec->owner_ref_checked = 1;
6109                 memcpy(&rec->parent_key, &tmpl->parent_key,
6110                                 sizeof(tmpl->parent_key));
6111                 if (tmpl->parent_generation)
6112                         rec->parent_generation = tmpl->parent_generation;
6113                 if (rec->max_size < tmpl->max_size)
6114                         rec->max_size = tmpl->max_size;
6115
6116                 /*
6117                  * A metadata extent can't cross stripe_len boundary, otherwise
6118                  * kernel scrub won't be able to handle it.
6119                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6120                  * it.
6121                  */
6122                 if (tmpl->metadata)
6123                         rec->crossing_stripes = check_crossing_stripes(
6124                                         global_info, rec->start,
6125                                         global_info->tree_root->nodesize);
6126                 check_extent_type(rec);
6127                 maybe_free_extent_rec(extent_cache, rec);
6128                 return ret;
6129         }
6130
6131         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6132
6133         return ret;
6134 }
6135
6136 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6137                             u64 parent, u64 root, int found_ref)
6138 {
6139         struct extent_record *rec;
6140         struct tree_backref *back;
6141         struct cache_extent *cache;
6142         int ret;
6143
6144         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6145         if (!cache) {
6146                 struct extent_record tmpl;
6147
6148                 memset(&tmpl, 0, sizeof(tmpl));
6149                 tmpl.start = bytenr;
6150                 tmpl.nr = 1;
6151                 tmpl.metadata = 1;
6152
6153                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6154                 if (ret)
6155                         return ret;
6156
6157                 /* really a bug in cache_extent implement now */
6158                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6159                 if (!cache)
6160                         return -ENOENT;
6161         }
6162
6163         rec = container_of(cache, struct extent_record, cache);
6164         if (rec->start != bytenr) {
6165                 /*
6166                  * Several cause, from unaligned bytenr to over lapping extents
6167                  */
6168                 return -EEXIST;
6169         }
6170
6171         back = find_tree_backref(rec, parent, root);
6172         if (!back) {
6173                 back = alloc_tree_backref(rec, parent, root);
6174                 if (!back)
6175                         return -ENOMEM;
6176         }
6177
6178         if (found_ref) {
6179                 if (back->node.found_ref) {
6180                         fprintf(stderr, "Extent back ref already exists "
6181                                 "for %llu parent %llu root %llu \n",
6182                                 (unsigned long long)bytenr,
6183                                 (unsigned long long)parent,
6184                                 (unsigned long long)root);
6185                 }
6186                 back->node.found_ref = 1;
6187         } else {
6188                 if (back->node.found_extent_tree) {
6189                         fprintf(stderr, "Extent back ref already exists "
6190                                 "for %llu parent %llu root %llu \n",
6191                                 (unsigned long long)bytenr,
6192                                 (unsigned long long)parent,
6193                                 (unsigned long long)root);
6194                 }
6195                 back->node.found_extent_tree = 1;
6196         }
6197         check_extent_type(rec);
6198         maybe_free_extent_rec(extent_cache, rec);
6199         return 0;
6200 }
6201
6202 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6203                             u64 parent, u64 root, u64 owner, u64 offset,
6204                             u32 num_refs, int found_ref, u64 max_size)
6205 {
6206         struct extent_record *rec;
6207         struct data_backref *back;
6208         struct cache_extent *cache;
6209         int ret;
6210
6211         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6212         if (!cache) {
6213                 struct extent_record tmpl;
6214
6215                 memset(&tmpl, 0, sizeof(tmpl));
6216                 tmpl.start = bytenr;
6217                 tmpl.nr = 1;
6218                 tmpl.max_size = max_size;
6219
6220                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6221                 if (ret)
6222                         return ret;
6223
6224                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6225                 if (!cache)
6226                         abort();
6227         }
6228
6229         rec = container_of(cache, struct extent_record, cache);
6230         if (rec->max_size < max_size)
6231                 rec->max_size = max_size;
6232
6233         /*
6234          * If found_ref is set then max_size is the real size and must match the
6235          * existing refs.  So if we have already found a ref then we need to
6236          * make sure that this ref matches the existing one, otherwise we need
6237          * to add a new backref so we can notice that the backrefs don't match
6238          * and we need to figure out who is telling the truth.  This is to
6239          * account for that awful fsync bug I introduced where we'd end up with
6240          * a btrfs_file_extent_item that would have its length include multiple
6241          * prealloc extents or point inside of a prealloc extent.
6242          */
6243         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6244                                  bytenr, max_size);
6245         if (!back) {
6246                 back = alloc_data_backref(rec, parent, root, owner, offset,
6247                                           max_size);
6248                 BUG_ON(!back);
6249         }
6250
6251         if (found_ref) {
6252                 BUG_ON(num_refs != 1);
6253                 if (back->node.found_ref)
6254                         BUG_ON(back->bytes != max_size);
6255                 back->node.found_ref = 1;
6256                 back->found_ref += 1;
6257                 back->bytes = max_size;
6258                 back->disk_bytenr = bytenr;
6259                 rec->refs += 1;
6260                 rec->content_checked = 1;
6261                 rec->owner_ref_checked = 1;
6262         } else {
6263                 if (back->node.found_extent_tree) {
6264                         fprintf(stderr, "Extent back ref already exists "
6265                                 "for %llu parent %llu root %llu "
6266                                 "owner %llu offset %llu num_refs %lu\n",
6267                                 (unsigned long long)bytenr,
6268                                 (unsigned long long)parent,
6269                                 (unsigned long long)root,
6270                                 (unsigned long long)owner,
6271                                 (unsigned long long)offset,
6272                                 (unsigned long)num_refs);
6273                 }
6274                 back->num_refs = num_refs;
6275                 back->node.found_extent_tree = 1;
6276         }
6277         maybe_free_extent_rec(extent_cache, rec);
6278         return 0;
6279 }
6280
6281 static int add_pending(struct cache_tree *pending,
6282                        struct cache_tree *seen, u64 bytenr, u32 size)
6283 {
6284         int ret;
6285         ret = add_cache_extent(seen, bytenr, size);
6286         if (ret)
6287                 return ret;
6288         add_cache_extent(pending, bytenr, size);
6289         return 0;
6290 }
6291
6292 static int pick_next_pending(struct cache_tree *pending,
6293                         struct cache_tree *reada,
6294                         struct cache_tree *nodes,
6295                         u64 last, struct block_info *bits, int bits_nr,
6296                         int *reada_bits)
6297 {
6298         unsigned long node_start = last;
6299         struct cache_extent *cache;
6300         int ret;
6301
6302         cache = search_cache_extent(reada, 0);
6303         if (cache) {
6304                 bits[0].start = cache->start;
6305                 bits[0].size = cache->size;
6306                 *reada_bits = 1;
6307                 return 1;
6308         }
6309         *reada_bits = 0;
6310         if (node_start > 32768)
6311                 node_start -= 32768;
6312
6313         cache = search_cache_extent(nodes, node_start);
6314         if (!cache)
6315                 cache = search_cache_extent(nodes, 0);
6316
6317         if (!cache) {
6318                  cache = search_cache_extent(pending, 0);
6319                  if (!cache)
6320                          return 0;
6321                  ret = 0;
6322                  do {
6323                          bits[ret].start = cache->start;
6324                          bits[ret].size = cache->size;
6325                          cache = next_cache_extent(cache);
6326                          ret++;
6327                  } while (cache && ret < bits_nr);
6328                  return ret;
6329         }
6330
6331         ret = 0;
6332         do {
6333                 bits[ret].start = cache->start;
6334                 bits[ret].size = cache->size;
6335                 cache = next_cache_extent(cache);
6336                 ret++;
6337         } while (cache && ret < bits_nr);
6338
6339         if (bits_nr - ret > 8) {
6340                 u64 lookup = bits[0].start + bits[0].size;
6341                 struct cache_extent *next;
6342                 next = search_cache_extent(pending, lookup);
6343                 while(next) {
6344                         if (next->start - lookup > 32768)
6345                                 break;
6346                         bits[ret].start = next->start;
6347                         bits[ret].size = next->size;
6348                         lookup = next->start + next->size;
6349                         ret++;
6350                         if (ret == bits_nr)
6351                                 break;
6352                         next = next_cache_extent(next);
6353                         if (!next)
6354                                 break;
6355                 }
6356         }
6357         return ret;
6358 }
6359
6360 static void free_chunk_record(struct cache_extent *cache)
6361 {
6362         struct chunk_record *rec;
6363
6364         rec = container_of(cache, struct chunk_record, cache);
6365         list_del_init(&rec->list);
6366         list_del_init(&rec->dextents);
6367         free(rec);
6368 }
6369
6370 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6371 {
6372         cache_tree_free_extents(chunk_cache, free_chunk_record);
6373 }
6374
6375 static void free_device_record(struct rb_node *node)
6376 {
6377         struct device_record *rec;
6378
6379         rec = container_of(node, struct device_record, node);
6380         free(rec);
6381 }
6382
6383 FREE_RB_BASED_TREE(device_cache, free_device_record);
6384
6385 int insert_block_group_record(struct block_group_tree *tree,
6386                               struct block_group_record *bg_rec)
6387 {
6388         int ret;
6389
6390         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6391         if (ret)
6392                 return ret;
6393
6394         list_add_tail(&bg_rec->list, &tree->block_groups);
6395         return 0;
6396 }
6397
6398 static void free_block_group_record(struct cache_extent *cache)
6399 {
6400         struct block_group_record *rec;
6401
6402         rec = container_of(cache, struct block_group_record, cache);
6403         list_del_init(&rec->list);
6404         free(rec);
6405 }
6406
6407 void free_block_group_tree(struct block_group_tree *tree)
6408 {
6409         cache_tree_free_extents(&tree->tree, free_block_group_record);
6410 }
6411
6412 int insert_device_extent_record(struct device_extent_tree *tree,
6413                                 struct device_extent_record *de_rec)
6414 {
6415         int ret;
6416
6417         /*
6418          * Device extent is a bit different from the other extents, because
6419          * the extents which belong to the different devices may have the
6420          * same start and size, so we need use the special extent cache
6421          * search/insert functions.
6422          */
6423         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6424         if (ret)
6425                 return ret;
6426
6427         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6428         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6429         return 0;
6430 }
6431
6432 static void free_device_extent_record(struct cache_extent *cache)
6433 {
6434         struct device_extent_record *rec;
6435
6436         rec = container_of(cache, struct device_extent_record, cache);
6437         if (!list_empty(&rec->chunk_list))
6438                 list_del_init(&rec->chunk_list);
6439         if (!list_empty(&rec->device_list))
6440                 list_del_init(&rec->device_list);
6441         free(rec);
6442 }
6443
6444 void free_device_extent_tree(struct device_extent_tree *tree)
6445 {
6446         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6447 }
6448
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the back reference described by the v0 extent ref item at @slot of
 * @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the item's ref
 * count.  In both cases key.objectid is the extent bytenr and key.offset the
 * parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
				key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
			0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6469
6470 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6471                                             struct btrfs_key *key,
6472                                             int slot)
6473 {
6474         struct btrfs_chunk *ptr;
6475         struct chunk_record *rec;
6476         int num_stripes, i;
6477
6478         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6479         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6480
6481         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6482         if (!rec) {
6483                 fprintf(stderr, "memory allocation failed\n");
6484                 exit(-1);
6485         }
6486
6487         INIT_LIST_HEAD(&rec->list);
6488         INIT_LIST_HEAD(&rec->dextents);
6489         rec->bg_rec = NULL;
6490
6491         rec->cache.start = key->offset;
6492         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6493
6494         rec->generation = btrfs_header_generation(leaf);
6495
6496         rec->objectid = key->objectid;
6497         rec->type = key->type;
6498         rec->offset = key->offset;
6499
6500         rec->length = rec->cache.size;
6501         rec->owner = btrfs_chunk_owner(leaf, ptr);
6502         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6503         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6504         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6505         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6506         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6507         rec->num_stripes = num_stripes;
6508         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6509
6510         for (i = 0; i < rec->num_stripes; ++i) {
6511                 rec->stripes[i].devid =
6512                         btrfs_stripe_devid_nr(leaf, ptr, i);
6513                 rec->stripes[i].offset =
6514                         btrfs_stripe_offset_nr(leaf, ptr, i);
6515                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6516                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6517                                 BTRFS_UUID_SIZE);
6518         }
6519
6520         return rec;
6521 }
6522
6523 static int process_chunk_item(struct cache_tree *chunk_cache,
6524                               struct btrfs_key *key, struct extent_buffer *eb,
6525                               int slot)
6526 {
6527         struct chunk_record *rec;
6528         struct btrfs_chunk *chunk;
6529         int ret = 0;
6530
6531         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6532         /*
6533          * Do extra check for this chunk item,
6534          *
6535          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6536          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6537          * and owner<->key_type check.
6538          */
6539         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6540                                       key->offset);
6541         if (ret < 0) {
6542                 error("chunk(%llu, %llu) is not valid, ignore it",
6543                       key->offset, btrfs_chunk_length(eb, chunk));
6544                 return 0;
6545         }
6546         rec = btrfs_new_chunk_record(eb, key, slot);
6547         ret = insert_cache_extent(chunk_cache, &rec->cache);
6548         if (ret) {
6549                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6550                         rec->offset, rec->length);
6551                 free(rec);
6552         }
6553
6554         return ret;
6555 }
6556
6557 static int process_device_item(struct rb_root *dev_cache,
6558                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6559 {
6560         struct btrfs_dev_item *ptr;
6561         struct device_record *rec;
6562         int ret = 0;
6563
6564         ptr = btrfs_item_ptr(eb,
6565                 slot, struct btrfs_dev_item);
6566
6567         rec = malloc(sizeof(*rec));
6568         if (!rec) {
6569                 fprintf(stderr, "memory allocation failed\n");
6570                 return -ENOMEM;
6571         }
6572
6573         rec->devid = key->offset;
6574         rec->generation = btrfs_header_generation(eb);
6575
6576         rec->objectid = key->objectid;
6577         rec->type = key->type;
6578         rec->offset = key->offset;
6579
6580         rec->devid = btrfs_device_id(eb, ptr);
6581         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6582         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6583
6584         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6585         if (ret) {
6586                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6587                 free(rec);
6588         }
6589
6590         return ret;
6591 }
6592
6593 struct block_group_record *
6594 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6595                              int slot)
6596 {
6597         struct btrfs_block_group_item *ptr;
6598         struct block_group_record *rec;
6599
6600         rec = calloc(1, sizeof(*rec));
6601         if (!rec) {
6602                 fprintf(stderr, "memory allocation failed\n");
6603                 exit(-1);
6604         }
6605
6606         rec->cache.start = key->objectid;
6607         rec->cache.size = key->offset;
6608
6609         rec->generation = btrfs_header_generation(leaf);
6610
6611         rec->objectid = key->objectid;
6612         rec->type = key->type;
6613         rec->offset = key->offset;
6614
6615         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6616         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6617
6618         INIT_LIST_HEAD(&rec->list);
6619
6620         return rec;
6621 }
6622
6623 static int process_block_group_item(struct block_group_tree *block_group_cache,
6624                                     struct btrfs_key *key,
6625                                     struct extent_buffer *eb, int slot)
6626 {
6627         struct block_group_record *rec;
6628         int ret = 0;
6629
6630         rec = btrfs_new_block_group_record(eb, key, slot);
6631         ret = insert_block_group_record(block_group_cache, rec);
6632         if (ret) {
6633                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6634                         rec->objectid, rec->offset);
6635                 free(rec);
6636         }
6637
6638         return ret;
6639 }
6640
6641 struct device_extent_record *
6642 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6643                                struct btrfs_key *key, int slot)
6644 {
6645         struct device_extent_record *rec;
6646         struct btrfs_dev_extent *ptr;
6647
6648         rec = calloc(1, sizeof(*rec));
6649         if (!rec) {
6650                 fprintf(stderr, "memory allocation failed\n");
6651                 exit(-1);
6652         }
6653
6654         rec->cache.objectid = key->objectid;
6655         rec->cache.start = key->offset;
6656
6657         rec->generation = btrfs_header_generation(leaf);
6658
6659         rec->objectid = key->objectid;
6660         rec->type = key->type;
6661         rec->offset = key->offset;
6662
6663         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6664         rec->chunk_objecteid =
6665                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6666         rec->chunk_offset =
6667                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6668         rec->length = btrfs_dev_extent_length(leaf, ptr);
6669         rec->cache.size = rec->length;
6670
6671         INIT_LIST_HEAD(&rec->chunk_list);
6672         INIT_LIST_HEAD(&rec->device_list);
6673
6674         return rec;
6675 }
6676
6677 static int
6678 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6679                            struct btrfs_key *key, struct extent_buffer *eb,
6680                            int slot)
6681 {
6682         struct device_extent_record *rec;
6683         int ret;
6684
6685         rec = btrfs_new_device_extent_record(eb, key, slot);
6686         ret = insert_device_extent_record(dev_extent_cache, rec);
6687         if (ret) {
6688                 fprintf(stderr,
6689                         "Device extent[%llu, %llu, %llu] existed.\n",
6690                         rec->objectid, rec->offset, rec->length);
6691                 free(rec);
6692         }
6693
6694         return ret;
6695 }
6696
6697 static int process_extent_item(struct btrfs_root *root,
6698                                struct cache_tree *extent_cache,
6699                                struct extent_buffer *eb, int slot)
6700 {
6701         struct btrfs_extent_item *ei;
6702         struct btrfs_extent_inline_ref *iref;
6703         struct btrfs_extent_data_ref *dref;
6704         struct btrfs_shared_data_ref *sref;
6705         struct btrfs_key key;
6706         struct extent_record tmpl;
6707         unsigned long end;
6708         unsigned long ptr;
6709         int ret;
6710         int type;
6711         u32 item_size = btrfs_item_size_nr(eb, slot);
6712         u64 refs = 0;
6713         u64 offset;
6714         u64 num_bytes;
6715         int metadata = 0;
6716
6717         btrfs_item_key_to_cpu(eb, &key, slot);
6718
6719         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6720                 metadata = 1;
6721                 num_bytes = root->nodesize;
6722         } else {
6723                 num_bytes = key.offset;
6724         }
6725
6726         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6727                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6728                       key.objectid, root->sectorsize);
6729                 return -EIO;
6730         }
6731         if (item_size < sizeof(*ei)) {
6732 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6733                 struct btrfs_extent_item_v0 *ei0;
6734                 BUG_ON(item_size != sizeof(*ei0));
6735                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6736                 refs = btrfs_extent_refs_v0(eb, ei0);
6737 #else
6738                 BUG();
6739 #endif
6740                 memset(&tmpl, 0, sizeof(tmpl));
6741                 tmpl.start = key.objectid;
6742                 tmpl.nr = num_bytes;
6743                 tmpl.extent_item_refs = refs;
6744                 tmpl.metadata = metadata;
6745                 tmpl.found_rec = 1;
6746                 tmpl.max_size = num_bytes;
6747
6748                 return add_extent_rec(extent_cache, &tmpl);
6749         }
6750
6751         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6752         refs = btrfs_extent_refs(eb, ei);
6753         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6754                 metadata = 1;
6755         else
6756                 metadata = 0;
6757         if (metadata && num_bytes != root->nodesize) {
6758                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6759                       num_bytes, root->nodesize);
6760                 return -EIO;
6761         }
6762         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6763                 error("ignore invalid data extent, length %llu is not aligned to %u",
6764                       num_bytes, root->sectorsize);
6765                 return -EIO;
6766         }
6767
6768         memset(&tmpl, 0, sizeof(tmpl));
6769         tmpl.start = key.objectid;
6770         tmpl.nr = num_bytes;
6771         tmpl.extent_item_refs = refs;
6772         tmpl.metadata = metadata;
6773         tmpl.found_rec = 1;
6774         tmpl.max_size = num_bytes;
6775         add_extent_rec(extent_cache, &tmpl);
6776
6777         ptr = (unsigned long)(ei + 1);
6778         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6779             key.type == BTRFS_EXTENT_ITEM_KEY)
6780                 ptr += sizeof(struct btrfs_tree_block_info);
6781
6782         end = (unsigned long)ei + item_size;
6783         while (ptr < end) {
6784                 iref = (struct btrfs_extent_inline_ref *)ptr;
6785                 type = btrfs_extent_inline_ref_type(eb, iref);
6786                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6787                 switch (type) {
6788                 case BTRFS_TREE_BLOCK_REF_KEY:
6789                         ret = add_tree_backref(extent_cache, key.objectid,
6790                                         0, offset, 0);
6791                         if (ret < 0)
6792                                 error("add_tree_backref failed: %s",
6793                                       strerror(-ret));
6794                         break;
6795                 case BTRFS_SHARED_BLOCK_REF_KEY:
6796                         ret = add_tree_backref(extent_cache, key.objectid,
6797                                         offset, 0, 0);
6798                         if (ret < 0)
6799                                 error("add_tree_backref failed: %s",
6800                                       strerror(-ret));
6801                         break;
6802                 case BTRFS_EXTENT_DATA_REF_KEY:
6803                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6804                         add_data_backref(extent_cache, key.objectid, 0,
6805                                         btrfs_extent_data_ref_root(eb, dref),
6806                                         btrfs_extent_data_ref_objectid(eb,
6807                                                                        dref),
6808                                         btrfs_extent_data_ref_offset(eb, dref),
6809                                         btrfs_extent_data_ref_count(eb, dref),
6810                                         0, num_bytes);
6811                         break;
6812                 case BTRFS_SHARED_DATA_REF_KEY:
6813                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6814                         add_data_backref(extent_cache, key.objectid, offset,
6815                                         0, 0, 0,
6816                                         btrfs_shared_data_ref_count(eb, sref),
6817                                         0, num_bytes);
6818                         break;
6819                 default:
6820                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6821                                 key.objectid, key.type, num_bytes);
6822                         goto out;
6823                 }
6824                 ptr += btrfs_extent_inline_ref_size(type);
6825         }
6826         WARN_ON(ptr > end);
6827 out:
6828         return 0;
6829 }
6830
6831 static int check_cache_range(struct btrfs_root *root,
6832                              struct btrfs_block_group_cache *cache,
6833                              u64 offset, u64 bytes)
6834 {
6835         struct btrfs_free_space *entry;
6836         u64 *logical;
6837         u64 bytenr;
6838         int stripe_len;
6839         int i, nr, ret;
6840
6841         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6842                 bytenr = btrfs_sb_offset(i);
6843                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6844                                        cache->key.objectid, bytenr, 0,
6845                                        &logical, &nr, &stripe_len);
6846                 if (ret)
6847                         return ret;
6848
6849                 while (nr--) {
6850                         if (logical[nr] + stripe_len <= offset)
6851                                 continue;
6852                         if (offset + bytes <= logical[nr])
6853                                 continue;
6854                         if (logical[nr] == offset) {
6855                                 if (stripe_len >= bytes) {
6856                                         free(logical);
6857                                         return 0;
6858                                 }
6859                                 bytes -= stripe_len;
6860                                 offset += stripe_len;
6861                         } else if (logical[nr] < offset) {
6862                                 if (logical[nr] + stripe_len >=
6863                                     offset + bytes) {
6864                                         free(logical);
6865                                         return 0;
6866                                 }
6867                                 bytes = (offset + bytes) -
6868                                         (logical[nr] + stripe_len);
6869                                 offset = logical[nr] + stripe_len;
6870                         } else {
6871                                 /*
6872                                  * Could be tricky, the super may land in the
6873                                  * middle of the area we're checking.  First
6874                                  * check the easiest case, it's at the end.
6875                                  */
6876                                 if (logical[nr] + stripe_len >=
6877                                     bytes + offset) {
6878                                         bytes = logical[nr] - offset;
6879                                         continue;
6880                                 }
6881
6882                                 /* Check the left side */
6883                                 ret = check_cache_range(root, cache,
6884                                                         offset,
6885                                                         logical[nr] - offset);
6886                                 if (ret) {
6887                                         free(logical);
6888                                         return ret;
6889                                 }
6890
6891                                 /* Now we continue with the right side */
6892                                 bytes = (offset + bytes) -
6893                                         (logical[nr] + stripe_len);
6894                                 offset = logical[nr] + stripe_len;
6895                         }
6896                 }
6897
6898                 free(logical);
6899         }
6900
6901         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6902         if (!entry) {
6903                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6904                         offset, offset+bytes);
6905                 return -EINVAL;
6906         }
6907
6908         if (entry->offset != offset) {
6909                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6910                         entry->offset);
6911                 return -EINVAL;
6912         }
6913
6914         if (entry->bytes != bytes) {
6915                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6916                         bytes, entry->bytes, offset);
6917                 return -EINVAL;
6918         }
6919
6920         unlink_free_space(cache->free_space_ctl, entry);
6921         free(entry);
6922         return 0;
6923 }
6924
6925 static int verify_space_cache(struct btrfs_root *root,
6926                               struct btrfs_block_group_cache *cache)
6927 {
6928         struct btrfs_path path;
6929         struct extent_buffer *leaf;
6930         struct btrfs_key key;
6931         u64 last;
6932         int ret = 0;
6933
6934         root = root->fs_info->extent_root;
6935
6936         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6937
6938         btrfs_init_path(&path);
6939         key.objectid = last;
6940         key.offset = 0;
6941         key.type = BTRFS_EXTENT_ITEM_KEY;
6942         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6943         if (ret < 0)
6944                 goto out;
6945         ret = 0;
6946         while (1) {
6947                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6948                         ret = btrfs_next_leaf(root, &path);
6949                         if (ret < 0)
6950                                 goto out;
6951                         if (ret > 0) {
6952                                 ret = 0;
6953                                 break;
6954                         }
6955                 }
6956                 leaf = path.nodes[0];
6957                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6958                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6959                         break;
6960                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6961                     key.type != BTRFS_METADATA_ITEM_KEY) {
6962                         path.slots[0]++;
6963                         continue;
6964                 }
6965
6966                 if (last == key.objectid) {
6967                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6968                                 last = key.objectid + key.offset;
6969                         else
6970                                 last = key.objectid + root->nodesize;
6971                         path.slots[0]++;
6972                         continue;
6973                 }
6974
6975                 ret = check_cache_range(root, cache, last,
6976                                         key.objectid - last);
6977                 if (ret)
6978                         break;
6979                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6980                         last = key.objectid + key.offset;
6981                 else
6982                         last = key.objectid + root->nodesize;
6983                 path.slots[0]++;
6984         }
6985
6986         if (last < cache->key.objectid + cache->key.offset)
6987                 ret = check_cache_range(root, cache, last,
6988                                         cache->key.objectid +
6989                                         cache->key.offset - last);
6990
6991 out:
6992         btrfs_release_path(&path);
6993
6994         if (!ret &&
6995             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6996                 fprintf(stderr, "There are still entries left in the space "
6997                         "cache\n");
6998                 ret = -EINVAL;
6999         }
7000
7001         return ret;
7002 }
7003
7004 static int check_space_cache(struct btrfs_root *root)
7005 {
7006         struct btrfs_block_group_cache *cache;
7007         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7008         int ret;
7009         int error = 0;
7010
7011         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7012             btrfs_super_generation(root->fs_info->super_copy) !=
7013             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7014                 printf("cache and super generation don't match, space cache "
7015                        "will be invalidated\n");
7016                 return 0;
7017         }
7018
7019         if (ctx.progress_enabled) {
7020                 ctx.tp = TASK_FREE_SPACE;
7021                 task_start(ctx.info);
7022         }
7023
7024         while (1) {
7025                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7026                 if (!cache)
7027                         break;
7028
7029                 start = cache->key.objectid + cache->key.offset;
7030                 if (!cache->free_space_ctl) {
7031                         if (btrfs_init_free_space_ctl(cache,
7032                                                       root->sectorsize)) {
7033                                 ret = -ENOMEM;
7034                                 break;
7035                         }
7036                 } else {
7037                         btrfs_remove_free_space_cache(cache);
7038                 }
7039
7040                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7041                         ret = exclude_super_stripes(root, cache);
7042                         if (ret) {
7043                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7044                                         strerror(-ret));
7045                                 error++;
7046                                 continue;
7047                         }
7048                         ret = load_free_space_tree(root->fs_info, cache);
7049                         free_excluded_extents(root, cache);
7050                         if (ret < 0) {
7051                                 fprintf(stderr, "could not load free space tree: %s\n",
7052                                         strerror(-ret));
7053                                 error++;
7054                                 continue;
7055                         }
7056                         error += ret;
7057                 } else {
7058                         ret = load_free_space_cache(root->fs_info, cache);
7059                         if (!ret)
7060                                 continue;
7061                 }
7062
7063                 ret = verify_space_cache(root, cache);
7064                 if (ret) {
7065                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7066                                 cache->key.objectid);
7067                         error++;
7068                 }
7069         }
7070
7071         task_stop(ctx.info);
7072
7073         return error ? -EINVAL : 0;
7074 }
7075
7076 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7077                         u64 num_bytes, unsigned long leaf_offset,
7078                         struct extent_buffer *eb) {
7079
7080         u64 offset = 0;
7081         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7082         char *data;
7083         unsigned long csum_offset;
7084         u32 csum;
7085         u32 csum_expected;
7086         u64 read_len;
7087         u64 data_checked = 0;
7088         u64 tmp;
7089         int ret = 0;
7090         int mirror;
7091         int num_copies;
7092
7093         if (num_bytes % root->sectorsize)
7094                 return -EINVAL;
7095
7096         data = malloc(num_bytes);
7097         if (!data)
7098                 return -ENOMEM;
7099
7100         while (offset < num_bytes) {
7101                 mirror = 0;
7102 again:
7103                 read_len = num_bytes - offset;
7104                 /* read as much space once a time */
7105                 ret = read_extent_data(root, data + offset,
7106                                 bytenr + offset, &read_len, mirror);
7107                 if (ret)
7108                         goto out;
7109                 data_checked = 0;
7110                 /* verify every 4k data's checksum */
7111                 while (data_checked < read_len) {
7112                         csum = ~(u32)0;
7113                         tmp = offset + data_checked;
7114
7115                         csum = btrfs_csum_data((char *)data + tmp,
7116                                                csum, root->sectorsize);
7117                         btrfs_csum_final(csum, (u8 *)&csum);
7118
7119                         csum_offset = leaf_offset +
7120                                  tmp / root->sectorsize * csum_size;
7121                         read_extent_buffer(eb, (char *)&csum_expected,
7122                                            csum_offset, csum_size);
7123                         /* try another mirror */
7124                         if (csum != csum_expected) {
7125                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7126                                                 mirror, bytenr + tmp,
7127                                                 csum, csum_expected);
7128                                 num_copies = btrfs_num_copies(
7129                                                 &root->fs_info->mapping_tree,
7130                                                 bytenr, num_bytes);
7131                                 if (mirror < num_copies - 1) {
7132                                         mirror += 1;
7133                                         goto again;
7134                                 }
7135                         }
7136                         data_checked += root->sectorsize;
7137                 }
7138                 offset += read_len;
7139         }
7140 out:
7141         free(data);
7142         return ret;
7143 }
7144
7145 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7146                                u64 num_bytes)
7147 {
7148         struct btrfs_path path;
7149         struct extent_buffer *leaf;
7150         struct btrfs_key key;
7151         int ret;
7152
7153         btrfs_init_path(&path);
7154         key.objectid = bytenr;
7155         key.type = BTRFS_EXTENT_ITEM_KEY;
7156         key.offset = (u64)-1;
7157
7158 again:
7159         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7160                                 0, 0);
7161         if (ret < 0) {
7162                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7163                 btrfs_release_path(&path);
7164                 return ret;
7165         } else if (ret) {
7166                 if (path.slots[0] > 0) {
7167                         path.slots[0]--;
7168                 } else {
7169                         ret = btrfs_prev_leaf(root, &path);
7170                         if (ret < 0) {
7171                                 goto out;
7172                         } else if (ret > 0) {
7173                                 ret = 0;
7174                                 goto out;
7175                         }
7176                 }
7177         }
7178
7179         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7180
7181         /*
7182          * Block group items come before extent items if they have the same
7183          * bytenr, so walk back one more just in case.  Dear future traveller,
7184          * first congrats on mastering time travel.  Now if it's not too much
7185          * trouble could you go back to 2006 and tell Chris to make the
7186          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7187          * EXTENT_ITEM_KEY please?
7188          */
7189         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7190                 if (path.slots[0] > 0) {
7191                         path.slots[0]--;
7192                 } else {
7193                         ret = btrfs_prev_leaf(root, &path);
7194                         if (ret < 0) {
7195                                 goto out;
7196                         } else if (ret > 0) {
7197                                 ret = 0;
7198                                 goto out;
7199                         }
7200                 }
7201                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7202         }
7203
7204         while (num_bytes) {
7205                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7206                         ret = btrfs_next_leaf(root, &path);
7207                         if (ret < 0) {
7208                                 fprintf(stderr, "Error going to next leaf "
7209                                         "%d\n", ret);
7210                                 btrfs_release_path(&path);
7211                                 return ret;
7212                         } else if (ret) {
7213                                 break;
7214                         }
7215                 }
7216                 leaf = path.nodes[0];
7217                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7218                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7219                         path.slots[0]++;
7220                         continue;
7221                 }
7222                 if (key.objectid + key.offset < bytenr) {
7223                         path.slots[0]++;
7224                         continue;
7225                 }
7226                 if (key.objectid > bytenr + num_bytes)
7227                         break;
7228
7229                 if (key.objectid == bytenr) {
7230                         if (key.offset >= num_bytes) {
7231                                 num_bytes = 0;
7232                                 break;
7233                         }
7234                         num_bytes -= key.offset;
7235                         bytenr += key.offset;
7236                 } else if (key.objectid < bytenr) {
7237                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7238                                 num_bytes = 0;
7239                                 break;
7240                         }
7241                         num_bytes = (bytenr + num_bytes) -
7242                                 (key.objectid + key.offset);
7243                         bytenr = key.objectid + key.offset;
7244                 } else {
7245                         if (key.objectid + key.offset < bytenr + num_bytes) {
7246                                 u64 new_start = key.objectid + key.offset;
7247                                 u64 new_bytes = bytenr + num_bytes - new_start;
7248
7249                                 /*
7250                                  * Weird case, the extent is in the middle of
7251                                  * our range, we'll have to search one side
7252                                  * and then the other.  Not sure if this happens
7253                                  * in real life, but no harm in coding it up
7254                                  * anyway just in case.
7255                                  */
7256                                 btrfs_release_path(&path);
7257                                 ret = check_extent_exists(root, new_start,
7258                                                           new_bytes);
7259                                 if (ret) {
7260                                         fprintf(stderr, "Right section didn't "
7261                                                 "have a record\n");
7262                                         break;
7263                                 }
7264                                 num_bytes = key.objectid - bytenr;
7265                                 goto again;
7266                         }
7267                         num_bytes = key.objectid - bytenr;
7268                 }
7269                 path.slots[0]++;
7270         }
7271         ret = 0;
7272
7273 out:
7274         if (num_bytes && !ret) {
7275                 fprintf(stderr, "There are no extents for csum range "
7276                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7277                 ret = 1;
7278         }
7279
7280         btrfs_release_path(&path);
7281         return ret;
7282 }
7283
7284 static int check_csums(struct btrfs_root *root)
7285 {
7286         struct btrfs_path path;
7287         struct extent_buffer *leaf;
7288         struct btrfs_key key;
7289         u64 offset = 0, num_bytes = 0;
7290         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7291         int errors = 0;
7292         int ret;
7293         u64 data_len;
7294         unsigned long leaf_offset;
7295
7296         root = root->fs_info->csum_root;
7297         if (!extent_buffer_uptodate(root->node)) {
7298                 fprintf(stderr, "No valid csum tree found\n");
7299                 return -ENOENT;
7300         }
7301
7302         btrfs_init_path(&path);
7303         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7304         key.type = BTRFS_EXTENT_CSUM_KEY;
7305         key.offset = 0;
7306         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7307         if (ret < 0) {
7308                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7309                 btrfs_release_path(&path);
7310                 return ret;
7311         }
7312
7313         if (ret > 0 && path.slots[0])
7314                 path.slots[0]--;
7315         ret = 0;
7316
7317         while (1) {
7318                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7319                         ret = btrfs_next_leaf(root, &path);
7320                         if (ret < 0) {
7321                                 fprintf(stderr, "Error going to next leaf "
7322                                         "%d\n", ret);
7323                                 break;
7324                         }
7325                         if (ret)
7326                                 break;
7327                 }
7328                 leaf = path.nodes[0];
7329
7330                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7331                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7332                         path.slots[0]++;
7333                         continue;
7334                 }
7335
7336                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7337                               csum_size) * root->sectorsize;
7338                 if (!check_data_csum)
7339                         goto skip_csum_check;
7340                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7341                 ret = check_extent_csums(root, key.offset, data_len,
7342                                          leaf_offset, leaf);
7343                 if (ret)
7344                         break;
7345 skip_csum_check:
7346                 if (!num_bytes) {
7347                         offset = key.offset;
7348                 } else if (key.offset != offset + num_bytes) {
7349                         ret = check_extent_exists(root, offset, num_bytes);
7350                         if (ret) {
7351                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7352                                         "there is no extent record\n",
7353                                         offset, offset+num_bytes);
7354                                 errors++;
7355                         }
7356                         offset = key.offset;
7357                         num_bytes = 0;
7358                 }
7359                 num_bytes += data_len;
7360                 path.slots[0]++;
7361         }
7362
7363         btrfs_release_path(&path);
7364         return errors;
7365 }
7366
7367 static int is_dropped_key(struct btrfs_key *key,
7368                           struct btrfs_key *drop_key) {
7369         if (key->objectid < drop_key->objectid)
7370                 return 1;
7371         else if (key->objectid == drop_key->objectid) {
7372                 if (key->type < drop_key->type)
7373                         return 1;
7374                 else if (key->type == drop_key->type) {
7375                         if (key->offset < drop_key->offset)
7376                                 return 1;
7377                 }
7378         }
7379         return 0;
7380 }
7381
7382 /*
7383  * Here are the rules for FULL_BACKREF.
7384  *
7385  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7386  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7387  *      FULL_BACKREF set.
7388  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7389  *    if it happened after the relocation occurred since we'll have dropped the
7390  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7391  *    have no real way to know for sure.
7392  *
7393  * We process the blocks one root at a time, and we start from the lowest root
7394  * objectid and go to the highest.  So we can just lookup the owner backref for
7395  * the record and if we don't find it then we know it doesn't exist and we have
7396  * a FULL BACKREF.
7397  *
7398  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7399  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7400  * be set or not and then we can check later once we've gathered all the refs.
7401  */
7402 static int calc_extent_flag(struct btrfs_root *root,
7403                            struct cache_tree *extent_cache,
7404                            struct extent_buffer *buf,
7405                            struct root_item_record *ri,
7406                            u64 *flags)
7407 {
7408         struct extent_record *rec;
7409         struct cache_extent *cache;
7410         struct tree_backref *tback;
7411         u64 owner = 0;
7412
7413         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7414         /* we have added this extent before */
7415         if (!cache)
7416                 return -ENOENT;
7417
7418         rec = container_of(cache, struct extent_record, cache);
7419
7420         /*
7421          * Except file/reloc tree, we can not have
7422          * FULL BACKREF MODE
7423          */
7424         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7425                 goto normal;
7426         /*
7427          * root node
7428          */
7429         if (buf->start == ri->bytenr)
7430                 goto normal;
7431
7432         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7433                 goto full_backref;
7434
7435         owner = btrfs_header_owner(buf);
7436         if (owner == ri->objectid)
7437                 goto normal;
7438
7439         tback = find_tree_backref(rec, 0, owner);
7440         if (!tback)
7441                 goto full_backref;
7442 normal:
7443         *flags = 0;
7444         if (rec->flag_block_full_backref != FLAG_UNSET &&
7445             rec->flag_block_full_backref != 0)
7446                 rec->bad_full_backref = 1;
7447         return 0;
7448 full_backref:
7449         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7450         if (rec->flag_block_full_backref != FLAG_UNSET &&
7451             rec->flag_block_full_backref != 1)
7452                 rec->bad_full_backref = 1;
7453         return 0;
7454 }
7455
7456 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7457 {
7458         fprintf(stderr, "Invalid key type(");
7459         print_key_type(stderr, 0, key_type);
7460         fprintf(stderr, ") found in root(");
7461         print_objectid(stderr, rootid, 0);
7462         fprintf(stderr, ")\n");
7463 }
7464
7465 /*
7466  * Check if the key is valid with its extent buffer.
7467  *
7468  * This is a early check in case invalid key exists in a extent buffer
7469  * This is not comprehensive yet, but should prevent wrong key/item passed
7470  * further
7471  */
7472 static int check_type_with_root(u64 rootid, u8 key_type)
7473 {
7474         switch (key_type) {
7475         /* Only valid in chunk tree */
7476         case BTRFS_DEV_ITEM_KEY:
7477         case BTRFS_CHUNK_ITEM_KEY:
7478                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7479                         goto err;
7480                 break;
7481         /* valid in csum and log tree */
7482         case BTRFS_CSUM_TREE_OBJECTID:
7483                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7484                       is_fstree(rootid)))
7485                         goto err;
7486                 break;
7487         case BTRFS_EXTENT_ITEM_KEY:
7488         case BTRFS_METADATA_ITEM_KEY:
7489         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7490                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7491                         goto err;
7492                 break;
7493         case BTRFS_ROOT_ITEM_KEY:
7494                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7495                         goto err;
7496                 break;
7497         case BTRFS_DEV_EXTENT_KEY:
7498                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7499                         goto err;
7500                 break;
7501         }
7502         return 0;
7503 err:
7504         report_mismatch_key_root(key_type, rootid);
7505         return -EINVAL;
7506 }
7507
7508 static int run_next_block(struct btrfs_root *root,
7509                           struct block_info *bits,
7510                           int bits_nr,
7511                           u64 *last,
7512                           struct cache_tree *pending,
7513                           struct cache_tree *seen,
7514                           struct cache_tree *reada,
7515                           struct cache_tree *nodes,
7516                           struct cache_tree *extent_cache,
7517                           struct cache_tree *chunk_cache,
7518                           struct rb_root *dev_cache,
7519                           struct block_group_tree *block_group_cache,
7520                           struct device_extent_tree *dev_extent_cache,
7521                           struct root_item_record *ri)
7522 {
7523         struct extent_buffer *buf;
7524         struct extent_record *rec = NULL;
7525         u64 bytenr;
7526         u32 size;
7527         u64 parent;
7528         u64 owner;
7529         u64 flags;
7530         u64 ptr;
7531         u64 gen = 0;
7532         int ret = 0;
7533         int i;
7534         int nritems;
7535         struct btrfs_key key;
7536         struct cache_extent *cache;
7537         int reada_bits;
7538
7539         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7540                                     bits_nr, &reada_bits);
7541         if (nritems == 0)
7542                 return 1;
7543
7544         if (!reada_bits) {
7545                 for(i = 0; i < nritems; i++) {
7546                         ret = add_cache_extent(reada, bits[i].start,
7547                                                bits[i].size);
7548                         if (ret == -EEXIST)
7549                                 continue;
7550
7551                         /* fixme, get the parent transid */
7552                         readahead_tree_block(root, bits[i].start,
7553                                              bits[i].size, 0);
7554                 }
7555         }
7556         *last = bits[0].start;
7557         bytenr = bits[0].start;
7558         size = bits[0].size;
7559
7560         cache = lookup_cache_extent(pending, bytenr, size);
7561         if (cache) {
7562                 remove_cache_extent(pending, cache);
7563                 free(cache);
7564         }
7565         cache = lookup_cache_extent(reada, bytenr, size);
7566         if (cache) {
7567                 remove_cache_extent(reada, cache);
7568                 free(cache);
7569         }
7570         cache = lookup_cache_extent(nodes, bytenr, size);
7571         if (cache) {
7572                 remove_cache_extent(nodes, cache);
7573                 free(cache);
7574         }
7575         cache = lookup_cache_extent(extent_cache, bytenr, size);
7576         if (cache) {
7577                 rec = container_of(cache, struct extent_record, cache);
7578                 gen = rec->parent_generation;
7579         }
7580
7581         /* fixme, get the real parent transid */
7582         buf = read_tree_block(root, bytenr, size, gen);
7583         if (!extent_buffer_uptodate(buf)) {
7584                 record_bad_block_io(root->fs_info,
7585                                     extent_cache, bytenr, size);
7586                 goto out;
7587         }
7588
7589         nritems = btrfs_header_nritems(buf);
7590
7591         flags = 0;
7592         if (!init_extent_tree) {
7593                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7594                                        btrfs_header_level(buf), 1, NULL,
7595                                        &flags);
7596                 if (ret < 0) {
7597                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7598                         if (ret < 0) {
7599                                 fprintf(stderr, "Couldn't calc extent flags\n");
7600                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7601                         }
7602                 }
7603         } else {
7604                 flags = 0;
7605                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7606                 if (ret < 0) {
7607                         fprintf(stderr, "Couldn't calc extent flags\n");
7608                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7609                 }
7610         }
7611
7612         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7613                 if (ri != NULL &&
7614                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7615                     ri->objectid == btrfs_header_owner(buf)) {
7616                         /*
7617                          * Ok we got to this block from it's original owner and
7618                          * we have FULL_BACKREF set.  Relocation can leave
7619                          * converted blocks over so this is altogether possible,
7620                          * however it's not possible if the generation > the
7621                          * last snapshot, so check for this case.
7622                          */
7623                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7624                             btrfs_header_generation(buf) > ri->last_snapshot) {
7625                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7626                                 rec->bad_full_backref = 1;
7627                         }
7628                 }
7629         } else {
7630                 if (ri != NULL &&
7631                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7632                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7633                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7634                         rec->bad_full_backref = 1;
7635                 }
7636         }
7637
7638         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7639                 rec->flag_block_full_backref = 1;
7640                 parent = bytenr;
7641                 owner = 0;
7642         } else {
7643                 rec->flag_block_full_backref = 0;
7644                 parent = 0;
7645                 owner = btrfs_header_owner(buf);
7646         }
7647
7648         ret = check_block(root, extent_cache, buf, flags);
7649         if (ret)
7650                 goto out;
7651
7652         if (btrfs_is_leaf(buf)) {
7653                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7654                 for (i = 0; i < nritems; i++) {
7655                         struct btrfs_file_extent_item *fi;
7656                         btrfs_item_key_to_cpu(buf, &key, i);
7657                         /*
7658                          * Check key type against the leaf owner.
7659                          * Could filter quite a lot of early error if
7660                          * owner is correct
7661                          */
7662                         if (check_type_with_root(btrfs_header_owner(buf),
7663                                                  key.type)) {
7664                                 fprintf(stderr, "ignoring invalid key\n");
7665                                 continue;
7666                         }
7667                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7668                                 process_extent_item(root, extent_cache, buf,
7669                                                     i);
7670                                 continue;
7671                         }
7672                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7673                                 process_extent_item(root, extent_cache, buf,
7674                                                     i);
7675                                 continue;
7676                         }
7677                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7678                                 total_csum_bytes +=
7679                                         btrfs_item_size_nr(buf, i);
7680                                 continue;
7681                         }
7682                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7683                                 process_chunk_item(chunk_cache, &key, buf, i);
7684                                 continue;
7685                         }
7686                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7687                                 process_device_item(dev_cache, &key, buf, i);
7688                                 continue;
7689                         }
7690                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7691                                 process_block_group_item(block_group_cache,
7692                                         &key, buf, i);
7693                                 continue;
7694                         }
7695                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7696                                 process_device_extent_item(dev_extent_cache,
7697                                         &key, buf, i);
7698                                 continue;
7699
7700                         }
7701                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7702 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7703                                 process_extent_ref_v0(extent_cache, buf, i);
7704 #else
7705                                 BUG();
7706 #endif
7707                                 continue;
7708                         }
7709
7710                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7711                                 ret = add_tree_backref(extent_cache,
7712                                                 key.objectid, 0, key.offset, 0);
7713                                 if (ret < 0)
7714                                         error("add_tree_backref failed: %s",
7715                                               strerror(-ret));
7716                                 continue;
7717                         }
7718                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7719                                 ret = add_tree_backref(extent_cache,
7720                                                 key.objectid, key.offset, 0, 0);
7721                                 if (ret < 0)
7722                                         error("add_tree_backref failed: %s",
7723                                               strerror(-ret));
7724                                 continue;
7725                         }
7726                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7727                                 struct btrfs_extent_data_ref *ref;
7728                                 ref = btrfs_item_ptr(buf, i,
7729                                                 struct btrfs_extent_data_ref);
7730                                 add_data_backref(extent_cache,
7731                                         key.objectid, 0,
7732                                         btrfs_extent_data_ref_root(buf, ref),
7733                                         btrfs_extent_data_ref_objectid(buf,
7734                                                                        ref),
7735                                         btrfs_extent_data_ref_offset(buf, ref),
7736                                         btrfs_extent_data_ref_count(buf, ref),
7737                                         0, root->sectorsize);
7738                                 continue;
7739                         }
7740                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7741                                 struct btrfs_shared_data_ref *ref;
7742                                 ref = btrfs_item_ptr(buf, i,
7743                                                 struct btrfs_shared_data_ref);
7744                                 add_data_backref(extent_cache,
7745                                         key.objectid, key.offset, 0, 0, 0,
7746                                         btrfs_shared_data_ref_count(buf, ref),
7747                                         0, root->sectorsize);
7748                                 continue;
7749                         }
7750                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7751                                 struct bad_item *bad;
7752
7753                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7754                                         continue;
7755                                 if (!owner)
7756                                         continue;
7757                                 bad = malloc(sizeof(struct bad_item));
7758                                 if (!bad)
7759                                         continue;
7760                                 INIT_LIST_HEAD(&bad->list);
7761                                 memcpy(&bad->key, &key,
7762                                        sizeof(struct btrfs_key));
7763                                 bad->root_id = owner;
7764                                 list_add_tail(&bad->list, &delete_items);
7765                                 continue;
7766                         }
7767                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7768                                 continue;
7769                         fi = btrfs_item_ptr(buf, i,
7770                                             struct btrfs_file_extent_item);
7771                         if (btrfs_file_extent_type(buf, fi) ==
7772                             BTRFS_FILE_EXTENT_INLINE)
7773                                 continue;
7774                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7775                                 continue;
7776
7777                         data_bytes_allocated +=
7778                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7779                         if (data_bytes_allocated < root->sectorsize) {
7780                                 abort();
7781                         }
7782                         data_bytes_referenced +=
7783                                 btrfs_file_extent_num_bytes(buf, fi);
7784                         add_data_backref(extent_cache,
7785                                 btrfs_file_extent_disk_bytenr(buf, fi),
7786                                 parent, owner, key.objectid, key.offset -
7787                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7788                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7789                 }
7790         } else {
7791                 int level;
7792                 struct btrfs_key first_key;
7793
7794                 first_key.objectid = 0;
7795
7796                 if (nritems > 0)
7797                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7798                 level = btrfs_header_level(buf);
7799                 for (i = 0; i < nritems; i++) {
7800                         struct extent_record tmpl;
7801
7802                         ptr = btrfs_node_blockptr(buf, i);
7803                         size = root->nodesize;
7804                         btrfs_node_key_to_cpu(buf, &key, i);
7805                         if (ri != NULL) {
7806                                 if ((level == ri->drop_level)
7807                                     && is_dropped_key(&key, &ri->drop_key)) {
7808                                         continue;
7809                                 }
7810                         }
7811
7812                         memset(&tmpl, 0, sizeof(tmpl));
7813                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7814                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7815                         tmpl.start = ptr;
7816                         tmpl.nr = size;
7817                         tmpl.refs = 1;
7818                         tmpl.metadata = 1;
7819                         tmpl.max_size = size;
7820                         ret = add_extent_rec(extent_cache, &tmpl);
7821                         if (ret < 0)
7822                                 goto out;
7823
7824                         ret = add_tree_backref(extent_cache, ptr, parent,
7825                                         owner, 1);
7826                         if (ret < 0) {
7827                                 error("add_tree_backref failed: %s",
7828                                       strerror(-ret));
7829                                 continue;
7830                         }
7831
7832                         if (level > 1) {
7833                                 add_pending(nodes, seen, ptr, size);
7834                         } else {
7835                                 add_pending(pending, seen, ptr, size);
7836                         }
7837                 }
7838                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7839                                       nritems) * sizeof(struct btrfs_key_ptr);
7840         }
7841         total_btree_bytes += buf->len;
7842         if (fs_root_objectid(btrfs_header_owner(buf)))
7843                 total_fs_tree_bytes += buf->len;
7844         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7845                 total_extent_tree_bytes += buf->len;
7846         if (!found_old_backref &&
7847             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7848             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7849             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7850                 found_old_backref = 1;
7851 out:
7852         free_extent_buffer(buf);
7853         return ret;
7854 }
7855
7856 static int add_root_to_pending(struct extent_buffer *buf,
7857                                struct cache_tree *extent_cache,
7858                                struct cache_tree *pending,
7859                                struct cache_tree *seen,
7860                                struct cache_tree *nodes,
7861                                u64 objectid)
7862 {
7863         struct extent_record tmpl;
7864         int ret;
7865
7866         if (btrfs_header_level(buf) > 0)
7867                 add_pending(nodes, seen, buf->start, buf->len);
7868         else
7869                 add_pending(pending, seen, buf->start, buf->len);
7870
7871         memset(&tmpl, 0, sizeof(tmpl));
7872         tmpl.start = buf->start;
7873         tmpl.nr = buf->len;
7874         tmpl.is_root = 1;
7875         tmpl.refs = 1;
7876         tmpl.metadata = 1;
7877         tmpl.max_size = buf->len;
7878         add_extent_rec(extent_cache, &tmpl);
7879
7880         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7881             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7882                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7883                                 0, 1);
7884         else
7885                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7886                                 1);
7887         return ret;
7888 }
7889
7890 /* as we fix the tree, we might be deleting blocks that
7891  * we're tracking for repair.  This hook makes sure we
7892  * remove any backrefs for blocks as we are fixing them.
7893  */
7894 static int free_extent_hook(struct btrfs_trans_handle *trans,
7895                             struct btrfs_root *root,
7896                             u64 bytenr, u64 num_bytes, u64 parent,
7897                             u64 root_objectid, u64 owner, u64 offset,
7898                             int refs_to_drop)
7899 {
7900         struct extent_record *rec;
7901         struct cache_extent *cache;
7902         int is_data;
7903         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7904
7905         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7906         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7907         if (!cache)
7908                 return 0;
7909
7910         rec = container_of(cache, struct extent_record, cache);
7911         if (is_data) {
7912                 struct data_backref *back;
7913                 back = find_data_backref(rec, parent, root_objectid, owner,
7914                                          offset, 1, bytenr, num_bytes);
7915                 if (!back)
7916                         goto out;
7917                 if (back->node.found_ref) {
7918                         back->found_ref -= refs_to_drop;
7919                         if (rec->refs)
7920                                 rec->refs -= refs_to_drop;
7921                 }
7922                 if (back->node.found_extent_tree) {
7923                         back->num_refs -= refs_to_drop;
7924                         if (rec->extent_item_refs)
7925                                 rec->extent_item_refs -= refs_to_drop;
7926                 }
7927                 if (back->found_ref == 0)
7928                         back->node.found_ref = 0;
7929                 if (back->num_refs == 0)
7930                         back->node.found_extent_tree = 0;
7931
7932                 if (!back->node.found_extent_tree && back->node.found_ref) {
7933                         list_del(&back->node.list);
7934                         free(back);
7935                 }
7936         } else {
7937                 struct tree_backref *back;
7938                 back = find_tree_backref(rec, parent, root_objectid);
7939                 if (!back)
7940                         goto out;
7941                 if (back->node.found_ref) {
7942                         if (rec->refs)
7943                                 rec->refs--;
7944                         back->node.found_ref = 0;
7945                 }
7946                 if (back->node.found_extent_tree) {
7947                         if (rec->extent_item_refs)
7948                                 rec->extent_item_refs--;
7949                         back->node.found_extent_tree = 0;
7950                 }
7951                 if (!back->node.found_extent_tree && back->node.found_ref) {
7952                         list_del(&back->node.list);
7953                         free(back);
7954                 }
7955         }
7956         maybe_free_extent_rec(extent_cache, rec);
7957 out:
7958         return 0;
7959 }
7960
7961 static int delete_extent_records(struct btrfs_trans_handle *trans,
7962                                  struct btrfs_root *root,
7963                                  struct btrfs_path *path,
7964                                  u64 bytenr)
7965 {
7966         struct btrfs_key key;
7967         struct btrfs_key found_key;
7968         struct extent_buffer *leaf;
7969         int ret;
7970         int slot;
7971
7972
7973         key.objectid = bytenr;
7974         key.type = (u8)-1;
7975         key.offset = (u64)-1;
7976
7977         while(1) {
7978                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7979                                         &key, path, 0, 1);
7980                 if (ret < 0)
7981                         break;
7982
7983                 if (ret > 0) {
7984                         ret = 0;
7985                         if (path->slots[0] == 0)
7986                                 break;
7987                         path->slots[0]--;
7988                 }
7989                 ret = 0;
7990
7991                 leaf = path->nodes[0];
7992                 slot = path->slots[0];
7993
7994                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7995                 if (found_key.objectid != bytenr)
7996                         break;
7997
7998                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7999                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8000                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8001                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8002                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8003                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8004                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8005                         btrfs_release_path(path);
8006                         if (found_key.type == 0) {
8007                                 if (found_key.offset == 0)
8008                                         break;
8009                                 key.offset = found_key.offset - 1;
8010                                 key.type = found_key.type;
8011                         }
8012                         key.type = found_key.type - 1;
8013                         key.offset = (u64)-1;
8014                         continue;
8015                 }
8016
8017                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8018                         found_key.objectid, found_key.type, found_key.offset);
8019
8020                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8021                 if (ret)
8022                         break;
8023                 btrfs_release_path(path);
8024
8025                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8026                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8027                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8028                                 found_key.offset : root->nodesize;
8029
8030                         ret = btrfs_update_block_group(trans, root, bytenr,
8031                                                        bytes, 0, 0);
8032                         if (ret)
8033                                 break;
8034                 }
8035         }
8036
8037         btrfs_release_path(path);
8038         return ret;
8039 }
8040
8041 /*
8042  * for a single backref, this will allocate a new extent
8043  * and add the backref to it.
8044  */
8045 static int record_extent(struct btrfs_trans_handle *trans,
8046                          struct btrfs_fs_info *info,
8047                          struct btrfs_path *path,
8048                          struct extent_record *rec,
8049                          struct extent_backref *back,
8050                          int allocated, u64 flags)
8051 {
8052         int ret = 0;
8053         struct btrfs_root *extent_root = info->extent_root;
8054         struct extent_buffer *leaf;
8055         struct btrfs_key ins_key;
8056         struct btrfs_extent_item *ei;
8057         struct data_backref *dback;
8058         struct btrfs_tree_block_info *bi;
8059
8060         if (!back->is_data)
8061                 rec->max_size = max_t(u64, rec->max_size,
8062                                     info->extent_root->nodesize);
8063
8064         if (!allocated) {
8065                 u32 item_size = sizeof(*ei);
8066
8067                 if (!back->is_data)
8068                         item_size += sizeof(*bi);
8069
8070                 ins_key.objectid = rec->start;
8071                 ins_key.offset = rec->max_size;
8072                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8073
8074                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8075                                         &ins_key, item_size);
8076                 if (ret)
8077                         goto fail;
8078
8079                 leaf = path->nodes[0];
8080                 ei = btrfs_item_ptr(leaf, path->slots[0],
8081                                     struct btrfs_extent_item);
8082
8083                 btrfs_set_extent_refs(leaf, ei, 0);
8084                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8085
8086                 if (back->is_data) {
8087                         btrfs_set_extent_flags(leaf, ei,
8088                                                BTRFS_EXTENT_FLAG_DATA);
8089                 } else {
8090                         struct btrfs_disk_key copy_key;;
8091
8092                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8093                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8094                                              sizeof(*bi));
8095
8096                         btrfs_set_disk_key_objectid(&copy_key,
8097                                                     rec->info_objectid);
8098                         btrfs_set_disk_key_type(&copy_key, 0);
8099                         btrfs_set_disk_key_offset(&copy_key, 0);
8100
8101                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8102                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8103
8104                         btrfs_set_extent_flags(leaf, ei,
8105                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8106                 }
8107
8108                 btrfs_mark_buffer_dirty(leaf);
8109                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8110                                                rec->max_size, 1, 0);
8111                 if (ret)
8112                         goto fail;
8113                 btrfs_release_path(path);
8114         }
8115
8116         if (back->is_data) {
8117                 u64 parent;
8118                 int i;
8119
8120                 dback = to_data_backref(back);
8121                 if (back->full_backref)
8122                         parent = dback->parent;
8123                 else
8124                         parent = 0;
8125
8126                 for (i = 0; i < dback->found_ref; i++) {
8127                         /* if parent != 0, we're doing a full backref
8128                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8129                          * just makes the backref allocator create a data
8130                          * backref
8131                          */
8132                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8133                                                    rec->start, rec->max_size,
8134                                                    parent,
8135                                                    dback->root,
8136                                                    parent ?
8137                                                    BTRFS_FIRST_FREE_OBJECTID :
8138                                                    dback->owner,
8139                                                    dback->offset);
8140                         if (ret)
8141                                 break;
8142                 }
8143                 fprintf(stderr, "adding new data backref"
8144                                 " on %llu %s %llu owner %llu"
8145                                 " offset %llu found %d\n",
8146                                 (unsigned long long)rec->start,
8147                                 back->full_backref ?
8148                                 "parent" : "root",
8149                                 back->full_backref ?
8150                                 (unsigned long long)parent :
8151                                 (unsigned long long)dback->root,
8152                                 (unsigned long long)dback->owner,
8153                                 (unsigned long long)dback->offset,
8154                                 dback->found_ref);
8155         } else {
8156                 u64 parent;
8157                 struct tree_backref *tback;
8158
8159                 tback = to_tree_backref(back);
8160                 if (back->full_backref)
8161                         parent = tback->parent;
8162                 else
8163                         parent = 0;
8164
8165                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8166                                            rec->start, rec->max_size,
8167                                            parent, tback->root, 0, 0);
8168                 fprintf(stderr, "adding new tree backref on "
8169                         "start %llu len %llu parent %llu root %llu\n",
8170                         rec->start, rec->max_size, parent, tback->root);
8171         }
8172 fail:
8173         btrfs_release_path(path);
8174         return ret;
8175 }
8176
8177 static struct extent_entry *find_entry(struct list_head *entries,
8178                                        u64 bytenr, u64 bytes)
8179 {
8180         struct extent_entry *entry = NULL;
8181
8182         list_for_each_entry(entry, entries, list) {
8183                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8184                         return entry;
8185         }
8186
8187         return NULL;
8188 }
8189
8190 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8191 {
8192         struct extent_entry *entry, *best = NULL, *prev = NULL;
8193
8194         list_for_each_entry(entry, entries, list) {
8195                 /*
8196                  * If there are as many broken entries as entries then we know
8197                  * not to trust this particular entry.
8198                  */
8199                 if (entry->broken == entry->count)
8200                         continue;
8201
8202                 /*
8203                  * Special case, when there are only two entries and 'best' is
8204                  * the first one
8205                  */
8206                 if (!prev) {
8207                         best = entry;
8208                         prev = entry;
8209                         continue;
8210                 }
8211
8212                 /*
8213                  * If our current entry == best then we can't be sure our best
8214                  * is really the best, so we need to keep searching.
8215                  */
8216                 if (best && best->count == entry->count) {
8217                         prev = entry;
8218                         best = NULL;
8219                         continue;
8220                 }
8221
8222                 /* Prev == entry, not good enough, have to keep searching */
8223                 if (!prev->broken && prev->count == entry->count)
8224                         continue;
8225
8226                 if (!best)
8227                         best = (prev->count > entry->count) ? prev : entry;
8228                 else if (best->count < entry->count)
8229                         best = entry;
8230                 prev = entry;
8231         }
8232
8233         return best;
8234 }
8235
8236 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8237                       struct data_backref *dback, struct extent_entry *entry)
8238 {
8239         struct btrfs_trans_handle *trans;
8240         struct btrfs_root *root;
8241         struct btrfs_file_extent_item *fi;
8242         struct extent_buffer *leaf;
8243         struct btrfs_key key;
8244         u64 bytenr, bytes;
8245         int ret, err;
8246
8247         key.objectid = dback->root;
8248         key.type = BTRFS_ROOT_ITEM_KEY;
8249         key.offset = (u64)-1;
8250         root = btrfs_read_fs_root(info, &key);
8251         if (IS_ERR(root)) {
8252                 fprintf(stderr, "Couldn't find root for our ref\n");
8253                 return -EINVAL;
8254         }
8255
8256         /*
8257          * The backref points to the original offset of the extent if it was
8258          * split, so we need to search down to the offset we have and then walk
8259          * forward until we find the backref we're looking for.
8260          */
8261         key.objectid = dback->owner;
8262         key.type = BTRFS_EXTENT_DATA_KEY;
8263         key.offset = dback->offset;
8264         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8265         if (ret < 0) {
8266                 fprintf(stderr, "Error looking up ref %d\n", ret);
8267                 return ret;
8268         }
8269
8270         while (1) {
8271                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8272                         ret = btrfs_next_leaf(root, path);
8273                         if (ret) {
8274                                 fprintf(stderr, "Couldn't find our ref, next\n");
8275                                 return -EINVAL;
8276                         }
8277                 }
8278                 leaf = path->nodes[0];
8279                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8280                 if (key.objectid != dback->owner ||
8281                     key.type != BTRFS_EXTENT_DATA_KEY) {
8282                         fprintf(stderr, "Couldn't find our ref, search\n");
8283                         return -EINVAL;
8284                 }
8285                 fi = btrfs_item_ptr(leaf, path->slots[0],
8286                                     struct btrfs_file_extent_item);
8287                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8288                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8289
8290                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8291                         break;
8292                 path->slots[0]++;
8293         }
8294
8295         btrfs_release_path(path);
8296
8297         trans = btrfs_start_transaction(root, 1);
8298         if (IS_ERR(trans))
8299                 return PTR_ERR(trans);
8300
8301         /*
8302          * Ok we have the key of the file extent we want to fix, now we can cow
8303          * down to the thing and fix it.
8304          */
8305         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8306         if (ret < 0) {
8307                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8308                         key.objectid, key.type, key.offset, ret);
8309                 goto out;
8310         }
8311         if (ret > 0) {
8312                 fprintf(stderr, "Well that's odd, we just found this key "
8313                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8314                         key.offset);
8315                 ret = -EINVAL;
8316                 goto out;
8317         }
8318         leaf = path->nodes[0];
8319         fi = btrfs_item_ptr(leaf, path->slots[0],
8320                             struct btrfs_file_extent_item);
8321
8322         if (btrfs_file_extent_compression(leaf, fi) &&
8323             dback->disk_bytenr != entry->bytenr) {
8324                 fprintf(stderr, "Ref doesn't match the record start and is "
8325                         "compressed, please take a btrfs-image of this file "
8326                         "system and send it to a btrfs developer so they can "
8327                         "complete this functionality for bytenr %Lu\n",
8328                         dback->disk_bytenr);
8329                 ret = -EINVAL;
8330                 goto out;
8331         }
8332
8333         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8334                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8335         } else if (dback->disk_bytenr > entry->bytenr) {
8336                 u64 off_diff, offset;
8337
8338                 off_diff = dback->disk_bytenr - entry->bytenr;
8339                 offset = btrfs_file_extent_offset(leaf, fi);
8340                 if (dback->disk_bytenr + offset +
8341                     btrfs_file_extent_num_bytes(leaf, fi) >
8342                     entry->bytenr + entry->bytes) {
8343                         fprintf(stderr, "Ref is past the entry end, please "
8344                                 "take a btrfs-image of this file system and "
8345                                 "send it to a btrfs developer, ref %Lu\n",
8346                                 dback->disk_bytenr);
8347                         ret = -EINVAL;
8348                         goto out;
8349                 }
8350                 offset += off_diff;
8351                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8352                 btrfs_set_file_extent_offset(leaf, fi, offset);
8353         } else if (dback->disk_bytenr < entry->bytenr) {
8354                 u64 offset;
8355
8356                 offset = btrfs_file_extent_offset(leaf, fi);
8357                 if (dback->disk_bytenr + offset < entry->bytenr) {
8358                         fprintf(stderr, "Ref is before the entry start, please"
8359                                 " take a btrfs-image of this file system and "
8360                                 "send it to a btrfs developer, ref %Lu\n",
8361                                 dback->disk_bytenr);
8362                         ret = -EINVAL;
8363                         goto out;
8364                 }
8365
8366                 offset += dback->disk_bytenr;
8367                 offset -= entry->bytenr;
8368                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8369                 btrfs_set_file_extent_offset(leaf, fi, offset);
8370         }
8371
8372         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8373
8374         /*
8375          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8376          * only do this if we aren't using compression, otherwise it's a
8377          * trickier case.
8378          */
8379         if (!btrfs_file_extent_compression(leaf, fi))
8380                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8381         else
8382                 printf("ram bytes may be wrong?\n");
8383         btrfs_mark_buffer_dirty(leaf);
8384 out:
8385         err = btrfs_commit_transaction(trans, root);
8386         btrfs_release_path(path);
8387         return ret ? ret : err;
8388 }
8389
8390 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8391                            struct extent_record *rec)
8392 {
8393         struct extent_backref *back;
8394         struct data_backref *dback;
8395         struct extent_entry *entry, *best = NULL;
8396         LIST_HEAD(entries);
8397         int nr_entries = 0;
8398         int broken_entries = 0;
8399         int ret = 0;
8400         short mismatch = 0;
8401
8402         /*
8403          * Metadata is easy and the backrefs should always agree on bytenr and
8404          * size, if not we've got bigger issues.
8405          */
8406         if (rec->metadata)
8407                 return 0;
8408
8409         list_for_each_entry(back, &rec->backrefs, list) {
8410                 if (back->full_backref || !back->is_data)
8411                         continue;
8412
8413                 dback = to_data_backref(back);
8414
8415                 /*
8416                  * We only pay attention to backrefs that we found a real
8417                  * backref for.
8418                  */
8419                 if (dback->found_ref == 0)
8420                         continue;
8421
8422                 /*
8423                  * For now we only catch when the bytes don't match, not the
8424                  * bytenr.  We can easily do this at the same time, but I want
8425                  * to have a fs image to test on before we just add repair
8426                  * functionality willy-nilly so we know we won't screw up the
8427                  * repair.
8428                  */
8429
8430                 entry = find_entry(&entries, dback->disk_bytenr,
8431                                    dback->bytes);
8432                 if (!entry) {
8433                         entry = malloc(sizeof(struct extent_entry));
8434                         if (!entry) {
8435                                 ret = -ENOMEM;
8436                                 goto out;
8437                         }
8438                         memset(entry, 0, sizeof(*entry));
8439                         entry->bytenr = dback->disk_bytenr;
8440                         entry->bytes = dback->bytes;
8441                         list_add_tail(&entry->list, &entries);
8442                         nr_entries++;
8443                 }
8444
8445                 /*
8446                  * If we only have on entry we may think the entries agree when
8447                  * in reality they don't so we have to do some extra checking.
8448                  */
8449                 if (dback->disk_bytenr != rec->start ||
8450                     dback->bytes != rec->nr || back->broken)
8451                         mismatch = 1;
8452
8453                 if (back->broken) {
8454                         entry->broken++;
8455                         broken_entries++;
8456                 }
8457
8458                 entry->count++;
8459         }
8460
8461         /* Yay all the backrefs agree, carry on good sir */
8462         if (nr_entries <= 1 && !mismatch)
8463                 goto out;
8464
8465         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8466                 "%Lu\n", rec->start);
8467
8468         /*
8469          * First we want to see if the backrefs can agree amongst themselves who
8470          * is right, so figure out which one of the entries has the highest
8471          * count.
8472          */
8473         best = find_most_right_entry(&entries);
8474
8475         /*
8476          * Ok so we may have an even split between what the backrefs think, so
8477          * this is where we use the extent ref to see what it thinks.
8478          */
8479         if (!best) {
8480                 entry = find_entry(&entries, rec->start, rec->nr);
8481                 if (!entry && (!broken_entries || !rec->found_rec)) {
8482                         fprintf(stderr, "Backrefs don't agree with each other "
8483                                 "and extent record doesn't agree with anybody,"
8484                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8485                                 rec->start, rec->nr);
8486                         ret = -EINVAL;
8487                         goto out;
8488                 } else if (!entry) {
8489                         /*
8490                          * Ok our backrefs were broken, we'll assume this is the
8491                          * correct value and add an entry for this range.
8492                          */
8493                         entry = malloc(sizeof(struct extent_entry));
8494                         if (!entry) {
8495                                 ret = -ENOMEM;
8496                                 goto out;
8497                         }
8498                         memset(entry, 0, sizeof(*entry));
8499                         entry->bytenr = rec->start;
8500                         entry->bytes = rec->nr;
8501                         list_add_tail(&entry->list, &entries);
8502                         nr_entries++;
8503                 }
8504                 entry->count++;
8505                 best = find_most_right_entry(&entries);
8506                 if (!best) {
8507                         fprintf(stderr, "Backrefs and extent record evenly "
8508                                 "split on who is right, this is going to "
8509                                 "require user input to fix bytenr %Lu bytes "
8510                                 "%Lu\n", rec->start, rec->nr);
8511                         ret = -EINVAL;
8512                         goto out;
8513                 }
8514         }
8515
8516         /*
8517          * I don't think this can happen currently as we'll abort() if we catch
8518          * this case higher up, but in case somebody removes that we still can't
8519          * deal with it properly here yet, so just bail out of that's the case.
8520          */
8521         if (best->bytenr != rec->start) {
8522                 fprintf(stderr, "Extent start and backref starts don't match, "
8523                         "please use btrfs-image on this file system and send "
8524                         "it to a btrfs developer so they can make fsck fix "
8525                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8526                         rec->start, rec->nr);
8527                 ret = -EINVAL;
8528                 goto out;
8529         }
8530
8531         /*
8532          * Ok great we all agreed on an extent record, let's go find the real
8533          * references and fix up the ones that don't match.
8534          */
8535         list_for_each_entry(back, &rec->backrefs, list) {
8536                 if (back->full_backref || !back->is_data)
8537                         continue;
8538
8539                 dback = to_data_backref(back);
8540
8541                 /*
8542                  * Still ignoring backrefs that don't have a real ref attached
8543                  * to them.
8544                  */
8545                 if (dback->found_ref == 0)
8546                         continue;
8547
8548                 if (dback->bytes == best->bytes &&
8549                     dback->disk_bytenr == best->bytenr)
8550                         continue;
8551
8552                 ret = repair_ref(info, path, dback, best);
8553                 if (ret)
8554                         goto out;
8555         }
8556
8557         /*
8558          * Ok we messed with the actual refs, which means we need to drop our
8559          * entire cache and go back and rescan.  I know this is a huge pain and
8560          * adds a lot of extra work, but it's the only way to be safe.  Once all
8561          * the backrefs agree we may not need to do anything to the extent
8562          * record itself.
8563          */
8564         ret = -EAGAIN;
8565 out:
8566         while (!list_empty(&entries)) {
8567                 entry = list_entry(entries.next, struct extent_entry, list);
8568                 list_del_init(&entry->list);
8569                 free(entry);
8570         }
8571         return ret;
8572 }
8573
8574 static int process_duplicates(struct btrfs_root *root,
8575                               struct cache_tree *extent_cache,
8576                               struct extent_record *rec)
8577 {
8578         struct extent_record *good, *tmp;
8579         struct cache_extent *cache;
8580         int ret;
8581
8582         /*
8583          * If we found a extent record for this extent then return, or if we
8584          * have more than one duplicate we are likely going to need to delete
8585          * something.
8586          */
8587         if (rec->found_rec || rec->num_duplicates > 1)
8588                 return 0;
8589
8590         /* Shouldn't happen but just in case */
8591         BUG_ON(!rec->num_duplicates);
8592
8593         /*
8594          * So this happens if we end up with a backref that doesn't match the
8595          * actual extent entry.  So either the backref is bad or the extent
8596          * entry is bad.  Either way we want to have the extent_record actually
8597          * reflect what we found in the extent_tree, so we need to take the
8598          * duplicate out and use that as the extent_record since the only way we
8599          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8600          */
8601         remove_cache_extent(extent_cache, &rec->cache);
8602
8603         good = to_extent_record(rec->dups.next);
8604         list_del_init(&good->list);
8605         INIT_LIST_HEAD(&good->backrefs);
8606         INIT_LIST_HEAD(&good->dups);
8607         good->cache.start = good->start;
8608         good->cache.size = good->nr;
8609         good->content_checked = 0;
8610         good->owner_ref_checked = 0;
8611         good->num_duplicates = 0;
8612         good->refs = rec->refs;
8613         list_splice_init(&rec->backrefs, &good->backrefs);
8614         while (1) {
8615                 cache = lookup_cache_extent(extent_cache, good->start,
8616                                             good->nr);
8617                 if (!cache)
8618                         break;
8619                 tmp = container_of(cache, struct extent_record, cache);
8620
8621                 /*
8622                  * If we find another overlapping extent and it's found_rec is
8623                  * set then it's a duplicate and we need to try and delete
8624                  * something.
8625                  */
8626                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8627                         if (list_empty(&good->list))
8628                                 list_add_tail(&good->list,
8629                                               &duplicate_extents);
8630                         good->num_duplicates += tmp->num_duplicates + 1;
8631                         list_splice_init(&tmp->dups, &good->dups);
8632                         list_del_init(&tmp->list);
8633                         list_add_tail(&tmp->list, &good->dups);
8634                         remove_cache_extent(extent_cache, &tmp->cache);
8635                         continue;
8636                 }
8637
8638                 /*
8639                  * Ok we have another non extent item backed extent rec, so lets
8640                  * just add it to this extent and carry on like we did above.
8641                  */
8642                 good->refs += tmp->refs;
8643                 list_splice_init(&tmp->backrefs, &good->backrefs);
8644                 remove_cache_extent(extent_cache, &tmp->cache);
8645                 free(tmp);
8646         }
8647         ret = insert_cache_extent(extent_cache, &good->cache);
8648         BUG_ON(ret);
8649         free(rec);
8650         return good->num_duplicates ? 0 : 1;
8651 }
8652
8653 static int delete_duplicate_records(struct btrfs_root *root,
8654                                     struct extent_record *rec)
8655 {
8656         struct btrfs_trans_handle *trans;
8657         LIST_HEAD(delete_list);
8658         struct btrfs_path path;
8659         struct extent_record *tmp, *good, *n;
8660         int nr_del = 0;
8661         int ret = 0, err;
8662         struct btrfs_key key;
8663
8664         btrfs_init_path(&path);
8665
8666         good = rec;
8667         /* Find the record that covers all of the duplicates. */
8668         list_for_each_entry(tmp, &rec->dups, list) {
8669                 if (good->start < tmp->start)
8670                         continue;
8671                 if (good->nr > tmp->nr)
8672                         continue;
8673
8674                 if (tmp->start + tmp->nr < good->start + good->nr) {
8675                         fprintf(stderr, "Ok we have overlapping extents that "
8676                                 "aren't completely covered by each other, this "
8677                                 "is going to require more careful thought.  "
8678                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8679                                 tmp->start, tmp->nr, good->start, good->nr);
8680                         abort();
8681                 }
8682                 good = tmp;
8683         }
8684
8685         if (good != rec)
8686                 list_add_tail(&rec->list, &delete_list);
8687
8688         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8689                 if (tmp == good)
8690                         continue;
8691                 list_move_tail(&tmp->list, &delete_list);
8692         }
8693
8694         root = root->fs_info->extent_root;
8695         trans = btrfs_start_transaction(root, 1);
8696         if (IS_ERR(trans)) {
8697                 ret = PTR_ERR(trans);
8698                 goto out;
8699         }
8700
8701         list_for_each_entry(tmp, &delete_list, list) {
8702                 if (tmp->found_rec == 0)
8703                         continue;
8704                 key.objectid = tmp->start;
8705                 key.type = BTRFS_EXTENT_ITEM_KEY;
8706                 key.offset = tmp->nr;
8707
8708                 /* Shouldn't happen but just in case */
8709                 if (tmp->metadata) {
8710                         fprintf(stderr, "Well this shouldn't happen, extent "
8711                                 "record overlaps but is metadata? "
8712                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8713                         abort();
8714                 }
8715
8716                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8717                 if (ret) {
8718                         if (ret > 0)
8719                                 ret = -EINVAL;
8720                         break;
8721                 }
8722                 ret = btrfs_del_item(trans, root, &path);
8723                 if (ret)
8724                         break;
8725                 btrfs_release_path(&path);
8726                 nr_del++;
8727         }
8728         err = btrfs_commit_transaction(trans, root);
8729         if (err && !ret)
8730                 ret = err;
8731 out:
8732         while (!list_empty(&delete_list)) {
8733                 tmp = to_extent_record(delete_list.next);
8734                 list_del_init(&tmp->list);
8735                 if (tmp == rec)
8736                         continue;
8737                 free(tmp);
8738         }
8739
8740         while (!list_empty(&rec->dups)) {
8741                 tmp = to_extent_record(rec->dups.next);
8742                 list_del_init(&tmp->list);
8743                 free(tmp);
8744         }
8745
8746         btrfs_release_path(&path);
8747
8748         if (!ret && !nr_del)
8749                 rec->num_duplicates = 0;
8750
8751         return ret ? ret : nr_del;
8752 }
8753
8754 static int find_possible_backrefs(struct btrfs_fs_info *info,
8755                                   struct btrfs_path *path,
8756                                   struct cache_tree *extent_cache,
8757                                   struct extent_record *rec)
8758 {
8759         struct btrfs_root *root;
8760         struct extent_backref *back;
8761         struct data_backref *dback;
8762         struct cache_extent *cache;
8763         struct btrfs_file_extent_item *fi;
8764         struct btrfs_key key;
8765         u64 bytenr, bytes;
8766         int ret;
8767
8768         list_for_each_entry(back, &rec->backrefs, list) {
8769                 /* Don't care about full backrefs (poor unloved backrefs) */
8770                 if (back->full_backref || !back->is_data)
8771                         continue;
8772
8773                 dback = to_data_backref(back);
8774
8775                 /* We found this one, we don't need to do a lookup */
8776                 if (dback->found_ref)
8777                         continue;
8778
8779                 key.objectid = dback->root;
8780                 key.type = BTRFS_ROOT_ITEM_KEY;
8781                 key.offset = (u64)-1;
8782
8783                 root = btrfs_read_fs_root(info, &key);
8784
8785                 /* No root, definitely a bad ref, skip */
8786                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8787                         continue;
8788                 /* Other err, exit */
8789                 if (IS_ERR(root))
8790                         return PTR_ERR(root);
8791
8792                 key.objectid = dback->owner;
8793                 key.type = BTRFS_EXTENT_DATA_KEY;
8794                 key.offset = dback->offset;
8795                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8796                 if (ret) {
8797                         btrfs_release_path(path);
8798                         if (ret < 0)
8799                                 return ret;
8800                         /* Didn't find it, we can carry on */
8801                         ret = 0;
8802                         continue;
8803                 }
8804
8805                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8806                                     struct btrfs_file_extent_item);
8807                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8808                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8809                 btrfs_release_path(path);
8810                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8811                 if (cache) {
8812                         struct extent_record *tmp;
8813                         tmp = container_of(cache, struct extent_record, cache);
8814
8815                         /*
8816                          * If we found an extent record for the bytenr for this
8817                          * particular backref then we can't add it to our
8818                          * current extent record.  We only want to add backrefs
8819                          * that don't have a corresponding extent item in the
8820                          * extent tree since they likely belong to this record
8821                          * and we need to fix it if it doesn't match bytenrs.
8822                          */
8823                         if  (tmp->found_rec)
8824                                 continue;
8825                 }
8826
8827                 dback->found_ref += 1;
8828                 dback->disk_bytenr = bytenr;
8829                 dback->bytes = bytes;
8830
8831                 /*
8832                  * Set this so the verify backref code knows not to trust the
8833                  * values in this backref.
8834                  */
8835                 back->broken = 1;
8836         }
8837
8838         return 0;
8839 }
8840
8841 /*
8842  * Record orphan data ref into corresponding root.
8843  *
8844  * Return 0 if the extent item contains data ref and recorded.
8845  * Return 1 if the extent item contains no useful data ref
8846  *   On that case, it may contains only shared_dataref or metadata backref
8847  *   or the file extent exists(this should be handled by the extent bytenr
8848  *   recovery routine)
8849  * Return <0 if something goes wrong.
8850  */
8851 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8852                                       struct extent_record *rec)
8853 {
8854         struct btrfs_key key;
8855         struct btrfs_root *dest_root;
8856         struct extent_backref *back;
8857         struct data_backref *dback;
8858         struct orphan_data_extent *orphan;
8859         struct btrfs_path path;
8860         int recorded_data_ref = 0;
8861         int ret = 0;
8862
8863         if (rec->metadata)
8864                 return 1;
8865         btrfs_init_path(&path);
8866         list_for_each_entry(back, &rec->backrefs, list) {
8867                 if (back->full_backref || !back->is_data ||
8868                     !back->found_extent_tree)
8869                         continue;
8870                 dback = to_data_backref(back);
8871                 if (dback->found_ref)
8872                         continue;
8873                 key.objectid = dback->root;
8874                 key.type = BTRFS_ROOT_ITEM_KEY;
8875                 key.offset = (u64)-1;
8876
8877                 dest_root = btrfs_read_fs_root(fs_info, &key);
8878
8879                 /* For non-exist root we just skip it */
8880                 if (IS_ERR(dest_root) || !dest_root)
8881                         continue;
8882
8883                 key.objectid = dback->owner;
8884                 key.type = BTRFS_EXTENT_DATA_KEY;
8885                 key.offset = dback->offset;
8886
8887                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8888                 btrfs_release_path(&path);
8889                 /*
8890                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8891                  * we need to record it for inode/file extent rebuild.
8892                  * For ret > 0, we record it only for file extent rebuild.
8893                  * For ret == 0, the file extent exists but only bytenr
8894                  * mismatch, let the original bytenr fix routine to handle,
8895                  * don't record it.
8896                  */
8897                 if (ret == 0)
8898                         continue;
8899                 ret = 0;
8900                 orphan = malloc(sizeof(*orphan));
8901                 if (!orphan) {
8902                         ret = -ENOMEM;
8903                         goto out;
8904                 }
8905                 INIT_LIST_HEAD(&orphan->list);
8906                 orphan->root = dback->root;
8907                 orphan->objectid = dback->owner;
8908                 orphan->offset = dback->offset;
8909                 orphan->disk_bytenr = rec->cache.start;
8910                 orphan->disk_len = rec->cache.size;
8911                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8912                 recorded_data_ref = 1;
8913         }
8914 out:
8915         btrfs_release_path(&path);
8916         if (!ret)
8917                 return !recorded_data_ref;
8918         else
8919                 return ret;
8920 }
8921
8922 /*
8923  * when an incorrect extent item is found, this will delete
8924  * all of the existing entries for it and recreate them
8925  * based on what the tree scan found.
8926  */
8927 static int fixup_extent_refs(struct btrfs_fs_info *info,
8928                              struct cache_tree *extent_cache,
8929                              struct extent_record *rec)
8930 {
8931         struct btrfs_trans_handle *trans = NULL;
8932         int ret;
8933         struct btrfs_path path;
8934         struct list_head *cur = rec->backrefs.next;
8935         struct cache_extent *cache;
8936         struct extent_backref *back;
8937         int allocated = 0;
8938         u64 flags = 0;
8939
8940         if (rec->flag_block_full_backref)
8941                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8942
8943         btrfs_init_path(&path);
8944         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8945                 /*
8946                  * Sometimes the backrefs themselves are so broken they don't
8947                  * get attached to any meaningful rec, so first go back and
8948                  * check any of our backrefs that we couldn't find and throw
8949                  * them into the list if we find the backref so that
8950                  * verify_backrefs can figure out what to do.
8951                  */
8952                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8953                 if (ret < 0)
8954                         goto out;
8955         }
8956
8957         /* step one, make sure all of the backrefs agree */
8958         ret = verify_backrefs(info, &path, rec);
8959         if (ret < 0)
8960                 goto out;
8961
8962         trans = btrfs_start_transaction(info->extent_root, 1);
8963         if (IS_ERR(trans)) {
8964                 ret = PTR_ERR(trans);
8965                 goto out;
8966         }
8967
8968         /* step two, delete all the existing records */
8969         ret = delete_extent_records(trans, info->extent_root, &path,
8970                                     rec->start);
8971
8972         if (ret < 0)
8973                 goto out;
8974
8975         /* was this block corrupt?  If so, don't add references to it */
8976         cache = lookup_cache_extent(info->corrupt_blocks,
8977                                     rec->start, rec->max_size);
8978         if (cache) {
8979                 ret = 0;
8980                 goto out;
8981         }
8982
8983         /* step three, recreate all the refs we did find */
8984         while(cur != &rec->backrefs) {
8985                 back = to_extent_backref(cur);
8986                 cur = cur->next;
8987
8988                 /*
8989                  * if we didn't find any references, don't create a
8990                  * new extent record
8991                  */
8992                 if (!back->found_ref)
8993                         continue;
8994
8995                 rec->bad_full_backref = 0;
8996                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8997                 allocated = 1;
8998
8999                 if (ret)
9000                         goto out;
9001         }
9002 out:
9003         if (trans) {
9004                 int err = btrfs_commit_transaction(trans, info->extent_root);
9005                 if (!ret)
9006                         ret = err;
9007         }
9008
9009         if (!ret)
9010                 fprintf(stderr, "Repaired extent references for %llu\n",
9011                                 (unsigned long long)rec->start);
9012
9013         btrfs_release_path(&path);
9014         return ret;
9015 }
9016
9017 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9018                               struct extent_record *rec)
9019 {
9020         struct btrfs_trans_handle *trans;
9021         struct btrfs_root *root = fs_info->extent_root;
9022         struct btrfs_path path;
9023         struct btrfs_extent_item *ei;
9024         struct btrfs_key key;
9025         u64 flags;
9026         int ret = 0;
9027
9028         key.objectid = rec->start;
9029         if (rec->metadata) {
9030                 key.type = BTRFS_METADATA_ITEM_KEY;
9031                 key.offset = rec->info_level;
9032         } else {
9033                 key.type = BTRFS_EXTENT_ITEM_KEY;
9034                 key.offset = rec->max_size;
9035         }
9036
9037         trans = btrfs_start_transaction(root, 0);
9038         if (IS_ERR(trans))
9039                 return PTR_ERR(trans);
9040
9041         btrfs_init_path(&path);
9042         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9043         if (ret < 0) {
9044                 btrfs_release_path(&path);
9045                 btrfs_commit_transaction(trans, root);
9046                 return ret;
9047         } else if (ret) {
9048                 fprintf(stderr, "Didn't find extent for %llu\n",
9049                         (unsigned long long)rec->start);
9050                 btrfs_release_path(&path);
9051                 btrfs_commit_transaction(trans, root);
9052                 return -ENOENT;
9053         }
9054
9055         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9056                             struct btrfs_extent_item);
9057         flags = btrfs_extent_flags(path.nodes[0], ei);
9058         if (rec->flag_block_full_backref) {
9059                 fprintf(stderr, "setting full backref on %llu\n",
9060                         (unsigned long long)key.objectid);
9061                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9062         } else {
9063                 fprintf(stderr, "clearing full backref on %llu\n",
9064                         (unsigned long long)key.objectid);
9065                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9066         }
9067         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9068         btrfs_mark_buffer_dirty(path.nodes[0]);
9069         btrfs_release_path(&path);
9070         ret = btrfs_commit_transaction(trans, root);
9071         if (!ret)
9072                 fprintf(stderr, "Repaired extent flags for %llu\n",
9073                                 (unsigned long long)rec->start);
9074
9075         return ret;
9076 }
9077
9078 /* right now we only prune from the extent allocation tree */
9079 static int prune_one_block(struct btrfs_trans_handle *trans,
9080                            struct btrfs_fs_info *info,
9081                            struct btrfs_corrupt_block *corrupt)
9082 {
9083         int ret;
9084         struct btrfs_path path;
9085         struct extent_buffer *eb;
9086         u64 found;
9087         int slot;
9088         int nritems;
9089         int level = corrupt->level + 1;
9090
9091         btrfs_init_path(&path);
9092 again:
9093         /* we want to stop at the parent to our busted block */
9094         path.lowest_level = level;
9095
9096         ret = btrfs_search_slot(trans, info->extent_root,
9097                                 &corrupt->key, &path, -1, 1);
9098
9099         if (ret < 0)
9100                 goto out;
9101
9102         eb = path.nodes[level];
9103         if (!eb) {
9104                 ret = -ENOENT;
9105                 goto out;
9106         }
9107
9108         /*
9109          * hopefully the search gave us the block we want to prune,
9110          * lets try that first
9111          */
9112         slot = path.slots[level];
9113         found =  btrfs_node_blockptr(eb, slot);
9114         if (found == corrupt->cache.start)
9115                 goto del_ptr;
9116
9117         nritems = btrfs_header_nritems(eb);
9118
9119         /* the search failed, lets scan this node and hope we find it */
9120         for (slot = 0; slot < nritems; slot++) {
9121                 found =  btrfs_node_blockptr(eb, slot);
9122                 if (found == corrupt->cache.start)
9123                         goto del_ptr;
9124         }
9125         /*
9126          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9127          * to this block
9128          */
9129         if (eb == info->extent_root->node) {
9130                 ret = -ENOENT;
9131                 goto out;
9132         } else {
9133                 level++;
9134                 btrfs_release_path(&path);
9135                 goto again;
9136         }
9137
9138 del_ptr:
9139         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9140         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9141
9142 out:
9143         btrfs_release_path(&path);
9144         return ret;
9145 }
9146
9147 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9148 {
9149         struct btrfs_trans_handle *trans = NULL;
9150         struct cache_extent *cache;
9151         struct btrfs_corrupt_block *corrupt;
9152
9153         while (1) {
9154                 cache = search_cache_extent(info->corrupt_blocks, 0);
9155                 if (!cache)
9156                         break;
9157                 if (!trans) {
9158                         trans = btrfs_start_transaction(info->extent_root, 1);
9159                         if (IS_ERR(trans))
9160                                 return PTR_ERR(trans);
9161                 }
9162                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9163                 prune_one_block(trans, info, corrupt);
9164                 remove_cache_extent(info->corrupt_blocks, cache);
9165         }
9166         if (trans)
9167                 return btrfs_commit_transaction(trans, info->extent_root);
9168         return 0;
9169 }
9170
9171 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9172 {
9173         struct btrfs_block_group_cache *cache;
9174         u64 start, end;
9175         int ret;
9176
9177         while (1) {
9178                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9179                                             &start, &end, EXTENT_DIRTY);
9180                 if (ret)
9181                         break;
9182                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9183         }
9184
9185         start = 0;
9186         while (1) {
9187                 cache = btrfs_lookup_first_block_group(fs_info, start);
9188                 if (!cache)
9189                         break;
9190                 if (cache->cached)
9191                         cache->cached = 0;
9192                 start = cache->key.objectid + cache->key.offset;
9193         }
9194 }
9195
9196 static int check_extent_refs(struct btrfs_root *root,
9197                              struct cache_tree *extent_cache)
9198 {
9199         struct extent_record *rec;
9200         struct cache_extent *cache;
9201         int ret = 0;
9202         int had_dups = 0;
9203
9204         if (repair) {
9205                 /*
9206                  * if we're doing a repair, we have to make sure
9207                  * we don't allocate from the problem extents.
9208                  * In the worst case, this will be all the
9209                  * extents in the FS
9210                  */
9211                 cache = search_cache_extent(extent_cache, 0);
9212                 while(cache) {
9213                         rec = container_of(cache, struct extent_record, cache);
9214                         set_extent_dirty(root->fs_info->excluded_extents,
9215                                          rec->start,
9216                                          rec->start + rec->max_size - 1);
9217                         cache = next_cache_extent(cache);
9218                 }
9219
9220                 /* pin down all the corrupted blocks too */
9221                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9222                 while(cache) {
9223                         set_extent_dirty(root->fs_info->excluded_extents,
9224                                          cache->start,
9225                                          cache->start + cache->size - 1);
9226                         cache = next_cache_extent(cache);
9227                 }
9228                 prune_corrupt_blocks(root->fs_info);
9229                 reset_cached_block_groups(root->fs_info);
9230         }
9231
9232         reset_cached_block_groups(root->fs_info);
9233
9234         /*
9235          * We need to delete any duplicate entries we find first otherwise we
9236          * could mess up the extent tree when we have backrefs that actually
9237          * belong to a different extent item and not the weird duplicate one.
9238          */
9239         while (repair && !list_empty(&duplicate_extents)) {
9240                 rec = to_extent_record(duplicate_extents.next);
9241                 list_del_init(&rec->list);
9242
9243                 /* Sometimes we can find a backref before we find an actual
9244                  * extent, so we need to process it a little bit to see if there
9245                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9246                  * if this is a backref screwup.  If we need to delete stuff
9247                  * process_duplicates() will return 0, otherwise it will return
9248                  * 1 and we
9249                  */
9250                 if (process_duplicates(root, extent_cache, rec))
9251                         continue;
9252                 ret = delete_duplicate_records(root, rec);
9253                 if (ret < 0)
9254                         return ret;
9255                 /*
9256                  * delete_duplicate_records will return the number of entries
9257                  * deleted, so if it's greater than 0 then we know we actually
9258                  * did something and we need to remove.
9259                  */
9260                 if (ret)
9261                         had_dups = 1;
9262         }
9263
9264         if (had_dups)
9265                 return -EAGAIN;
9266
9267         while(1) {
9268                 int cur_err = 0;
9269                 int fix = 0;
9270
9271                 cache = search_cache_extent(extent_cache, 0);
9272                 if (!cache)
9273                         break;
9274                 rec = container_of(cache, struct extent_record, cache);
9275                 if (rec->num_duplicates) {
9276                         fprintf(stderr, "extent item %llu has multiple extent "
9277                                 "items\n", (unsigned long long)rec->start);
9278                         cur_err = 1;
9279                 }
9280
9281                 if (rec->refs != rec->extent_item_refs) {
9282                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9283                                 (unsigned long long)rec->start,
9284                                 (unsigned long long)rec->nr);
9285                         fprintf(stderr, "extent item %llu, found %llu\n",
9286                                 (unsigned long long)rec->extent_item_refs,
9287                                 (unsigned long long)rec->refs);
9288                         ret = record_orphan_data_extents(root->fs_info, rec);
9289                         if (ret < 0)
9290                                 goto repair_abort;
9291                         fix = ret;
9292                         cur_err = 1;
9293                 }
9294                 if (all_backpointers_checked(rec, 1)) {
9295                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9296                                 (unsigned long long)rec->start,
9297                                 (unsigned long long)rec->nr);
9298                         fix = 1;
9299                         cur_err = 1;
9300                 }
9301                 if (!rec->owner_ref_checked) {
9302                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9303                                 (unsigned long long)rec->start,
9304                                 (unsigned long long)rec->nr);
9305                         fix = 1;
9306                         cur_err = 1;
9307                 }
9308
9309                 if (repair && fix) {
9310                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9311                         if (ret)
9312                                 goto repair_abort;
9313                 }
9314
9315
9316                 if (rec->bad_full_backref) {
9317                         fprintf(stderr, "bad full backref, on [%llu]\n",
9318                                 (unsigned long long)rec->start);
9319                         if (repair) {
9320                                 ret = fixup_extent_flags(root->fs_info, rec);
9321                                 if (ret)
9322                                         goto repair_abort;
9323                                 fix = 1;
9324                         }
9325                         cur_err = 1;
9326                 }
9327                 /*
9328                  * Although it's not a extent ref's problem, we reuse this
9329                  * routine for error reporting.
9330                  * No repair function yet.
9331                  */
9332                 if (rec->crossing_stripes) {
9333                         fprintf(stderr,
9334                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9335                                 rec->start, rec->start + rec->max_size);
9336                         cur_err = 1;
9337                 }
9338
9339                 if (rec->wrong_chunk_type) {
9340                         fprintf(stderr,
9341                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9342                                 rec->start, rec->start + rec->max_size);
9343                         cur_err = 1;
9344                 }
9345
9346                 remove_cache_extent(extent_cache, cache);
9347                 free_all_extent_backrefs(rec);
9348                 if (!init_extent_tree && repair && (!cur_err || fix))
9349                         clear_extent_dirty(root->fs_info->excluded_extents,
9350                                            rec->start,
9351                                            rec->start + rec->max_size - 1);
9352                 free(rec);
9353         }
9354 repair_abort:
9355         if (repair) {
9356                 if (ret && ret != -EAGAIN) {
9357                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9358                         exit(1);
9359                 } else if (!ret) {
9360                         struct btrfs_trans_handle *trans;
9361
9362                         root = root->fs_info->extent_root;
9363                         trans = btrfs_start_transaction(root, 1);
9364                         if (IS_ERR(trans)) {
9365                                 ret = PTR_ERR(trans);
9366                                 goto repair_abort;
9367                         }
9368
9369                         btrfs_fix_block_accounting(trans, root);
9370                         ret = btrfs_commit_transaction(trans, root);
9371                         if (ret)
9372                                 goto repair_abort;
9373                 }
9374                 return ret;
9375         }
9376         return 0;
9377 }
9378
9379 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9380 {
9381         u64 stripe_size;
9382
9383         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9384                 stripe_size = length;
9385                 stripe_size /= num_stripes;
9386         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9387                 stripe_size = length * 2;
9388                 stripe_size /= num_stripes;
9389         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9390                 stripe_size = length;
9391                 stripe_size /= (num_stripes - 1);
9392         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9393                 stripe_size = length;
9394                 stripe_size /= (num_stripes - 2);
9395         } else {
9396                 stripe_size = length;
9397         }
9398         return stripe_size;
9399 }
9400
9401 /*
9402  * Check the chunk with its block group/dev list ref:
9403  * Return 0 if all refs seems valid.
9404  * Return 1 if part of refs seems valid, need later check for rebuild ref
9405  * like missing block group and needs to search extent tree to rebuild them.
9406  * Return -1 if essential refs are missing and unable to rebuild.
9407  */
9408 static int check_chunk_refs(struct chunk_record *chunk_rec,
9409                             struct block_group_tree *block_group_cache,
9410                             struct device_extent_tree *dev_extent_cache,
9411                             int silent)
9412 {
9413         struct cache_extent *block_group_item;
9414         struct block_group_record *block_group_rec;
9415         struct cache_extent *dev_extent_item;
9416         struct device_extent_record *dev_extent_rec;
9417         u64 devid;
9418         u64 offset;
9419         u64 length;
9420         int metadump_v2 = 0;
9421         int i;
9422         int ret = 0;
9423
9424         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9425                                                chunk_rec->offset,
9426                                                chunk_rec->length);
9427         if (block_group_item) {
9428                 block_group_rec = container_of(block_group_item,
9429                                                struct block_group_record,
9430                                                cache);
9431                 if (chunk_rec->length != block_group_rec->offset ||
9432                     chunk_rec->offset != block_group_rec->objectid ||
9433                     (!metadump_v2 &&
9434                      chunk_rec->type_flags != block_group_rec->flags)) {
9435                         if (!silent)
9436                                 fprintf(stderr,
9437                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9438                                         chunk_rec->objectid,
9439                                         chunk_rec->type,
9440                                         chunk_rec->offset,
9441                                         chunk_rec->length,
9442                                         chunk_rec->offset,
9443                                         chunk_rec->type_flags,
9444                                         block_group_rec->objectid,
9445                                         block_group_rec->type,
9446                                         block_group_rec->offset,
9447                                         block_group_rec->offset,
9448                                         block_group_rec->objectid,
9449                                         block_group_rec->flags);
9450                         ret = -1;
9451                 } else {
9452                         list_del_init(&block_group_rec->list);
9453                         chunk_rec->bg_rec = block_group_rec;
9454                 }
9455         } else {
9456                 if (!silent)
9457                         fprintf(stderr,
9458                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9459                                 chunk_rec->objectid,
9460                                 chunk_rec->type,
9461                                 chunk_rec->offset,
9462                                 chunk_rec->length,
9463                                 chunk_rec->offset,
9464                                 chunk_rec->type_flags);
9465                 ret = 1;
9466         }
9467
9468         if (metadump_v2)
9469                 return ret;
9470
9471         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9472                                     chunk_rec->num_stripes);
9473         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9474                 devid = chunk_rec->stripes[i].devid;
9475                 offset = chunk_rec->stripes[i].offset;
9476                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9477                                                        devid, offset, length);
9478                 if (dev_extent_item) {
9479                         dev_extent_rec = container_of(dev_extent_item,
9480                                                 struct device_extent_record,
9481                                                 cache);
9482                         if (dev_extent_rec->objectid != devid ||
9483                             dev_extent_rec->offset != offset ||
9484                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9485                             dev_extent_rec->length != length) {
9486                                 if (!silent)
9487                                         fprintf(stderr,
9488                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9489                                                 chunk_rec->objectid,
9490                                                 chunk_rec->type,
9491                                                 chunk_rec->offset,
9492                                                 chunk_rec->stripes[i].devid,
9493                                                 chunk_rec->stripes[i].offset,
9494                                                 dev_extent_rec->objectid,
9495                                                 dev_extent_rec->offset,
9496                                                 dev_extent_rec->length);
9497                                 ret = -1;
9498                         } else {
9499                                 list_move(&dev_extent_rec->chunk_list,
9500                                           &chunk_rec->dextents);
9501                         }
9502                 } else {
9503                         if (!silent)
9504                                 fprintf(stderr,
9505                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9506                                         chunk_rec->objectid,
9507                                         chunk_rec->type,
9508                                         chunk_rec->offset,
9509                                         chunk_rec->stripes[i].devid,
9510                                         chunk_rec->stripes[i].offset);
9511                         ret = -1;
9512                 }
9513         }
9514         return ret;
9515 }
9516
9517 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9518 int check_chunks(struct cache_tree *chunk_cache,
9519                  struct block_group_tree *block_group_cache,
9520                  struct device_extent_tree *dev_extent_cache,
9521                  struct list_head *good, struct list_head *bad,
9522                  struct list_head *rebuild, int silent)
9523 {
9524         struct cache_extent *chunk_item;
9525         struct chunk_record *chunk_rec;
9526         struct block_group_record *bg_rec;
9527         struct device_extent_record *dext_rec;
9528         int err;
9529         int ret = 0;
9530
9531         chunk_item = first_cache_extent(chunk_cache);
9532         while (chunk_item) {
9533                 chunk_rec = container_of(chunk_item, struct chunk_record,
9534                                          cache);
9535                 err = check_chunk_refs(chunk_rec, block_group_cache,
9536                                        dev_extent_cache, silent);
9537                 if (err < 0)
9538                         ret = err;
9539                 if (err == 0 && good)
9540                         list_add_tail(&chunk_rec->list, good);
9541                 if (err > 0 && rebuild)
9542                         list_add_tail(&chunk_rec->list, rebuild);
9543                 if (err < 0 && bad)
9544                         list_add_tail(&chunk_rec->list, bad);
9545                 chunk_item = next_cache_extent(chunk_item);
9546         }
9547
9548         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9549                 if (!silent)
9550                         fprintf(stderr,
9551                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9552                                 bg_rec->objectid,
9553                                 bg_rec->offset,
9554                                 bg_rec->flags);
9555                 if (!ret)
9556                         ret = 1;
9557         }
9558
9559         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9560                             chunk_list) {
9561                 if (!silent)
9562                         fprintf(stderr,
9563                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9564                                 dext_rec->objectid,
9565                                 dext_rec->offset,
9566                                 dext_rec->length);
9567                 if (!ret)
9568                         ret = 1;
9569         }
9570         return ret;
9571 }
9572
9573
9574 static int check_device_used(struct device_record *dev_rec,
9575                              struct device_extent_tree *dext_cache)
9576 {
9577         struct cache_extent *cache;
9578         struct device_extent_record *dev_extent_rec;
9579         u64 total_byte = 0;
9580
9581         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9582         while (cache) {
9583                 dev_extent_rec = container_of(cache,
9584                                               struct device_extent_record,
9585                                               cache);
9586                 if (dev_extent_rec->objectid != dev_rec->devid)
9587                         break;
9588
9589                 list_del_init(&dev_extent_rec->device_list);
9590                 total_byte += dev_extent_rec->length;
9591                 cache = next_cache_extent(cache);
9592         }
9593
9594         if (total_byte != dev_rec->byte_used) {
9595                 fprintf(stderr,
9596                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9597                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9598                         dev_rec->type, dev_rec->offset);
9599                 return -1;
9600         } else {
9601                 return 0;
9602         }
9603 }
9604
9605 /* check btrfs_dev_item -> btrfs_dev_extent */
9606 static int check_devices(struct rb_root *dev_cache,
9607                          struct device_extent_tree *dev_extent_cache)
9608 {
9609         struct rb_node *dev_node;
9610         struct device_record *dev_rec;
9611         struct device_extent_record *dext_rec;
9612         int err;
9613         int ret = 0;
9614
9615         dev_node = rb_first(dev_cache);
9616         while (dev_node) {
9617                 dev_rec = container_of(dev_node, struct device_record, node);
9618                 err = check_device_used(dev_rec, dev_extent_cache);
9619                 if (err)
9620                         ret = err;
9621
9622                 dev_node = rb_next(dev_node);
9623         }
9624         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9625                             device_list) {
9626                 fprintf(stderr,
9627                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9628                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9629                 if (!ret)
9630                         ret = 1;
9631         }
9632         return ret;
9633 }
9634
9635 static int add_root_item_to_list(struct list_head *head,
9636                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9637                                   u8 level, u8 drop_level,
9638                                   int level_size, struct btrfs_key *drop_key)
9639 {
9640
9641         struct root_item_record *ri_rec;
9642         ri_rec = malloc(sizeof(*ri_rec));
9643         if (!ri_rec)
9644                 return -ENOMEM;
9645         ri_rec->bytenr = bytenr;
9646         ri_rec->objectid = objectid;
9647         ri_rec->level = level;
9648         ri_rec->level_size = level_size;
9649         ri_rec->drop_level = drop_level;
9650         ri_rec->last_snapshot = last_snapshot;
9651         if (drop_key)
9652                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9653         list_add_tail(&ri_rec->list, head);
9654
9655         return 0;
9656 }
9657
9658 static void free_root_item_list(struct list_head *list)
9659 {
9660         struct root_item_record *ri_rec;
9661
9662         while (!list_empty(list)) {
9663                 ri_rec = list_first_entry(list, struct root_item_record,
9664                                           list);
9665                 list_del_init(&ri_rec->list);
9666                 free(ri_rec);
9667         }
9668 }
9669
9670 static int deal_root_from_list(struct list_head *list,
9671                                struct btrfs_root *root,
9672                                struct block_info *bits,
9673                                int bits_nr,
9674                                struct cache_tree *pending,
9675                                struct cache_tree *seen,
9676                                struct cache_tree *reada,
9677                                struct cache_tree *nodes,
9678                                struct cache_tree *extent_cache,
9679                                struct cache_tree *chunk_cache,
9680                                struct rb_root *dev_cache,
9681                                struct block_group_tree *block_group_cache,
9682                                struct device_extent_tree *dev_extent_cache)
9683 {
9684         int ret = 0;
9685         u64 last;
9686
9687         while (!list_empty(list)) {
9688                 struct root_item_record *rec;
9689                 struct extent_buffer *buf;
9690                 rec = list_entry(list->next,
9691                                  struct root_item_record, list);
9692                 last = 0;
9693                 buf = read_tree_block(root->fs_info->tree_root,
9694                                       rec->bytenr, rec->level_size, 0);
9695                 if (!extent_buffer_uptodate(buf)) {
9696                         free_extent_buffer(buf);
9697                         ret = -EIO;
9698                         break;
9699                 }
9700                 ret = add_root_to_pending(buf, extent_cache, pending,
9701                                     seen, nodes, rec->objectid);
9702                 if (ret < 0)
9703                         break;
9704                 /*
9705                  * To rebuild extent tree, we need deal with snapshot
9706                  * one by one, otherwise we deal with node firstly which
9707                  * can maximize readahead.
9708                  */
9709                 while (1) {
9710                         ret = run_next_block(root, bits, bits_nr, &last,
9711                                              pending, seen, reada, nodes,
9712                                              extent_cache, chunk_cache,
9713                                              dev_cache, block_group_cache,
9714                                              dev_extent_cache, rec);
9715                         if (ret != 0)
9716                                 break;
9717                 }
9718                 free_extent_buffer(buf);
9719                 list_del(&rec->list);
9720                 free(rec);
9721                 if (ret < 0)
9722                         break;
9723         }
9724         while (ret >= 0) {
9725                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9726                                      reada, nodes, extent_cache, chunk_cache,
9727                                      dev_cache, block_group_cache,
9728                                      dev_extent_cache, NULL);
9729                 if (ret != 0) {
9730                         if (ret > 0)
9731                                 ret = 0;
9732                         break;
9733                 }
9734         }
9735         return ret;
9736 }
9737
9738 static int check_chunks_and_extents(struct btrfs_root *root)
9739 {
9740         struct rb_root dev_cache;
9741         struct cache_tree chunk_cache;
9742         struct block_group_tree block_group_cache;
9743         struct device_extent_tree dev_extent_cache;
9744         struct cache_tree extent_cache;
9745         struct cache_tree seen;
9746         struct cache_tree pending;
9747         struct cache_tree reada;
9748         struct cache_tree nodes;
9749         struct extent_io_tree excluded_extents;
9750         struct cache_tree corrupt_blocks;
9751         struct btrfs_path path;
9752         struct btrfs_key key;
9753         struct btrfs_key found_key;
9754         int ret, err = 0;
9755         struct block_info *bits;
9756         int bits_nr;
9757         struct extent_buffer *leaf;
9758         int slot;
9759         struct btrfs_root_item ri;
9760         struct list_head dropping_trees;
9761         struct list_head normal_trees;
9762         struct btrfs_root *root1;
9763         u64 objectid;
9764         u32 level_size;
9765         u8 level;
9766
9767         dev_cache = RB_ROOT;
9768         cache_tree_init(&chunk_cache);
9769         block_group_tree_init(&block_group_cache);
9770         device_extent_tree_init(&dev_extent_cache);
9771
9772         cache_tree_init(&extent_cache);
9773         cache_tree_init(&seen);
9774         cache_tree_init(&pending);
9775         cache_tree_init(&nodes);
9776         cache_tree_init(&reada);
9777         cache_tree_init(&corrupt_blocks);
9778         extent_io_tree_init(&excluded_extents);
9779         INIT_LIST_HEAD(&dropping_trees);
9780         INIT_LIST_HEAD(&normal_trees);
9781
9782         if (repair) {
9783                 root->fs_info->excluded_extents = &excluded_extents;
9784                 root->fs_info->fsck_extent_cache = &extent_cache;
9785                 root->fs_info->free_extent_hook = free_extent_hook;
9786                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9787         }
9788
9789         bits_nr = 1024;
9790         bits = malloc(bits_nr * sizeof(struct block_info));
9791         if (!bits) {
9792                 perror("malloc");
9793                 exit(1);
9794         }
9795
9796         if (ctx.progress_enabled) {
9797                 ctx.tp = TASK_EXTENTS;
9798                 task_start(ctx.info);
9799         }
9800
9801 again:
9802         root1 = root->fs_info->tree_root;
9803         level = btrfs_header_level(root1->node);
9804         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9805                                     root1->node->start, 0, level, 0,
9806                                     root1->nodesize, NULL);
9807         if (ret < 0)
9808                 goto out;
9809         root1 = root->fs_info->chunk_root;
9810         level = btrfs_header_level(root1->node);
9811         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9812                                     root1->node->start, 0, level, 0,
9813                                     root1->nodesize, NULL);
9814         if (ret < 0)
9815                 goto out;
9816         btrfs_init_path(&path);
9817         key.offset = 0;
9818         key.objectid = 0;
9819         key.type = BTRFS_ROOT_ITEM_KEY;
9820         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9821                                         &key, &path, 0, 0);
9822         if (ret < 0)
9823                 goto out;
9824         while(1) {
9825                 leaf = path.nodes[0];
9826                 slot = path.slots[0];
9827                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9828                         ret = btrfs_next_leaf(root, &path);
9829                         if (ret != 0)
9830                                 break;
9831                         leaf = path.nodes[0];
9832                         slot = path.slots[0];
9833                 }
9834                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9835                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9836                         unsigned long offset;
9837                         u64 last_snapshot;
9838
9839                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9840                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9841                         last_snapshot = btrfs_root_last_snapshot(&ri);
9842                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9843                                 level = btrfs_root_level(&ri);
9844                                 level_size = root->nodesize;
9845                                 ret = add_root_item_to_list(&normal_trees,
9846                                                 found_key.objectid,
9847                                                 btrfs_root_bytenr(&ri),
9848                                                 last_snapshot, level,
9849                                                 0, level_size, NULL);
9850                                 if (ret < 0)
9851                                         goto out;
9852                         } else {
9853                                 level = btrfs_root_level(&ri);
9854                                 level_size = root->nodesize;
9855                                 objectid = found_key.objectid;
9856                                 btrfs_disk_key_to_cpu(&found_key,
9857                                                       &ri.drop_progress);
9858                                 ret = add_root_item_to_list(&dropping_trees,
9859                                                 objectid,
9860                                                 btrfs_root_bytenr(&ri),
9861                                                 last_snapshot, level,
9862                                                 ri.drop_level,
9863                                                 level_size, &found_key);
9864                                 if (ret < 0)
9865                                         goto out;
9866                         }
9867                 }
9868                 path.slots[0]++;
9869         }
9870         btrfs_release_path(&path);
9871
9872         /*
9873          * check_block can return -EAGAIN if it fixes something, please keep
9874          * this in mind when dealing with return values from these functions, if
9875          * we get -EAGAIN we want to fall through and restart the loop.
9876          */
9877         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9878                                   &seen, &reada, &nodes, &extent_cache,
9879                                   &chunk_cache, &dev_cache, &block_group_cache,
9880                                   &dev_extent_cache);
9881         if (ret < 0) {
9882                 if (ret == -EAGAIN)
9883                         goto loop;
9884                 goto out;
9885         }
9886         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9887                                   &pending, &seen, &reada, &nodes,
9888                                   &extent_cache, &chunk_cache, &dev_cache,
9889                                   &block_group_cache, &dev_extent_cache);
9890         if (ret < 0) {
9891                 if (ret == -EAGAIN)
9892                         goto loop;
9893                 goto out;
9894         }
9895
9896         ret = check_chunks(&chunk_cache, &block_group_cache,
9897                            &dev_extent_cache, NULL, NULL, NULL, 0);
9898         if (ret) {
9899                 if (ret == -EAGAIN)
9900                         goto loop;
9901                 err = ret;
9902         }
9903
9904         ret = check_extent_refs(root, &extent_cache);
9905         if (ret < 0) {
9906                 if (ret == -EAGAIN)
9907                         goto loop;
9908                 goto out;
9909         }
9910
9911         ret = check_devices(&dev_cache, &dev_extent_cache);
9912         if (ret && err)
9913                 ret = err;
9914
9915 out:
9916         task_stop(ctx.info);
9917         if (repair) {
9918                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9919                 extent_io_tree_cleanup(&excluded_extents);
9920                 root->fs_info->fsck_extent_cache = NULL;
9921                 root->fs_info->free_extent_hook = NULL;
9922                 root->fs_info->corrupt_blocks = NULL;
9923                 root->fs_info->excluded_extents = NULL;
9924         }
9925         free(bits);
9926         free_chunk_cache_tree(&chunk_cache);
9927         free_device_cache_tree(&dev_cache);
9928         free_block_group_tree(&block_group_cache);
9929         free_device_extent_tree(&dev_extent_cache);
9930         free_extent_cache_tree(&seen);
9931         free_extent_cache_tree(&pending);
9932         free_extent_cache_tree(&reada);
9933         free_extent_cache_tree(&nodes);
9934         return ret;
9935 loop:
9936         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9937         free_extent_cache_tree(&seen);
9938         free_extent_cache_tree(&pending);
9939         free_extent_cache_tree(&reada);
9940         free_extent_cache_tree(&nodes);
9941         free_chunk_cache_tree(&chunk_cache);
9942         free_block_group_tree(&block_group_cache);
9943         free_device_cache_tree(&dev_cache);
9944         free_device_extent_tree(&dev_extent_cache);
9945         free_extent_record_cache(&extent_cache);
9946         free_root_item_list(&normal_trees);
9947         free_root_item_list(&dropping_trees);
9948         extent_io_tree_cleanup(&excluded_extents);
9949         goto again;
9950 }
9951
9952 /*
9953  * Check backrefs of a tree block given by @bytenr or @eb.
9954  *
9955  * @root:       the root containing the @bytenr or @eb
9956  * @eb:         tree block extent buffer, can be NULL
9957  * @bytenr:     bytenr of the tree block to search
9958  * @level:      tree level of the tree block
9959  * @owner:      owner of the tree block
9960  *
9961  * Return >0 for any error found and output error message
9962  * Return 0 for no error found
9963  */
9964 static int check_tree_block_ref(struct btrfs_root *root,
9965                                 struct extent_buffer *eb, u64 bytenr,
9966                                 int level, u64 owner)
9967 {
9968         struct btrfs_key key;
9969         struct btrfs_root *extent_root = root->fs_info->extent_root;
9970         struct btrfs_path path;
9971         struct btrfs_extent_item *ei;
9972         struct btrfs_extent_inline_ref *iref;
9973         struct extent_buffer *leaf;
9974         unsigned long end;
9975         unsigned long ptr;
9976         int slot;
9977         int skinny_level;
9978         int type;
9979         u32 nodesize = root->nodesize;
9980         u32 item_size;
9981         u64 offset;
9982         int tree_reloc_root = 0;
9983         int found_ref = 0;
9984         int err = 0;
9985         int ret;
9986
9987         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9988             btrfs_header_bytenr(root->node) == bytenr)
9989                 tree_reloc_root = 1;
9990
9991         btrfs_init_path(&path);
9992         key.objectid = bytenr;
9993         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9994                 key.type = BTRFS_METADATA_ITEM_KEY;
9995         else
9996                 key.type = BTRFS_EXTENT_ITEM_KEY;
9997         key.offset = (u64)-1;
9998
9999         /* Search for the backref in extent tree */
10000         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10001         if (ret < 0) {
10002                 err |= BACKREF_MISSING;
10003                 goto out;
10004         }
10005         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10006         if (ret) {
10007                 err |= BACKREF_MISSING;
10008                 goto out;
10009         }
10010
10011         leaf = path.nodes[0];
10012         slot = path.slots[0];
10013         btrfs_item_key_to_cpu(leaf, &key, slot);
10014
10015         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10016
10017         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10018                 skinny_level = (int)key.offset;
10019                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10020         } else {
10021                 struct btrfs_tree_block_info *info;
10022
10023                 info = (struct btrfs_tree_block_info *)(ei + 1);
10024                 skinny_level = btrfs_tree_block_level(leaf, info);
10025                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10026         }
10027
10028         if (eb) {
10029                 u64 header_gen;
10030                 u64 extent_gen;
10031
10032                 if (!(btrfs_extent_flags(leaf, ei) &
10033                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10034                         error(
10035                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10036                                 key.objectid, nodesize,
10037                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10038                         err = BACKREF_MISMATCH;
10039                 }
10040                 header_gen = btrfs_header_generation(eb);
10041                 extent_gen = btrfs_extent_generation(leaf, ei);
10042                 if (header_gen != extent_gen) {
10043                         error(
10044         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10045                                 key.objectid, nodesize, header_gen,
10046                                 extent_gen);
10047                         err = BACKREF_MISMATCH;
10048                 }
10049                 if (level != skinny_level) {
10050                         error(
10051                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10052                                 key.objectid, nodesize, level, skinny_level);
10053                         err = BACKREF_MISMATCH;
10054                 }
10055                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10056                         error(
10057                         "extent[%llu %u] is referred by other roots than %llu",
10058                                 key.objectid, nodesize, root->objectid);
10059                         err = BACKREF_MISMATCH;
10060                 }
10061         }
10062
10063         /*
10064          * Iterate the extent/metadata item to find the exact backref
10065          */
10066         item_size = btrfs_item_size_nr(leaf, slot);
10067         ptr = (unsigned long)iref;
10068         end = (unsigned long)ei + item_size;
10069         while (ptr < end) {
10070                 iref = (struct btrfs_extent_inline_ref *)ptr;
10071                 type = btrfs_extent_inline_ref_type(leaf, iref);
10072                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10073
10074                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10075                         (offset == root->objectid || offset == owner)) {
10076                         found_ref = 1;
10077                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10078                         /*
10079                          * Backref of tree reloc root points to itself, no need
10080                          * to check backref any more.
10081                          */
10082                         if (tree_reloc_root)
10083                                 found_ref = 1;
10084                         else
10085                         /* Check if the backref points to valid referencer */
10086                                 found_ref = !check_tree_block_ref(root, NULL,
10087                                                 offset, level + 1, owner);
10088                 }
10089
10090                 if (found_ref)
10091                         break;
10092                 ptr += btrfs_extent_inline_ref_size(type);
10093         }
10094
10095         /*
10096          * Inlined extent item doesn't have what we need, check
10097          * TREE_BLOCK_REF_KEY
10098          */
10099         if (!found_ref) {
10100                 btrfs_release_path(&path);
10101                 key.objectid = bytenr;
10102                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10103                 key.offset = root->objectid;
10104
10105                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10106                 if (!ret)
10107                         found_ref = 1;
10108         }
10109         if (!found_ref)
10110                 err |= BACKREF_MISSING;
10111 out:
10112         btrfs_release_path(&path);
10113         if (eb && (err & BACKREF_MISSING))
10114                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10115                         bytenr, nodesize, owner, level);
10116         return err;
10117 }
10118
10119 /*
10120  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10121  *
10122  * Return >0 any error found and output error message
10123  * Return 0 for no error found
10124  */
10125 static int check_extent_data_item(struct btrfs_root *root,
10126                                   struct extent_buffer *eb, int slot)
10127 {
10128         struct btrfs_file_extent_item *fi;
10129         struct btrfs_path path;
10130         struct btrfs_root *extent_root = root->fs_info->extent_root;
10131         struct btrfs_key fi_key;
10132         struct btrfs_key dbref_key;
10133         struct extent_buffer *leaf;
10134         struct btrfs_extent_item *ei;
10135         struct btrfs_extent_inline_ref *iref;
10136         struct btrfs_extent_data_ref *dref;
10137         u64 owner;
10138         u64 disk_bytenr;
10139         u64 disk_num_bytes;
10140         u64 extent_num_bytes;
10141         u64 extent_flags;
10142         u32 item_size;
10143         unsigned long end;
10144         unsigned long ptr;
10145         int type;
10146         u64 ref_root;
10147         int found_dbackref = 0;
10148         int err = 0;
10149         int ret;
10150
10151         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10152         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10153
10154         /* Nothing to check for hole and inline data extents */
10155         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10156             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10157                 return 0;
10158
10159         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10160         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10161         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10162
10163         /* Check unaligned disk_num_bytes and num_bytes */
10164         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10165                 error(
10166 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10167                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10168                         root->sectorsize);
10169                 err |= BYTES_UNALIGNED;
10170         } else {
10171                 data_bytes_allocated += disk_num_bytes;
10172         }
10173         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10174                 error(
10175 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10176                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10177                         root->sectorsize);
10178                 err |= BYTES_UNALIGNED;
10179         } else {
10180                 data_bytes_referenced += extent_num_bytes;
10181         }
10182         owner = btrfs_header_owner(eb);
10183
10184         /* Check the extent item of the file extent in extent tree */
10185         btrfs_init_path(&path);
10186         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10187         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10188         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10189
10190         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10191         if (ret) {
10192                 err |= BACKREF_MISSING;
10193                 goto error;
10194         }
10195
10196         leaf = path.nodes[0];
10197         slot = path.slots[0];
10198         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10199
10200         extent_flags = btrfs_extent_flags(leaf, ei);
10201
10202         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10203                 error(
10204                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10205                     disk_bytenr, disk_num_bytes,
10206                     BTRFS_EXTENT_FLAG_DATA);
10207                 err |= BACKREF_MISMATCH;
10208         }
10209
10210         /* Check data backref inside that extent item */
10211         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10212         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10213         ptr = (unsigned long)iref;
10214         end = (unsigned long)ei + item_size;
10215         while (ptr < end) {
10216                 iref = (struct btrfs_extent_inline_ref *)ptr;
10217                 type = btrfs_extent_inline_ref_type(leaf, iref);
10218                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10219
10220                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10221                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10222                         if (ref_root == owner || ref_root == root->objectid)
10223                                 found_dbackref = 1;
10224                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10225                         found_dbackref = !check_tree_block_ref(root, NULL,
10226                                 btrfs_extent_inline_ref_offset(leaf, iref),
10227                                 0, owner);
10228                 }
10229
10230                 if (found_dbackref)
10231                         break;
10232                 ptr += btrfs_extent_inline_ref_size(type);
10233         }
10234
10235         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10236         if (!found_dbackref) {
10237                 btrfs_release_path(&path);
10238
10239                 btrfs_init_path(&path);
10240                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10241                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10242                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10243                                 fi_key.objectid, fi_key.offset);
10244
10245                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10246                                         &dbref_key, &path, 0, 0);
10247                 if (!ret)
10248                         found_dbackref = 1;
10249         }
10250
10251         if (!found_dbackref)
10252                 err |= BACKREF_MISSING;
10253 error:
10254         btrfs_release_path(&path);
10255         if (err & BACKREF_MISSING) {
10256                 error("data extent[%llu %llu] backref lost",
10257                       disk_bytenr, disk_num_bytes);
10258         }
10259         return err;
10260 }
10261
10262 /*
10263  * Get real tree block level for the case like shared block
10264  * Return >= 0 as tree level
10265  * Return <0 for error
10266  */
10267 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10268 {
10269         struct extent_buffer *eb;
10270         struct btrfs_path path;
10271         struct btrfs_key key;
10272         struct btrfs_extent_item *ei;
10273         u64 flags;
10274         u64 transid;
10275         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10276         u8 backref_level;
10277         u8 header_level;
10278         int ret;
10279
10280         /* Search extent tree for extent generation and level */
10281         key.objectid = bytenr;
10282         key.type = BTRFS_METADATA_ITEM_KEY;
10283         key.offset = (u64)-1;
10284
10285         btrfs_init_path(&path);
10286         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10287         if (ret < 0)
10288                 goto release_out;
10289         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10290         if (ret < 0)
10291                 goto release_out;
10292         if (ret > 0) {
10293                 ret = -ENOENT;
10294                 goto release_out;
10295         }
10296
10297         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10298         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10299                             struct btrfs_extent_item);
10300         flags = btrfs_extent_flags(path.nodes[0], ei);
10301         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10302                 ret = -ENOENT;
10303                 goto release_out;
10304         }
10305
10306         /* Get transid for later read_tree_block() check */
10307         transid = btrfs_extent_generation(path.nodes[0], ei);
10308
10309         /* Get backref level as one source */
10310         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10311                 backref_level = key.offset;
10312         } else {
10313                 struct btrfs_tree_block_info *info;
10314
10315                 info = (struct btrfs_tree_block_info *)(ei + 1);
10316                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10317         }
10318         btrfs_release_path(&path);
10319
10320         /* Get level from tree block as an alternative source */
10321         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10322         if (!extent_buffer_uptodate(eb)) {
10323                 free_extent_buffer(eb);
10324                 return -EIO;
10325         }
10326         header_level = btrfs_header_level(eb);
10327         free_extent_buffer(eb);
10328
10329         if (header_level != backref_level)
10330                 return -EIO;
10331         return header_level;
10332
10333 release_out:
10334         btrfs_release_path(&path);
10335         return ret;
10336 }
10337
10338 /*
10339  * Check if a tree block backref is valid (points to a valid tree block)
10340  * if level == -1, level will be resolved
10341  * Return >0 for any error found and print error message
10342  */
10343 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10344                                     u64 bytenr, int level)
10345 {
10346         struct btrfs_root *root;
10347         struct btrfs_key key;
10348         struct btrfs_path path;
10349         struct extent_buffer *eb;
10350         struct extent_buffer *node;
10351         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10352         int err = 0;
10353         int ret;
10354
10355         /* Query level for level == -1 special case */
10356         if (level == -1)
10357                 level = query_tree_block_level(fs_info, bytenr);
10358         if (level < 0) {
10359                 err |= REFERENCER_MISSING;
10360                 goto out;
10361         }
10362
10363         key.objectid = root_id;
10364         key.type = BTRFS_ROOT_ITEM_KEY;
10365         key.offset = (u64)-1;
10366
10367         root = btrfs_read_fs_root(fs_info, &key);
10368         if (IS_ERR(root)) {
10369                 err |= REFERENCER_MISSING;
10370                 goto out;
10371         }
10372
10373         /* Read out the tree block to get item/node key */
10374         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10375         if (!extent_buffer_uptodate(eb)) {
10376                 err |= REFERENCER_MISSING;
10377                 free_extent_buffer(eb);
10378                 goto out;
10379         }
10380
10381         /* Empty tree, no need to check key */
10382         if (!btrfs_header_nritems(eb) && !level) {
10383                 free_extent_buffer(eb);
10384                 goto out;
10385         }
10386
10387         if (level)
10388                 btrfs_node_key_to_cpu(eb, &key, 0);
10389         else
10390                 btrfs_item_key_to_cpu(eb, &key, 0);
10391
10392         free_extent_buffer(eb);
10393
10394         btrfs_init_path(&path);
10395         path.lowest_level = level;
10396         /* Search with the first key, to ensure we can reach it */
10397         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10398         if (ret < 0) {
10399                 err |= REFERENCER_MISSING;
10400                 goto release_out;
10401         }
10402
10403         node = path.nodes[level];
10404         if (btrfs_header_bytenr(node) != bytenr) {
10405                 error(
10406         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10407                         bytenr, nodesize, bytenr,
10408                         btrfs_header_bytenr(node));
10409                 err |= REFERENCER_MISMATCH;
10410         }
10411         if (btrfs_header_level(node) != level) {
10412                 error(
10413         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10414                         bytenr, nodesize, level,
10415                         btrfs_header_level(node));
10416                 err |= REFERENCER_MISMATCH;
10417         }
10418
10419 release_out:
10420         btrfs_release_path(&path);
10421 out:
10422         if (err & REFERENCER_MISSING) {
10423                 if (level < 0)
10424                         error("extent [%llu %d] lost referencer (owner: %llu)",
10425                                 bytenr, nodesize, root_id);
10426                 else
10427                         error(
10428                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10429                                 bytenr, nodesize, root_id, level);
10430         }
10431
10432         return err;
10433 }
10434
10435 /*
10436  * Check if tree block @eb is tree reloc root.
10437  * Return 0 if it's not or any problem happens
10438  * Return 1 if it's a tree reloc root
10439  */
10440 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10441                                  struct extent_buffer *eb)
10442 {
10443         struct btrfs_root *tree_reloc_root;
10444         struct btrfs_key key;
10445         u64 bytenr = btrfs_header_bytenr(eb);
10446         u64 owner = btrfs_header_owner(eb);
10447         int ret = 0;
10448
10449         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10450         key.offset = owner;
10451         key.type = BTRFS_ROOT_ITEM_KEY;
10452
10453         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10454         if (IS_ERR(tree_reloc_root))
10455                 return 0;
10456
10457         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10458                 ret = 1;
10459         btrfs_free_fs_root(tree_reloc_root);
10460         return ret;
10461 }
10462
10463 /*
10464  * Check referencer for shared block backref
10465  * If level == -1, this function will resolve the level.
10466  */
10467 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10468                                      u64 parent, u64 bytenr, int level)
10469 {
10470         struct extent_buffer *eb;
10471         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10472         u32 nr;
10473         int found_parent = 0;
10474         int i;
10475
10476         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10477         if (!extent_buffer_uptodate(eb))
10478                 goto out;
10479
10480         if (level == -1)
10481                 level = query_tree_block_level(fs_info, bytenr);
10482         if (level < 0)
10483                 goto out;
10484
10485         /* It's possible it's a tree reloc root */
10486         if (parent == bytenr) {
10487                 if (is_tree_reloc_root(fs_info, eb))
10488                         found_parent = 1;
10489                 goto out;
10490         }
10491
10492         if (level + 1 != btrfs_header_level(eb))
10493                 goto out;
10494
10495         nr = btrfs_header_nritems(eb);
10496         for (i = 0; i < nr; i++) {
10497                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10498                         found_parent = 1;
10499                         break;
10500                 }
10501         }
10502 out:
10503         free_extent_buffer(eb);
10504         if (!found_parent) {
10505                 error(
10506         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10507                         bytenr, nodesize, parent, level);
10508                 return REFERENCER_MISSING;
10509         }
10510         return 0;
10511 }
10512
10513 /*
10514  * Check referencer for normal (inlined) data ref
10515  * If len == 0, it will be resolved by searching in extent tree
10516  */
10517 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10518                                      u64 root_id, u64 objectid, u64 offset,
10519                                      u64 bytenr, u64 len, u32 count)
10520 {
10521         struct btrfs_root *root;
10522         struct btrfs_root *extent_root = fs_info->extent_root;
10523         struct btrfs_key key;
10524         struct btrfs_path path;
10525         struct extent_buffer *leaf;
10526         struct btrfs_file_extent_item *fi;
10527         u32 found_count = 0;
10528         int slot;
10529         int ret = 0;
10530
10531         if (!len) {
10532                 key.objectid = bytenr;
10533                 key.type = BTRFS_EXTENT_ITEM_KEY;
10534                 key.offset = (u64)-1;
10535
10536                 btrfs_init_path(&path);
10537                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10538                 if (ret < 0)
10539                         goto out;
10540                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10541                 if (ret)
10542                         goto out;
10543                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10544                 if (key.objectid != bytenr ||
10545                     key.type != BTRFS_EXTENT_ITEM_KEY)
10546                         goto out;
10547                 len = key.offset;
10548                 btrfs_release_path(&path);
10549         }
10550         key.objectid = root_id;
10551         key.type = BTRFS_ROOT_ITEM_KEY;
10552         key.offset = (u64)-1;
10553         btrfs_init_path(&path);
10554
10555         root = btrfs_read_fs_root(fs_info, &key);
10556         if (IS_ERR(root))
10557                 goto out;
10558
10559         key.objectid = objectid;
10560         key.type = BTRFS_EXTENT_DATA_KEY;
10561         /*
10562          * It can be nasty as data backref offset is
10563          * file offset - file extent offset, which is smaller or
10564          * equal to original backref offset.  The only special case is
10565          * overflow.  So we need to special check and do further search.
10566          */
10567         key.offset = offset & (1ULL << 63) ? 0 : offset;
10568
10569         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10570         if (ret < 0)
10571                 goto out;
10572
10573         /*
10574          * Search afterwards to get correct one
10575          * NOTE: As we must do a comprehensive check on the data backref to
10576          * make sure the dref count also matches, we must iterate all file
10577          * extents for that inode.
10578          */
10579         while (1) {
10580                 leaf = path.nodes[0];
10581                 slot = path.slots[0];
10582
10583                 btrfs_item_key_to_cpu(leaf, &key, slot);
10584                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10585                         break;
10586                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10587                 /*
10588                  * Except normal disk bytenr and disk num bytes, we still
10589                  * need to do extra check on dbackref offset as
10590                  * dbackref offset = file_offset - file_extent_offset
10591                  */
10592                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10593                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10594                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10595                     offset)
10596                         found_count++;
10597
10598                 ret = btrfs_next_item(root, &path);
10599                 if (ret)
10600                         break;
10601         }
10602 out:
10603         btrfs_release_path(&path);
10604         if (found_count != count) {
10605                 error(
10606 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10607                         bytenr, len, root_id, objectid, offset, count, found_count);
10608                 return REFERENCER_MISSING;
10609         }
10610         return 0;
10611 }
10612
10613 /*
10614  * Check if the referencer of a shared data backref exists
10615  */
10616 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10617                                      u64 parent, u64 bytenr)
10618 {
10619         struct extent_buffer *eb;
10620         struct btrfs_key key;
10621         struct btrfs_file_extent_item *fi;
10622         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10623         u32 nr;
10624         int found_parent = 0;
10625         int i;
10626
10627         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10628         if (!extent_buffer_uptodate(eb))
10629                 goto out;
10630
10631         nr = btrfs_header_nritems(eb);
10632         for (i = 0; i < nr; i++) {
10633                 btrfs_item_key_to_cpu(eb, &key, i);
10634                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10635                         continue;
10636
10637                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10638                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10639                         continue;
10640
10641                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10642                         found_parent = 1;
10643                         break;
10644                 }
10645         }
10646
10647 out:
10648         free_extent_buffer(eb);
10649         if (!found_parent) {
10650                 error("shared extent %llu referencer lost (parent: %llu)",
10651                         bytenr, parent);
10652                 return REFERENCER_MISSING;
10653         }
10654         return 0;
10655 }
10656
10657 /*
10658  * This function will check a given extent item, including its backref and
10659  * itself (like crossing stripe boundary and type)
10660  *
10661  * Since we don't use extent_record anymore, introduce new error bit
10662  */
10663 static int check_extent_item(struct btrfs_fs_info *fs_info,
10664                              struct extent_buffer *eb, int slot)
10665 {
10666         struct btrfs_extent_item *ei;
10667         struct btrfs_extent_inline_ref *iref;
10668         struct btrfs_extent_data_ref *dref;
10669         unsigned long end;
10670         unsigned long ptr;
10671         int type;
10672         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10673         u32 item_size = btrfs_item_size_nr(eb, slot);
10674         u64 flags;
10675         u64 offset;
10676         int metadata = 0;
10677         int level;
10678         struct btrfs_key key;
10679         int ret;
10680         int err = 0;
10681
10682         btrfs_item_key_to_cpu(eb, &key, slot);
10683         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10684                 bytes_used += key.offset;
10685         else
10686                 bytes_used += nodesize;
10687
10688         if (item_size < sizeof(*ei)) {
10689                 /*
10690                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10691                  * old thing when on disk format is still un-determined.
10692                  * No need to care about it anymore
10693                  */
10694                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10695                 return -ENOTTY;
10696         }
10697
10698         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10699         flags = btrfs_extent_flags(eb, ei);
10700
10701         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10702                 metadata = 1;
10703         if (metadata && check_crossing_stripes(global_info, key.objectid,
10704                                                eb->len)) {
10705                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10706                       key.objectid, key.objectid + nodesize);
10707                 err |= CROSSING_STRIPE_BOUNDARY;
10708         }
10709
10710         ptr = (unsigned long)(ei + 1);
10711
10712         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10713                 /* Old EXTENT_ITEM metadata */
10714                 struct btrfs_tree_block_info *info;
10715
10716                 info = (struct btrfs_tree_block_info *)ptr;
10717                 level = btrfs_tree_block_level(eb, info);
10718                 ptr += sizeof(struct btrfs_tree_block_info);
10719         } else {
10720                 /* New METADATA_ITEM */
10721                 level = key.offset;
10722         }
10723         end = (unsigned long)ei + item_size;
10724
10725         if (ptr >= end) {
10726                 err |= ITEM_SIZE_MISMATCH;
10727                 goto out;
10728         }
10729
10730         /* Now check every backref in this extent item */
10731 next:
10732         iref = (struct btrfs_extent_inline_ref *)ptr;
10733         type = btrfs_extent_inline_ref_type(eb, iref);
10734         offset = btrfs_extent_inline_ref_offset(eb, iref);
10735         switch (type) {
10736         case BTRFS_TREE_BLOCK_REF_KEY:
10737                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10738                                                level);
10739                 err |= ret;
10740                 break;
10741         case BTRFS_SHARED_BLOCK_REF_KEY:
10742                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10743                                                  level);
10744                 err |= ret;
10745                 break;
10746         case BTRFS_EXTENT_DATA_REF_KEY:
10747                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10748                 ret = check_extent_data_backref(fs_info,
10749                                 btrfs_extent_data_ref_root(eb, dref),
10750                                 btrfs_extent_data_ref_objectid(eb, dref),
10751                                 btrfs_extent_data_ref_offset(eb, dref),
10752                                 key.objectid, key.offset,
10753                                 btrfs_extent_data_ref_count(eb, dref));
10754                 err |= ret;
10755                 break;
10756         case BTRFS_SHARED_DATA_REF_KEY:
10757                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10758                 err |= ret;
10759                 break;
10760         default:
10761                 error("extent[%llu %d %llu] has unknown ref type: %d",
10762                         key.objectid, key.type, key.offset, type);
10763                 err |= UNKNOWN_TYPE;
10764                 goto out;
10765         }
10766
10767         ptr += btrfs_extent_inline_ref_size(type);
10768         if (ptr < end)
10769                 goto next;
10770
10771 out:
10772         return err;
10773 }
10774
10775 /*
10776  * Check if a dev extent item is referred correctly by its chunk
10777  */
10778 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10779                                  struct extent_buffer *eb, int slot)
10780 {
10781         struct btrfs_root *chunk_root = fs_info->chunk_root;
10782         struct btrfs_dev_extent *ptr;
10783         struct btrfs_path path;
10784         struct btrfs_key chunk_key;
10785         struct btrfs_key devext_key;
10786         struct btrfs_chunk *chunk;
10787         struct extent_buffer *l;
10788         int num_stripes;
10789         u64 length;
10790         int i;
10791         int found_chunk = 0;
10792         int ret;
10793
10794         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10795         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10796         length = btrfs_dev_extent_length(eb, ptr);
10797
10798         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10799         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10800         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10801
10802         btrfs_init_path(&path);
10803         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10804         if (ret)
10805                 goto out;
10806
10807         l = path.nodes[0];
10808         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10809         if (btrfs_chunk_length(l, chunk) != length)
10810                 goto out;
10811
10812         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10813         for (i = 0; i < num_stripes; i++) {
10814                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10815                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10816
10817                 if (devid == devext_key.objectid &&
10818                     offset == devext_key.offset) {
10819                         found_chunk = 1;
10820                         break;
10821                 }
10822         }
10823 out:
10824         btrfs_release_path(&path);
10825         if (!found_chunk) {
10826                 error(
10827                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10828                         devext_key.objectid, devext_key.offset, length);
10829                 return REFERENCER_MISSING;
10830         }
10831         return 0;
10832 }
10833
10834 /*
10835  * Check if the used space is correct with the dev item
10836  */
10837 static int check_dev_item(struct btrfs_fs_info *fs_info,
10838                           struct extent_buffer *eb, int slot)
10839 {
10840         struct btrfs_root *dev_root = fs_info->dev_root;
10841         struct btrfs_dev_item *dev_item;
10842         struct btrfs_path path;
10843         struct btrfs_key key;
10844         struct btrfs_dev_extent *ptr;
10845         u64 dev_id;
10846         u64 used;
10847         u64 total = 0;
10848         int ret;
10849
10850         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10851         dev_id = btrfs_device_id(eb, dev_item);
10852         used = btrfs_device_bytes_used(eb, dev_item);
10853
10854         key.objectid = dev_id;
10855         key.type = BTRFS_DEV_EXTENT_KEY;
10856         key.offset = 0;
10857
10858         btrfs_init_path(&path);
10859         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10860         if (ret < 0) {
10861                 btrfs_item_key_to_cpu(eb, &key, slot);
10862                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10863                         key.objectid, key.type, key.offset);
10864                 btrfs_release_path(&path);
10865                 return REFERENCER_MISSING;
10866         }
10867
10868         /* Iterate dev_extents to calculate the used space of a device */
10869         while (1) {
10870                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10871
10872                 if (key.objectid > dev_id)
10873                         break;
10874                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10875                         goto next;
10876
10877                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10878                                      struct btrfs_dev_extent);
10879                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10880 next:
10881                 ret = btrfs_next_item(dev_root, &path);
10882                 if (ret)
10883                         break;
10884         }
10885         btrfs_release_path(&path);
10886
10887         if (used != total) {
10888                 btrfs_item_key_to_cpu(eb, &key, slot);
10889                 error(
10890 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10891                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10892                         BTRFS_DEV_EXTENT_KEY, dev_id);
10893                 return ACCOUNTING_MISMATCH;
10894         }
10895         return 0;
10896 }
10897
10898 /*
10899  * Check a block group item with its referener (chunk) and its used space
10900  * with extent/metadata item
10901  */
10902 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10903                                   struct extent_buffer *eb, int slot)
10904 {
10905         struct btrfs_root *extent_root = fs_info->extent_root;
10906         struct btrfs_root *chunk_root = fs_info->chunk_root;
10907         struct btrfs_block_group_item *bi;
10908         struct btrfs_block_group_item bg_item;
10909         struct btrfs_path path;
10910         struct btrfs_key bg_key;
10911         struct btrfs_key chunk_key;
10912         struct btrfs_key extent_key;
10913         struct btrfs_chunk *chunk;
10914         struct extent_buffer *leaf;
10915         struct btrfs_extent_item *ei;
10916         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10917         u64 flags;
10918         u64 bg_flags;
10919         u64 used;
10920         u64 total = 0;
10921         int ret;
10922         int err = 0;
10923
10924         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10925         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10926         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10927         used = btrfs_block_group_used(&bg_item);
10928         bg_flags = btrfs_block_group_flags(&bg_item);
10929
10930         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10931         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10932         chunk_key.offset = bg_key.objectid;
10933
10934         btrfs_init_path(&path);
10935         /* Search for the referencer chunk */
10936         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10937         if (ret) {
10938                 error(
10939                 "block group[%llu %llu] did not find the related chunk item",
10940                         bg_key.objectid, bg_key.offset);
10941                 err |= REFERENCER_MISSING;
10942         } else {
10943                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10944                                         struct btrfs_chunk);
10945                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10946                                                 bg_key.offset) {
10947                         error(
10948         "block group[%llu %llu] related chunk item length does not match",
10949                                 bg_key.objectid, bg_key.offset);
10950                         err |= REFERENCER_MISMATCH;
10951                 }
10952         }
10953         btrfs_release_path(&path);
10954
10955         /* Search from the block group bytenr */
10956         extent_key.objectid = bg_key.objectid;
10957         extent_key.type = 0;
10958         extent_key.offset = 0;
10959
10960         btrfs_init_path(&path);
10961         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10962         if (ret < 0)
10963                 goto out;
10964
10965         /* Iterate extent tree to account used space */
10966         while (1) {
10967                 leaf = path.nodes[0];
10968                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10969                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10970                         break;
10971
10972                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10973                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10974                         goto next;
10975                 if (extent_key.objectid < bg_key.objectid)
10976                         goto next;
10977
10978                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10979                         total += nodesize;
10980                 else
10981                         total += extent_key.offset;
10982
10983                 ei = btrfs_item_ptr(leaf, path.slots[0],
10984                                     struct btrfs_extent_item);
10985                 flags = btrfs_extent_flags(leaf, ei);
10986                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10987                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10988                                 error(
10989                         "bad extent[%llu, %llu) type mismatch with chunk",
10990                                         extent_key.objectid,
10991                                         extent_key.objectid + extent_key.offset);
10992                                 err |= CHUNK_TYPE_MISMATCH;
10993                         }
10994                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10995                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10996                                     BTRFS_BLOCK_GROUP_METADATA))) {
10997                                 error(
10998                         "bad extent[%llu, %llu) type mismatch with chunk",
10999                                         extent_key.objectid,
11000                                         extent_key.objectid + nodesize);
11001                                 err |= CHUNK_TYPE_MISMATCH;
11002                         }
11003                 }
11004 next:
11005                 ret = btrfs_next_item(extent_root, &path);
11006                 if (ret)
11007                         break;
11008         }
11009
11010 out:
11011         btrfs_release_path(&path);
11012
11013         if (total != used) {
11014                 error(
11015                 "block group[%llu %llu] used %llu but extent items used %llu",
11016                         bg_key.objectid, bg_key.offset, used, total);
11017                 err |= ACCOUNTING_MISMATCH;
11018         }
11019         return err;
11020 }
11021
11022 /*
11023  * Check a chunk item.
11024  * Including checking all referred dev_extents and block group
11025  */
11026 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11027                             struct extent_buffer *eb, int slot)
11028 {
11029         struct btrfs_root *extent_root = fs_info->extent_root;
11030         struct btrfs_root *dev_root = fs_info->dev_root;
11031         struct btrfs_path path;
11032         struct btrfs_key chunk_key;
11033         struct btrfs_key bg_key;
11034         struct btrfs_key devext_key;
11035         struct btrfs_chunk *chunk;
11036         struct extent_buffer *leaf;
11037         struct btrfs_block_group_item *bi;
11038         struct btrfs_block_group_item bg_item;
11039         struct btrfs_dev_extent *ptr;
11040         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11041         u64 length;
11042         u64 chunk_end;
11043         u64 type;
11044         u64 profile;
11045         int num_stripes;
11046         u64 offset;
11047         u64 objectid;
11048         int i;
11049         int ret;
11050         int err = 0;
11051
11052         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11053         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11054         length = btrfs_chunk_length(eb, chunk);
11055         chunk_end = chunk_key.offset + length;
11056         if (!IS_ALIGNED(length, sectorsize)) {
11057                 error("chunk[%llu %llu) not aligned to %u",
11058                         chunk_key.offset, chunk_end, sectorsize);
11059                 err |= BYTES_UNALIGNED;
11060                 goto out;
11061         }
11062
11063         type = btrfs_chunk_type(eb, chunk);
11064         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11065         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11066                 error("chunk[%llu %llu) has no chunk type",
11067                         chunk_key.offset, chunk_end);
11068                 err |= UNKNOWN_TYPE;
11069         }
11070         if (profile && (profile & (profile - 1))) {
11071                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11072                         chunk_key.offset, chunk_end, profile);
11073                 err |= UNKNOWN_TYPE;
11074         }
11075
11076         bg_key.objectid = chunk_key.offset;
11077         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11078         bg_key.offset = length;
11079
11080         btrfs_init_path(&path);
11081         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11082         if (ret) {
11083                 error(
11084                 "chunk[%llu %llu) did not find the related block group item",
11085                         chunk_key.offset, chunk_end);
11086                 err |= REFERENCER_MISSING;
11087         } else{
11088                 leaf = path.nodes[0];
11089                 bi = btrfs_item_ptr(leaf, path.slots[0],
11090                                     struct btrfs_block_group_item);
11091                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11092                                    sizeof(bg_item));
11093                 if (btrfs_block_group_flags(&bg_item) != type) {
11094                         error(
11095 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11096                                 chunk_key.offset, chunk_end, type,
11097                                 btrfs_block_group_flags(&bg_item));
11098                         err |= REFERENCER_MISSING;
11099                 }
11100         }
11101
11102         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11103         for (i = 0; i < num_stripes; i++) {
11104                 btrfs_release_path(&path);
11105                 btrfs_init_path(&path);
11106                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11107                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11108                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11109
11110                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11111                                         0, 0);
11112                 if (ret)
11113                         goto not_match_dev;
11114
11115                 leaf = path.nodes[0];
11116                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11117                                      struct btrfs_dev_extent);
11118                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11119                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11120                 if (objectid != chunk_key.objectid ||
11121                     offset != chunk_key.offset ||
11122                     btrfs_dev_extent_length(leaf, ptr) != length)
11123                         goto not_match_dev;
11124                 continue;
11125 not_match_dev:
11126                 err |= BACKREF_MISSING;
11127                 error(
11128                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11129                         chunk_key.objectid, chunk_end, i);
11130                 continue;
11131         }
11132         btrfs_release_path(&path);
11133 out:
11134         return err;
11135 }
11136
11137 /*
11138  * Main entry function to check known items and update related accounting info
11139  */
11140 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11141 {
11142         struct btrfs_fs_info *fs_info = root->fs_info;
11143         struct btrfs_key key;
11144         int slot = 0;
11145         int type;
11146         struct btrfs_extent_data_ref *dref;
11147         int ret;
11148         int err = 0;
11149
11150 next:
11151         btrfs_item_key_to_cpu(eb, &key, slot);
11152         type = key.type;
11153
11154         switch (type) {
11155         case BTRFS_EXTENT_DATA_KEY:
11156                 ret = check_extent_data_item(root, eb, slot);
11157                 err |= ret;
11158                 break;
11159         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11160                 ret = check_block_group_item(fs_info, eb, slot);
11161                 err |= ret;
11162                 break;
11163         case BTRFS_DEV_ITEM_KEY:
11164                 ret = check_dev_item(fs_info, eb, slot);
11165                 err |= ret;
11166                 break;
11167         case BTRFS_CHUNK_ITEM_KEY:
11168                 ret = check_chunk_item(fs_info, eb, slot);
11169                 err |= ret;
11170                 break;
11171         case BTRFS_DEV_EXTENT_KEY:
11172                 ret = check_dev_extent_item(fs_info, eb, slot);
11173                 err |= ret;
11174                 break;
11175         case BTRFS_EXTENT_ITEM_KEY:
11176         case BTRFS_METADATA_ITEM_KEY:
11177                 ret = check_extent_item(fs_info, eb, slot);
11178                 err |= ret;
11179                 break;
11180         case BTRFS_EXTENT_CSUM_KEY:
11181                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11182                 break;
11183         case BTRFS_TREE_BLOCK_REF_KEY:
11184                 ret = check_tree_block_backref(fs_info, key.offset,
11185                                                key.objectid, -1);
11186                 err |= ret;
11187                 break;
11188         case BTRFS_EXTENT_DATA_REF_KEY:
11189                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11190                 ret = check_extent_data_backref(fs_info,
11191                                 btrfs_extent_data_ref_root(eb, dref),
11192                                 btrfs_extent_data_ref_objectid(eb, dref),
11193                                 btrfs_extent_data_ref_offset(eb, dref),
11194                                 key.objectid, 0,
11195                                 btrfs_extent_data_ref_count(eb, dref));
11196                 err |= ret;
11197                 break;
11198         case BTRFS_SHARED_BLOCK_REF_KEY:
11199                 ret = check_shared_block_backref(fs_info, key.offset,
11200                                                  key.objectid, -1);
11201                 err |= ret;
11202                 break;
11203         case BTRFS_SHARED_DATA_REF_KEY:
11204                 ret = check_shared_data_backref(fs_info, key.offset,
11205                                                 key.objectid);
11206                 err |= ret;
11207                 break;
11208         default:
11209                 break;
11210         }
11211
11212         if (++slot < btrfs_header_nritems(eb))
11213                 goto next;
11214
11215         return err;
11216 }
11217
11218 /*
11219  * Helper function for later fs/subvol tree check.  To determine if a tree
11220  * block should be checked.
11221  * This function will ensure only the direct referencer with lowest rootid to
11222  * check a fs/subvolume tree block.
11223  *
11224  * Backref check at extent tree would detect errors like missing subvolume
11225  * tree, so we can do aggressive check to reduce duplicated checks.
11226  */
11227 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11228 {
11229         struct btrfs_root *extent_root = root->fs_info->extent_root;
11230         struct btrfs_key key;
11231         struct btrfs_path path;
11232         struct extent_buffer *leaf;
11233         int slot;
11234         struct btrfs_extent_item *ei;
11235         unsigned long ptr;
11236         unsigned long end;
11237         int type;
11238         u32 item_size;
11239         u64 offset;
11240         struct btrfs_extent_inline_ref *iref;
11241         int ret;
11242
11243         btrfs_init_path(&path);
11244         key.objectid = btrfs_header_bytenr(eb);
11245         key.type = BTRFS_METADATA_ITEM_KEY;
11246         key.offset = (u64)-1;
11247
11248         /*
11249          * Any failure in backref resolving means we can't determine
11250          * whom the tree block belongs to.
11251          * So in that case, we need to check that tree block
11252          */
11253         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11254         if (ret < 0)
11255                 goto need_check;
11256
11257         ret = btrfs_previous_extent_item(extent_root, &path,
11258                                          btrfs_header_bytenr(eb));
11259         if (ret)
11260                 goto need_check;
11261
11262         leaf = path.nodes[0];
11263         slot = path.slots[0];
11264         btrfs_item_key_to_cpu(leaf, &key, slot);
11265         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11266
11267         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11268                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11269         } else {
11270                 struct btrfs_tree_block_info *info;
11271
11272                 info = (struct btrfs_tree_block_info *)(ei + 1);
11273                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11274         }
11275
11276         item_size = btrfs_item_size_nr(leaf, slot);
11277         ptr = (unsigned long)iref;
11278         end = (unsigned long)ei + item_size;
11279         while (ptr < end) {
11280                 iref = (struct btrfs_extent_inline_ref *)ptr;
11281                 type = btrfs_extent_inline_ref_type(leaf, iref);
11282                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11283
11284                 /*
11285                  * We only check the tree block if current root is
11286                  * the lowest referencer of it.
11287                  */
11288                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11289                     offset < root->objectid) {
11290                         btrfs_release_path(&path);
11291                         return 0;
11292                 }
11293
11294                 ptr += btrfs_extent_inline_ref_size(type);
11295         }
11296         /*
11297          * Normally we should also check keyed tree block ref, but that may be
11298          * very time consuming.  Inlined ref should already make us skip a lot
11299          * of refs now.  So skip search keyed tree block ref.
11300          */
11301
11302 need_check:
11303         btrfs_release_path(&path);
11304         return 1;
11305 }
11306
11307 /*
11308  * Traversal function for tree block. We will do:
11309  * 1) Skip shared fs/subvolume tree blocks
11310  * 2) Update related bytes accounting
11311  * 3) Pre-order traversal
11312  */
11313 static int traverse_tree_block(struct btrfs_root *root,
11314                                 struct extent_buffer *node)
11315 {
11316         struct extent_buffer *eb;
11317         struct btrfs_key key;
11318         struct btrfs_key drop_key;
11319         int level;
11320         u64 nr;
11321         int i;
11322         int err = 0;
11323         int ret;
11324
11325         /*
11326          * Skip shared fs/subvolume tree block, in that case they will
11327          * be checked by referencer with lowest rootid
11328          */
11329         if (is_fstree(root->objectid) && !should_check(root, node))
11330                 return 0;
11331
11332         /* Update bytes accounting */
11333         total_btree_bytes += node->len;
11334         if (fs_root_objectid(btrfs_header_owner(node)))
11335                 total_fs_tree_bytes += node->len;
11336         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11337                 total_extent_tree_bytes += node->len;
11338         if (!found_old_backref &&
11339             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11340             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11341             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11342                 found_old_backref = 1;
11343
11344         /* pre-order tranversal, check itself first */
11345         level = btrfs_header_level(node);
11346         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11347                                    btrfs_header_level(node),
11348                                    btrfs_header_owner(node));
11349         err |= ret;
11350         if (err)
11351                 error(
11352         "check %s failed root %llu bytenr %llu level %d, force continue check",
11353                         level ? "node":"leaf", root->objectid,
11354                         btrfs_header_bytenr(node), btrfs_header_level(node));
11355
11356         if (!level) {
11357                 btree_space_waste += btrfs_leaf_free_space(root, node);
11358                 ret = check_leaf_items(root, node);
11359                 err |= ret;
11360                 return err;
11361         }
11362
11363         nr = btrfs_header_nritems(node);
11364         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11365         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11366                 sizeof(struct btrfs_key_ptr);
11367
11368         /* Then check all its children */
11369         for (i = 0; i < nr; i++) {
11370                 u64 blocknr = btrfs_node_blockptr(node, i);
11371
11372                 btrfs_node_key_to_cpu(node, &key, i);
11373                 if (level == root->root_item.drop_level &&
11374                     is_dropped_key(&key, &drop_key))
11375                         continue;
11376
11377                 /*
11378                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11379                  * to call the function itself.
11380                  */
11381                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11382                 if (extent_buffer_uptodate(eb)) {
11383                         ret = traverse_tree_block(root, eb);
11384                         err |= ret;
11385                 }
11386                 free_extent_buffer(eb);
11387         }
11388
11389         return err;
11390 }
11391
11392 /*
11393  * Low memory usage version check_chunks_and_extents.
11394  */
11395 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11396 {
11397         struct btrfs_path path;
11398         struct btrfs_key key;
11399         struct btrfs_root *root1;
11400         struct btrfs_root *cur_root;
11401         int err = 0;
11402         int ret;
11403
11404         root1 = root->fs_info->chunk_root;
11405         ret = traverse_tree_block(root1, root1->node);
11406         err |= ret;
11407
11408         root1 = root->fs_info->tree_root;
11409         ret = traverse_tree_block(root1, root1->node);
11410         err |= ret;
11411
11412         btrfs_init_path(&path);
11413         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11414         key.offset = 0;
11415         key.type = BTRFS_ROOT_ITEM_KEY;
11416
11417         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11418         if (ret) {
11419                 error("cannot find extent treet in tree_root");
11420                 goto out;
11421         }
11422
11423         while (1) {
11424                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11425                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11426                         goto next;
11427                 key.offset = (u64)-1;
11428
11429                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11430                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11431                                         &key);
11432                 else
11433                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11434                 if (IS_ERR(cur_root) || !cur_root) {
11435                         error("failed to read tree: %lld", key.objectid);
11436                         goto next;
11437                 }
11438
11439                 ret = traverse_tree_block(cur_root, cur_root->node);
11440                 err |= ret;
11441
11442                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11443                         btrfs_free_fs_root(cur_root);
11444 next:
11445                 ret = btrfs_next_item(root1, &path);
11446                 if (ret)
11447                         goto out;
11448         }
11449
11450 out:
11451         btrfs_release_path(&path);
11452         return err;
11453 }
11454
11455 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11456                            struct btrfs_root *root, int overwrite)
11457 {
11458         struct extent_buffer *c;
11459         struct extent_buffer *old = root->node;
11460         int level;
11461         int ret;
11462         struct btrfs_disk_key disk_key = {0,0,0};
11463
11464         level = 0;
11465
11466         if (overwrite) {
11467                 c = old;
11468                 extent_buffer_get(c);
11469                 goto init;
11470         }
11471         c = btrfs_alloc_free_block(trans, root,
11472                                    root->nodesize,
11473                                    root->root_key.objectid,
11474                                    &disk_key, level, 0, 0);
11475         if (IS_ERR(c)) {
11476                 c = old;
11477                 extent_buffer_get(c);
11478                 overwrite = 1;
11479         }
11480 init:
11481         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11482         btrfs_set_header_level(c, level);
11483         btrfs_set_header_bytenr(c, c->start);
11484         btrfs_set_header_generation(c, trans->transid);
11485         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11486         btrfs_set_header_owner(c, root->root_key.objectid);
11487
11488         write_extent_buffer(c, root->fs_info->fsid,
11489                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11490
11491         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11492                             btrfs_header_chunk_tree_uuid(c),
11493                             BTRFS_UUID_SIZE);
11494
11495         btrfs_mark_buffer_dirty(c);
11496         /*
11497          * this case can happen in the following case:
11498          *
11499          * 1.overwrite previous root.
11500          *
11501          * 2.reinit reloc data root, this is because we skip pin
11502          * down reloc data tree before which means we can allocate
11503          * same block bytenr here.
11504          */
11505         if (old->start == c->start) {
11506                 btrfs_set_root_generation(&root->root_item,
11507                                           trans->transid);
11508                 root->root_item.level = btrfs_header_level(root->node);
11509                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11510                                         &root->root_key, &root->root_item);
11511                 if (ret) {
11512                         free_extent_buffer(c);
11513                         return ret;
11514                 }
11515         }
11516         free_extent_buffer(old);
11517         root->node = c;
11518         add_root_to_dirty_list(root);
11519         return 0;
11520 }
11521
11522 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11523                                 struct extent_buffer *eb, int tree_root)
11524 {
11525         struct extent_buffer *tmp;
11526         struct btrfs_root_item *ri;
11527         struct btrfs_key key;
11528         u64 bytenr;
11529         u32 nodesize;
11530         int level = btrfs_header_level(eb);
11531         int nritems;
11532         int ret;
11533         int i;
11534
11535         /*
11536          * If we have pinned this block before, don't pin it again.
11537          * This can not only avoid forever loop with broken filesystem
11538          * but also give us some speedups.
11539          */
11540         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11541                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11542                 return 0;
11543
11544         btrfs_pin_extent(fs_info, eb->start, eb->len);
11545
11546         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11547         nritems = btrfs_header_nritems(eb);
11548         for (i = 0; i < nritems; i++) {
11549                 if (level == 0) {
11550                         btrfs_item_key_to_cpu(eb, &key, i);
11551                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11552                                 continue;
11553                         /* Skip the extent root and reloc roots */
11554                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11555                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11556                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11557                                 continue;
11558                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11559                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11560
11561                         /*
11562                          * If at any point we start needing the real root we
11563                          * will have to build a stump root for the root we are
11564                          * in, but for now this doesn't actually use the root so
11565                          * just pass in extent_root.
11566                          */
11567                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11568                                               nodesize, 0);
11569                         if (!extent_buffer_uptodate(tmp)) {
11570                                 fprintf(stderr, "Error reading root block\n");
11571                                 return -EIO;
11572                         }
11573                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11574                         free_extent_buffer(tmp);
11575                         if (ret)
11576                                 return ret;
11577                 } else {
11578                         bytenr = btrfs_node_blockptr(eb, i);
11579
11580                         /* If we aren't the tree root don't read the block */
11581                         if (level == 1 && !tree_root) {
11582                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11583                                 continue;
11584                         }
11585
11586                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11587                                               nodesize, 0);
11588                         if (!extent_buffer_uptodate(tmp)) {
11589                                 fprintf(stderr, "Error reading tree block\n");
11590                                 return -EIO;
11591                         }
11592                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11593                         free_extent_buffer(tmp);
11594                         if (ret)
11595                                 return ret;
11596                 }
11597         }
11598
11599         return 0;
11600 }
11601
11602 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11603 {
11604         int ret;
11605
11606         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11607         if (ret)
11608                 return ret;
11609
11610         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11611 }
11612
11613 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11614 {
11615         struct btrfs_block_group_cache *cache;
11616         struct btrfs_path path;
11617         struct extent_buffer *leaf;
11618         struct btrfs_chunk *chunk;
11619         struct btrfs_key key;
11620         int ret;
11621         u64 start;
11622
11623         btrfs_init_path(&path);
11624         key.objectid = 0;
11625         key.type = BTRFS_CHUNK_ITEM_KEY;
11626         key.offset = 0;
11627         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11628         if (ret < 0) {
11629                 btrfs_release_path(&path);
11630                 return ret;
11631         }
11632
11633         /*
11634          * We do this in case the block groups were screwed up and had alloc
11635          * bits that aren't actually set on the chunks.  This happens with
11636          * restored images every time and could happen in real life I guess.
11637          */
11638         fs_info->avail_data_alloc_bits = 0;
11639         fs_info->avail_metadata_alloc_bits = 0;
11640         fs_info->avail_system_alloc_bits = 0;
11641
11642         /* First we need to create the in-memory block groups */
11643         while (1) {
11644                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11645                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11646                         if (ret < 0) {
11647                                 btrfs_release_path(&path);
11648                                 return ret;
11649                         }
11650                         if (ret) {
11651                                 ret = 0;
11652                                 break;
11653                         }
11654                 }
11655                 leaf = path.nodes[0];
11656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11657                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11658                         path.slots[0]++;
11659                         continue;
11660                 }
11661
11662                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11663                 btrfs_add_block_group(fs_info, 0,
11664                                       btrfs_chunk_type(leaf, chunk),
11665                                       key.objectid, key.offset,
11666                                       btrfs_chunk_length(leaf, chunk));
11667                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11668                                  key.offset + btrfs_chunk_length(leaf, chunk));
11669                 path.slots[0]++;
11670         }
11671         start = 0;
11672         while (1) {
11673                 cache = btrfs_lookup_first_block_group(fs_info, start);
11674                 if (!cache)
11675                         break;
11676                 cache->cached = 1;
11677                 start = cache->key.objectid + cache->key.offset;
11678         }
11679
11680         btrfs_release_path(&path);
11681         return 0;
11682 }
11683
11684 static int reset_balance(struct btrfs_trans_handle *trans,
11685                          struct btrfs_fs_info *fs_info)
11686 {
11687         struct btrfs_root *root = fs_info->tree_root;
11688         struct btrfs_path path;
11689         struct extent_buffer *leaf;
11690         struct btrfs_key key;
11691         int del_slot, del_nr = 0;
11692         int ret;
11693         int found = 0;
11694
11695         btrfs_init_path(&path);
11696         key.objectid = BTRFS_BALANCE_OBJECTID;
11697         key.type = BTRFS_BALANCE_ITEM_KEY;
11698         key.offset = 0;
11699         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11700         if (ret) {
11701                 if (ret > 0)
11702                         ret = 0;
11703                 if (!ret)
11704                         goto reinit_data_reloc;
11705                 else
11706                         goto out;
11707         }
11708
11709         ret = btrfs_del_item(trans, root, &path);
11710         if (ret)
11711                 goto out;
11712         btrfs_release_path(&path);
11713
11714         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11715         key.type = BTRFS_ROOT_ITEM_KEY;
11716         key.offset = 0;
11717         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11718         if (ret < 0)
11719                 goto out;
11720         while (1) {
11721                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11722                         if (!found)
11723                                 break;
11724
11725                         if (del_nr) {
11726                                 ret = btrfs_del_items(trans, root, &path,
11727                                                       del_slot, del_nr);
11728                                 del_nr = 0;
11729                                 if (ret)
11730                                         goto out;
11731                         }
11732                         key.offset++;
11733                         btrfs_release_path(&path);
11734
11735                         found = 0;
11736                         ret = btrfs_search_slot(trans, root, &key, &path,
11737                                                 -1, 1);
11738                         if (ret < 0)
11739                                 goto out;
11740                         continue;
11741                 }
11742                 found = 1;
11743                 leaf = path.nodes[0];
11744                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11745                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11746                         break;
11747                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11748                         path.slots[0]++;
11749                         continue;
11750                 }
11751                 if (!del_nr) {
11752                         del_slot = path.slots[0];
11753                         del_nr = 1;
11754                 } else {
11755                         del_nr++;
11756                 }
11757                 path.slots[0]++;
11758         }
11759
11760         if (del_nr) {
11761                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11762                 if (ret)
11763                         goto out;
11764         }
11765         btrfs_release_path(&path);
11766
11767 reinit_data_reloc:
11768         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11769         key.type = BTRFS_ROOT_ITEM_KEY;
11770         key.offset = (u64)-1;
11771         root = btrfs_read_fs_root(fs_info, &key);
11772         if (IS_ERR(root)) {
11773                 fprintf(stderr, "Error reading data reloc tree\n");
11774                 ret = PTR_ERR(root);
11775                 goto out;
11776         }
11777         record_root_in_trans(trans, root);
11778         ret = btrfs_fsck_reinit_root(trans, root, 0);
11779         if (ret)
11780                 goto out;
11781         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11782 out:
11783         btrfs_release_path(&path);
11784         return ret;
11785 }
11786
11787 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11788                               struct btrfs_fs_info *fs_info)
11789 {
11790         u64 start = 0;
11791         int ret;
11792
11793         /*
11794          * The only reason we don't do this is because right now we're just
11795          * walking the trees we find and pinning down their bytes, we don't look
11796          * at any of the leaves.  In order to do mixed groups we'd have to check
11797          * the leaves of any fs roots and pin down the bytes for any file
11798          * extents we find.  Not hard but why do it if we don't have to?
11799          */
11800         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11801                 fprintf(stderr, "We don't support re-initing the extent tree "
11802                         "for mixed block groups yet, please notify a btrfs "
11803                         "developer you want to do this so they can add this "
11804                         "functionality.\n");
11805                 return -EINVAL;
11806         }
11807
11808         /*
11809          * first we need to walk all of the trees except the extent tree and pin
11810          * down the bytes that are in use so we don't overwrite any existing
11811          * metadata.
11812          */
11813         ret = pin_metadata_blocks(fs_info);
11814         if (ret) {
11815                 fprintf(stderr, "error pinning down used bytes\n");
11816                 return ret;
11817         }
11818
11819         /*
11820          * Need to drop all the block groups since we're going to recreate all
11821          * of them again.
11822          */
11823         btrfs_free_block_groups(fs_info);
11824         ret = reset_block_groups(fs_info);
11825         if (ret) {
11826                 fprintf(stderr, "error resetting the block groups\n");
11827                 return ret;
11828         }
11829
11830         /* Ok we can allocate now, reinit the extent root */
11831         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11832         if (ret) {
11833                 fprintf(stderr, "extent root initialization failed\n");
11834                 /*
11835                  * When the transaction code is updated we should end the
11836                  * transaction, but for now progs only knows about commit so
11837                  * just return an error.
11838                  */
11839                 return ret;
11840         }
11841
11842         /*
11843          * Now we have all the in-memory block groups setup so we can make
11844          * allocations properly, and the metadata we care about is safe since we
11845          * pinned all of it above.
11846          */
11847         while (1) {
11848                 struct btrfs_block_group_cache *cache;
11849
11850                 cache = btrfs_lookup_first_block_group(fs_info, start);
11851                 if (!cache)
11852                         break;
11853                 start = cache->key.objectid + cache->key.offset;
11854                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11855                                         &cache->key, &cache->item,
11856                                         sizeof(cache->item));
11857                 if (ret) {
11858                         fprintf(stderr, "Error adding block group\n");
11859                         return ret;
11860                 }
11861                 btrfs_extent_post_op(trans, fs_info->extent_root);
11862         }
11863
11864         ret = reset_balance(trans, fs_info);
11865         if (ret)
11866                 fprintf(stderr, "error resetting the pending balance\n");
11867
11868         return ret;
11869 }
11870
11871 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11872 {
11873         struct btrfs_path path;
11874         struct btrfs_trans_handle *trans;
11875         struct btrfs_key key;
11876         int ret;
11877
11878         printf("Recowing metadata block %llu\n", eb->start);
11879         key.objectid = btrfs_header_owner(eb);
11880         key.type = BTRFS_ROOT_ITEM_KEY;
11881         key.offset = (u64)-1;
11882
11883         root = btrfs_read_fs_root(root->fs_info, &key);
11884         if (IS_ERR(root)) {
11885                 fprintf(stderr, "Couldn't find owner root %llu\n",
11886                         key.objectid);
11887                 return PTR_ERR(root);
11888         }
11889
11890         trans = btrfs_start_transaction(root, 1);
11891         if (IS_ERR(trans))
11892                 return PTR_ERR(trans);
11893
11894         btrfs_init_path(&path);
11895         path.lowest_level = btrfs_header_level(eb);
11896         if (path.lowest_level)
11897                 btrfs_node_key_to_cpu(eb, &key, 0);
11898         else
11899                 btrfs_item_key_to_cpu(eb, &key, 0);
11900
11901         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11902         btrfs_commit_transaction(trans, root);
11903         btrfs_release_path(&path);
11904         return ret;
11905 }
11906
11907 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11908 {
11909         struct btrfs_path path;
11910         struct btrfs_trans_handle *trans;
11911         struct btrfs_key key;
11912         int ret;
11913
11914         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11915                bad->key.type, bad->key.offset);
11916         key.objectid = bad->root_id;
11917         key.type = BTRFS_ROOT_ITEM_KEY;
11918         key.offset = (u64)-1;
11919
11920         root = btrfs_read_fs_root(root->fs_info, &key);
11921         if (IS_ERR(root)) {
11922                 fprintf(stderr, "Couldn't find owner root %llu\n",
11923                         key.objectid);
11924                 return PTR_ERR(root);
11925         }
11926
11927         trans = btrfs_start_transaction(root, 1);
11928         if (IS_ERR(trans))
11929                 return PTR_ERR(trans);
11930
11931         btrfs_init_path(&path);
11932         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11933         if (ret) {
11934                 if (ret > 0)
11935                         ret = 0;
11936                 goto out;
11937         }
11938         ret = btrfs_del_item(trans, root, &path);
11939 out:
11940         btrfs_commit_transaction(trans, root);
11941         btrfs_release_path(&path);
11942         return ret;
11943 }
11944
11945 static int zero_log_tree(struct btrfs_root *root)
11946 {
11947         struct btrfs_trans_handle *trans;
11948         int ret;
11949
11950         trans = btrfs_start_transaction(root, 1);
11951         if (IS_ERR(trans)) {
11952                 ret = PTR_ERR(trans);
11953                 return ret;
11954         }
11955         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11956         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11957         ret = btrfs_commit_transaction(trans, root);
11958         return ret;
11959 }
11960
11961 static int populate_csum(struct btrfs_trans_handle *trans,
11962                          struct btrfs_root *csum_root, char *buf, u64 start,
11963                          u64 len)
11964 {
11965         u64 offset = 0;
11966         u64 sectorsize;
11967         int ret = 0;
11968
11969         while (offset < len) {
11970                 sectorsize = csum_root->sectorsize;
11971                 ret = read_extent_data(csum_root, buf, start + offset,
11972                                        &sectorsize, 0);
11973                 if (ret)
11974                         break;
11975                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11976                                             start + offset, buf, sectorsize);
11977                 if (ret)
11978                         break;
11979                 offset += sectorsize;
11980         }
11981         return ret;
11982 }
11983
11984 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11985                                       struct btrfs_root *csum_root,
11986                                       struct btrfs_root *cur_root)
11987 {
11988         struct btrfs_path path;
11989         struct btrfs_key key;
11990         struct extent_buffer *node;
11991         struct btrfs_file_extent_item *fi;
11992         char *buf = NULL;
11993         u64 start = 0;
11994         u64 len = 0;
11995         int slot = 0;
11996         int ret = 0;
11997
11998         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11999         if (!buf)
12000                 return -ENOMEM;
12001
12002         btrfs_init_path(&path);
12003         key.objectid = 0;
12004         key.offset = 0;
12005         key.type = 0;
12006         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12007         if (ret < 0)
12008                 goto out;
12009         /* Iterate all regular file extents and fill its csum */
12010         while (1) {
12011                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12012
12013                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12014                         goto next;
12015                 node = path.nodes[0];
12016                 slot = path.slots[0];
12017                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12018                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12019                         goto next;
12020                 start = btrfs_file_extent_disk_bytenr(node, fi);
12021                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12022
12023                 ret = populate_csum(trans, csum_root, buf, start, len);
12024                 if (ret == -EEXIST)
12025                         ret = 0;
12026                 if (ret < 0)
12027                         goto out;
12028 next:
12029                 /*
12030                  * TODO: if next leaf is corrupted, jump to nearest next valid
12031                  * leaf.
12032                  */
12033                 ret = btrfs_next_item(cur_root, &path);
12034                 if (ret < 0)
12035                         goto out;
12036                 if (ret > 0) {
12037                         ret = 0;
12038                         goto out;
12039                 }
12040         }
12041
12042 out:
12043         btrfs_release_path(&path);
12044         free(buf);
12045         return ret;
12046 }
12047
12048 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12049                                   struct btrfs_root *csum_root)
12050 {
12051         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12052         struct btrfs_path path;
12053         struct btrfs_root *tree_root = fs_info->tree_root;
12054         struct btrfs_root *cur_root;
12055         struct extent_buffer *node;
12056         struct btrfs_key key;
12057         int slot = 0;
12058         int ret = 0;
12059
12060         btrfs_init_path(&path);
12061         key.objectid = BTRFS_FS_TREE_OBJECTID;
12062         key.offset = 0;
12063         key.type = BTRFS_ROOT_ITEM_KEY;
12064         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12065         if (ret < 0)
12066                 goto out;
12067         if (ret > 0) {
12068                 ret = -ENOENT;
12069                 goto out;
12070         }
12071
12072         while (1) {
12073                 node = path.nodes[0];
12074                 slot = path.slots[0];
12075                 btrfs_item_key_to_cpu(node, &key, slot);
12076                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12077                         goto out;
12078                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12079                         goto next;
12080                 if (!is_fstree(key.objectid))
12081                         goto next;
12082                 key.offset = (u64)-1;
12083
12084                 cur_root = btrfs_read_fs_root(fs_info, &key);
12085                 if (IS_ERR(cur_root) || !cur_root) {
12086                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12087                                 key.objectid);
12088                         goto out;
12089                 }
12090                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12091                                 cur_root);
12092                 if (ret < 0)
12093                         goto out;
12094 next:
12095                 ret = btrfs_next_item(tree_root, &path);
12096                 if (ret > 0) {
12097                         ret = 0;
12098                         goto out;
12099                 }
12100                 if (ret < 0)
12101                         goto out;
12102         }
12103
12104 out:
12105         btrfs_release_path(&path);
12106         return ret;
12107 }
12108
12109 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12110                                       struct btrfs_root *csum_root)
12111 {
12112         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12113         struct btrfs_path path;
12114         struct btrfs_extent_item *ei;
12115         struct extent_buffer *leaf;
12116         char *buf;
12117         struct btrfs_key key;
12118         int ret;
12119
12120         btrfs_init_path(&path);
12121         key.objectid = 0;
12122         key.type = BTRFS_EXTENT_ITEM_KEY;
12123         key.offset = 0;
12124         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12125         if (ret < 0) {
12126                 btrfs_release_path(&path);
12127                 return ret;
12128         }
12129
12130         buf = malloc(csum_root->sectorsize);
12131         if (!buf) {
12132                 btrfs_release_path(&path);
12133                 return -ENOMEM;
12134         }
12135
12136         while (1) {
12137                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12138                         ret = btrfs_next_leaf(extent_root, &path);
12139                         if (ret < 0)
12140                                 break;
12141                         if (ret) {
12142                                 ret = 0;
12143                                 break;
12144                         }
12145                 }
12146                 leaf = path.nodes[0];
12147
12148                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12149                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12150                         path.slots[0]++;
12151                         continue;
12152                 }
12153
12154                 ei = btrfs_item_ptr(leaf, path.slots[0],
12155                                     struct btrfs_extent_item);
12156                 if (!(btrfs_extent_flags(leaf, ei) &
12157                       BTRFS_EXTENT_FLAG_DATA)) {
12158                         path.slots[0]++;
12159                         continue;
12160                 }
12161
12162                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12163                                     key.offset);
12164                 if (ret)
12165                         break;
12166                 path.slots[0]++;
12167         }
12168
12169         btrfs_release_path(&path);
12170         free(buf);
12171         return ret;
12172 }
12173
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent tree
 * cannot be used as the source of data ranges.  When search_fs_tree is
 * non-zero the fs/subvolume trees are scanned instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12190
12191 static void free_roots_info_cache(void)
12192 {
12193         if (!roots_info_cache)
12194                 return;
12195
12196         while (!cache_tree_empty(roots_info_cache)) {
12197                 struct cache_extent *entry;
12198                 struct root_item_info *rii;
12199
12200                 entry = first_cache_extent(roots_info_cache);
12201                 if (!entry)
12202                         break;
12203                 remove_cache_extent(roots_info_cache, entry);
12204                 rii = container_of(entry, struct root_item_info, cache_extent);
12205                 free(rii);
12206         }
12207
12208         free(roots_info_cache);
12209         roots_info_cache = NULL;
12210 }
12211
12212 static int build_roots_info_cache(struct btrfs_fs_info *info)
12213 {
12214         int ret = 0;
12215         struct btrfs_key key;
12216         struct extent_buffer *leaf;
12217         struct btrfs_path path;
12218
12219         if (!roots_info_cache) {
12220                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12221                 if (!roots_info_cache)
12222                         return -ENOMEM;
12223                 cache_tree_init(roots_info_cache);
12224         }
12225
12226         btrfs_init_path(&path);
12227         key.objectid = 0;
12228         key.type = BTRFS_EXTENT_ITEM_KEY;
12229         key.offset = 0;
12230         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12231         if (ret < 0)
12232                 goto out;
12233         leaf = path.nodes[0];
12234
12235         while (1) {
12236                 struct btrfs_key found_key;
12237                 struct btrfs_extent_item *ei;
12238                 struct btrfs_extent_inline_ref *iref;
12239                 int slot = path.slots[0];
12240                 int type;
12241                 u64 flags;
12242                 u64 root_id;
12243                 u8 level;
12244                 struct cache_extent *entry;
12245                 struct root_item_info *rii;
12246
12247                 if (slot >= btrfs_header_nritems(leaf)) {
12248                         ret = btrfs_next_leaf(info->extent_root, &path);
12249                         if (ret < 0) {
12250                                 break;
12251                         } else if (ret) {
12252                                 ret = 0;
12253                                 break;
12254                         }
12255                         leaf = path.nodes[0];
12256                         slot = path.slots[0];
12257                 }
12258
12259                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12260
12261                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12262                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12263                         goto next;
12264
12265                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12266                 flags = btrfs_extent_flags(leaf, ei);
12267
12268                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12269                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12270                         goto next;
12271
12272                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12273                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12274                         level = found_key.offset;
12275                 } else {
12276                         struct btrfs_tree_block_info *binfo;
12277
12278                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12279                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12280                         level = btrfs_tree_block_level(leaf, binfo);
12281                 }
12282
12283                 /*
12284                  * For a root extent, it must be of the following type and the
12285                  * first (and only one) iref in the item.
12286                  */
12287                 type = btrfs_extent_inline_ref_type(leaf, iref);
12288                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12289                         goto next;
12290
12291                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12292                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12293                 if (!entry) {
12294                         rii = malloc(sizeof(struct root_item_info));
12295                         if (!rii) {
12296                                 ret = -ENOMEM;
12297                                 goto out;
12298                         }
12299                         rii->cache_extent.start = root_id;
12300                         rii->cache_extent.size = 1;
12301                         rii->level = (u8)-1;
12302                         entry = &rii->cache_extent;
12303                         ret = insert_cache_extent(roots_info_cache, entry);
12304                         ASSERT(ret == 0);
12305                 } else {
12306                         rii = container_of(entry, struct root_item_info,
12307                                            cache_extent);
12308                 }
12309
12310                 ASSERT(rii->cache_extent.start == root_id);
12311                 ASSERT(rii->cache_extent.size == 1);
12312
12313                 if (level > rii->level || rii->level == (u8)-1) {
12314                         rii->level = level;
12315                         rii->bytenr = found_key.objectid;
12316                         rii->gen = btrfs_extent_generation(leaf, ei);
12317                         rii->node_count = 1;
12318                 } else if (level == rii->level) {
12319                         rii->node_count++;
12320                 }
12321 next:
12322                 path.slots[0]++;
12323         }
12324
12325 out:
12326         btrfs_release_path(&path);
12327
12328         return ret;
12329 }
12330
12331 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12332                                   struct btrfs_path *path,
12333                                   const struct btrfs_key *root_key,
12334                                   const int read_only_mode)
12335 {
12336         const u64 root_id = root_key->objectid;
12337         struct cache_extent *entry;
12338         struct root_item_info *rii;
12339         struct btrfs_root_item ri;
12340         unsigned long offset;
12341
12342         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12343         if (!entry) {
12344                 fprintf(stderr,
12345                         "Error: could not find extent items for root %llu\n",
12346                         root_key->objectid);
12347                 return -ENOENT;
12348         }
12349
12350         rii = container_of(entry, struct root_item_info, cache_extent);
12351         ASSERT(rii->cache_extent.start == root_id);
12352         ASSERT(rii->cache_extent.size == 1);
12353
12354         if (rii->node_count != 1) {
12355                 fprintf(stderr,
12356                         "Error: could not find btree root extent for root %llu\n",
12357                         root_id);
12358                 return -ENOENT;
12359         }
12360
12361         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12362         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12363
12364         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12365             btrfs_root_level(&ri) != rii->level ||
12366             btrfs_root_generation(&ri) != rii->gen) {
12367
12368                 /*
12369                  * If we're in repair mode but our caller told us to not update
12370                  * the root item, i.e. just check if it needs to be updated, don't
12371                  * print this message, since the caller will call us again shortly
12372                  * for the same root item without read only mode (the caller will
12373                  * open a transaction first).
12374                  */
12375                 if (!(read_only_mode && repair))
12376                         fprintf(stderr,
12377                                 "%sroot item for root %llu,"
12378                                 " current bytenr %llu, current gen %llu, current level %u,"
12379                                 " new bytenr %llu, new gen %llu, new level %u\n",
12380                                 (read_only_mode ? "" : "fixing "),
12381                                 root_id,
12382                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12383                                 btrfs_root_level(&ri),
12384                                 rii->bytenr, rii->gen, rii->level);
12385
12386                 if (btrfs_root_generation(&ri) > rii->gen) {
12387                         fprintf(stderr,
12388                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12389                                 root_id, btrfs_root_generation(&ri), rii->gen);
12390                         return -EINVAL;
12391                 }
12392
12393                 if (!read_only_mode) {
12394                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12395                         btrfs_set_root_level(&ri, rii->level);
12396                         btrfs_set_root_generation(&ri, rii->gen);
12397                         write_extent_buffer(path->nodes[0], &ri,
12398                                             offset, sizeof(ri));
12399                 }
12400
12401                 return 1;
12402         }
12403
12404         return 0;
12405 }
12406
12407 /*
12408  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12409  * caused read-only snapshots to be corrupted if they were created at a moment
12410  * when the source subvolume/snapshot had orphan items. The issue was that the
12411  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12412  * node instead of the post orphan cleanup root node.
12413  * So this function, and its callees, just detects and fixes those cases. Even
12414  * though the regression was for read-only snapshots, this function applies to
12415  * any snapshot/subvolume root.
12416  * This must be run before any other repair code - not doing it so, makes other
12417  * repair code delete or modify backrefs in the extent tree for example, which
12418  * will result in an inconsistent fs after repairing the root items.
12419  */
12420 static int repair_root_items(struct btrfs_fs_info *info)
12421 {
12422         struct btrfs_path path;
12423         struct btrfs_key key;
12424         struct extent_buffer *leaf;
12425         struct btrfs_trans_handle *trans = NULL;
12426         int ret = 0;
12427         int bad_roots = 0;
12428         int need_trans = 0;
12429
12430         btrfs_init_path(&path);
12431
12432         ret = build_roots_info_cache(info);
12433         if (ret)
12434                 goto out;
12435
12436         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12437         key.type = BTRFS_ROOT_ITEM_KEY;
12438         key.offset = 0;
12439
12440 again:
12441         /*
12442          * Avoid opening and committing transactions if a leaf doesn't have
12443          * any root items that need to be fixed, so that we avoid rotating
12444          * backup roots unnecessarily.
12445          */
12446         if (need_trans) {
12447                 trans = btrfs_start_transaction(info->tree_root, 1);
12448                 if (IS_ERR(trans)) {
12449                         ret = PTR_ERR(trans);
12450                         goto out;
12451                 }
12452         }
12453
12454         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12455                                 0, trans ? 1 : 0);
12456         if (ret < 0)
12457                 goto out;
12458         leaf = path.nodes[0];
12459
12460         while (1) {
12461                 struct btrfs_key found_key;
12462
12463                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12464                         int no_more_keys = find_next_key(&path, &key);
12465
12466                         btrfs_release_path(&path);
12467                         if (trans) {
12468                                 ret = btrfs_commit_transaction(trans,
12469                                                                info->tree_root);
12470                                 trans = NULL;
12471                                 if (ret < 0)
12472                                         goto out;
12473                         }
12474                         need_trans = 0;
12475                         if (no_more_keys)
12476                                 break;
12477                         goto again;
12478                 }
12479
12480                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12481
12482                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12483                         goto next;
12484                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12485                         goto next;
12486
12487                 ret = maybe_repair_root_item(info, &path, &found_key,
12488                                              trans ? 0 : 1);
12489                 if (ret < 0)
12490                         goto out;
12491                 if (ret) {
12492                         if (!trans && repair) {
12493                                 need_trans = 1;
12494                                 key = found_key;
12495                                 btrfs_release_path(&path);
12496                                 goto again;
12497                         }
12498                         bad_roots++;
12499                 }
12500 next:
12501                 path.slots[0]++;
12502         }
12503         ret = 0;
12504 out:
12505         free_roots_info_cache();
12506         btrfs_release_path(&path);
12507         if (trans)
12508                 btrfs_commit_transaction(trans, info->tree_root);
12509         if (ret < 0)
12510                 return ret;
12511
12512         return bad_roots;
12513 }
12514
12515 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12516 {
12517         struct btrfs_trans_handle *trans;
12518         struct btrfs_block_group_cache *bg_cache;
12519         u64 current = 0;
12520         int ret = 0;
12521
12522         /* Clear all free space cache inodes and its extent data */
12523         while (1) {
12524                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12525                 if (!bg_cache)
12526                         break;
12527                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12528                 if (ret < 0)
12529                         return ret;
12530                 current = bg_cache->key.objectid + bg_cache->key.offset;
12531         }
12532
12533         /* Don't forget to set cache_generation to -1 */
12534         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12535         if (IS_ERR(trans)) {
12536                 error("failed to update super block cache generation");
12537                 return PTR_ERR(trans);
12538         }
12539         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12540         btrfs_commit_transaction(trans, fs_info->tree_root);
12541
12542         return ret;
12543 }
12544
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * Layout of the NULL-terminated table: the command synopsis, a one-line
 * summary, the long description, an empty string as separator, and then one
 * or more lines per option.  The option lines should be kept in sync with
 * the long_options table and the getopt string in cmd_check().
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12575
12576 int cmd_check(int argc, char **argv)
12577 {
12578         struct cache_tree root_cache;
12579         struct btrfs_root *root;
12580         struct btrfs_fs_info *info;
12581         u64 bytenr = 0;
12582         u64 subvolid = 0;
12583         u64 tree_root_bytenr = 0;
12584         u64 chunk_root_bytenr = 0;
12585         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12586         int ret;
12587         int err = 0;
12588         u64 num;
12589         int init_csum_tree = 0;
12590         int readonly = 0;
12591         int clear_space_cache = 0;
12592         int qgroup_report = 0;
12593         int qgroups_repaired = 0;
12594         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12595
12596         while(1) {
12597                 int c;
12598                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12599                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12600                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12601                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12602                 static const struct option long_options[] = {
12603                         { "super", required_argument, NULL, 's' },
12604                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12605                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12606                         { "init-csum-tree", no_argument, NULL,
12607                                 GETOPT_VAL_INIT_CSUM },
12608                         { "init-extent-tree", no_argument, NULL,
12609                                 GETOPT_VAL_INIT_EXTENT },
12610                         { "check-data-csum", no_argument, NULL,
12611                                 GETOPT_VAL_CHECK_CSUM },
12612                         { "backup", no_argument, NULL, 'b' },
12613                         { "subvol-extents", required_argument, NULL, 'E' },
12614                         { "qgroup-report", no_argument, NULL, 'Q' },
12615                         { "tree-root", required_argument, NULL, 'r' },
12616                         { "chunk-root", required_argument, NULL,
12617                                 GETOPT_VAL_CHUNK_TREE },
12618                         { "progress", no_argument, NULL, 'p' },
12619                         { "mode", required_argument, NULL,
12620                                 GETOPT_VAL_MODE },
12621                         { "clear-space-cache", required_argument, NULL,
12622                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12623                         { NULL, 0, NULL, 0}
12624                 };
12625
12626                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12627                 if (c < 0)
12628                         break;
12629                 switch(c) {
12630                         case 'a': /* ignored */ break;
12631                         case 'b':
12632                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12633                                 break;
12634                         case 's':
12635                                 num = arg_strtou64(optarg);
12636                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12637                                         error(
12638                                         "super mirror should be less than %d",
12639                                                 BTRFS_SUPER_MIRROR_MAX);
12640                                         exit(1);
12641                                 }
12642                                 bytenr = btrfs_sb_offset(((int)num));
12643                                 printf("using SB copy %llu, bytenr %llu\n", num,
12644                                        (unsigned long long)bytenr);
12645                                 break;
12646                         case 'Q':
12647                                 qgroup_report = 1;
12648                                 break;
12649                         case 'E':
12650                                 subvolid = arg_strtou64(optarg);
12651                                 break;
12652                         case 'r':
12653                                 tree_root_bytenr = arg_strtou64(optarg);
12654                                 break;
12655                         case GETOPT_VAL_CHUNK_TREE:
12656                                 chunk_root_bytenr = arg_strtou64(optarg);
12657                                 break;
12658                         case 'p':
12659                                 ctx.progress_enabled = true;
12660                                 break;
12661                         case '?':
12662                         case 'h':
12663                                 usage(cmd_check_usage);
12664                         case GETOPT_VAL_REPAIR:
12665                                 printf("enabling repair mode\n");
12666                                 repair = 1;
12667                                 ctree_flags |= OPEN_CTREE_WRITES;
12668                                 break;
12669                         case GETOPT_VAL_READONLY:
12670                                 readonly = 1;
12671                                 break;
12672                         case GETOPT_VAL_INIT_CSUM:
12673                                 printf("Creating a new CRC tree\n");
12674                                 init_csum_tree = 1;
12675                                 repair = 1;
12676                                 ctree_flags |= OPEN_CTREE_WRITES;
12677                                 break;
12678                         case GETOPT_VAL_INIT_EXTENT:
12679                                 init_extent_tree = 1;
12680                                 ctree_flags |= (OPEN_CTREE_WRITES |
12681                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12682                                 repair = 1;
12683                                 break;
12684                         case GETOPT_VAL_CHECK_CSUM:
12685                                 check_data_csum = 1;
12686                                 break;
12687                         case GETOPT_VAL_MODE:
12688                                 check_mode = parse_check_mode(optarg);
12689                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12690                                         error("unknown mode: %s", optarg);
12691                                         exit(1);
12692                                 }
12693                                 break;
12694                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12695                                 if (strcmp(optarg, "v1") == 0) {
12696                                         clear_space_cache = 1;
12697                                 } else if (strcmp(optarg, "v2") == 0) {
12698                                         clear_space_cache = 2;
12699                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12700                                 } else {
12701                                         error(
12702                 "invalid argument to --clear-space-cache, must be v1 or v2");
12703                                         exit(1);
12704                                 }
12705                                 ctree_flags |= OPEN_CTREE_WRITES;
12706                                 break;
12707                 }
12708         }
12709
12710         if (check_argc_exact(argc - optind, 1))
12711                 usage(cmd_check_usage);
12712
12713         if (ctx.progress_enabled) {
12714                 ctx.tp = TASK_NOTHING;
12715                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12716         }
12717
12718         /* This check is the only reason for --readonly to exist */
12719         if (readonly && repair) {
12720                 error("repair options are not compatible with --readonly");
12721                 exit(1);
12722         }
12723
12724         /*
12725          * Not supported yet
12726          */
12727         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12728                 error("low memory mode doesn't support repair yet");
12729                 exit(1);
12730         }
12731
12732         radix_tree_init();
12733         cache_tree_init(&root_cache);
12734
12735         if((ret = check_mounted(argv[optind])) < 0) {
12736                 error("could not check mount status: %s", strerror(-ret));
12737                 err |= !!ret;
12738                 goto err_out;
12739         } else if(ret) {
12740                 error("%s is currently mounted, aborting", argv[optind]);
12741                 ret = -EBUSY;
12742                 err |= !!ret;
12743                 goto err_out;
12744         }
12745
12746         /* only allow partial opening under repair mode */
12747         if (repair)
12748                 ctree_flags |= OPEN_CTREE_PARTIAL;
12749
12750         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12751                                   chunk_root_bytenr, ctree_flags);
12752         if (!info) {
12753                 error("cannot open file system");
12754                 ret = -EIO;
12755                 err |= !!ret;
12756                 goto err_out;
12757         }
12758
12759         global_info = info;
12760         root = info->fs_root;
12761         if (clear_space_cache == 1) {
12762                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12763                         error(
12764                 "free space cache v2 detected, use --clear-space-cache v2");
12765                         ret = 1;
12766                         goto close_out;
12767                 }
12768                 printf("Clearing free space cache\n");
12769                 ret = clear_free_space_cache(info);
12770                 if (ret) {
12771                         error("failed to clear free space cache");
12772                         ret = 1;
12773                 } else {
12774                         printf("Free space cache cleared\n");
12775                 }
12776                 goto close_out;
12777         } else if (clear_space_cache == 2) {
12778                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12779                         printf("no free space cache v2 to clear\n");
12780                         ret = 0;
12781                         goto close_out;
12782                 }
12783                 printf("Clear free space cache v2\n");
12784                 ret = btrfs_clear_free_space_tree(info);
12785                 if (ret) {
12786                         error("failed to clear free space cache v2: %d", ret);
12787                         ret = 1;
12788                 } else {
12789                         printf("free space cache v2 cleared\n");
12790                 }
12791                 goto close_out;
12792         }
12793
12794         /*
12795          * Repair mode forces a transaction commit, which would make
12796          * the log tree fail to load at mount time.
12797          */
12798         if (repair && btrfs_super_log_root(info->super_copy)) {
12799                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12800                 if (!ret) {
12801                         ret = 1;
12802                         err |= !!ret;
12803                         goto close_out;
12804                 }
12805                 ret = zero_log_tree(root);
12806                 err |= !!ret;
12807                 if (ret) {
12808                         error("failed to zero log tree: %d", ret);
12809                         goto close_out;
12810                 }
12811         }
12812
12813         uuid_unparse(info->super_copy->fsid, uuidbuf);
12814         if (qgroup_report) {
12815                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12816                        uuidbuf);
12817                 ret = qgroup_verify_all(info);
12818                 err |= !!ret;
12819                 if (ret == 0)
12820                         report_qgroups(1);
12821                 goto close_out;
12822         }
12823         if (subvolid) {
12824                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12825                        subvolid, argv[optind], uuidbuf);
12826                 ret = print_extent_state(info, subvolid);
12827                 err |= !!ret;
12828                 goto close_out;
12829         }
12830         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12831
12832         if (!extent_buffer_uptodate(info->tree_root->node) ||
12833             !extent_buffer_uptodate(info->dev_root->node) ||
12834             !extent_buffer_uptodate(info->chunk_root->node)) {
12835                 error("critical roots corrupted, unable to check the filesystem");
12836                 err |= !!ret;
12837                 ret = -EIO;
12838                 goto close_out;
12839         }
12840
12841         if (init_extent_tree || init_csum_tree) {
12842                 struct btrfs_trans_handle *trans;
12843
12844                 trans = btrfs_start_transaction(info->extent_root, 0);
12845                 if (IS_ERR(trans)) {
12846                         error("error starting transaction");
12847                         ret = PTR_ERR(trans);
12848                         err |= !!ret;
12849                         goto close_out;
12850                 }
12851
12852                 if (init_extent_tree) {
12853                         printf("Creating a new extent tree\n");
12854                         ret = reinit_extent_tree(trans, info);
12855                         err |= !!ret;
12856                         if (ret)
12857                                 goto close_out;
12858                 }
12859
12860                 if (init_csum_tree) {
12861                         printf("Reinitialize checksum tree\n");
12862                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12863                         if (ret) {
12864                                 error("checksum tree initialization failed: %d",
12865                                                 ret);
12866                                 ret = -EIO;
12867                                 err |= !!ret;
12868                                 goto close_out;
12869                         }
12870
12871                         ret = fill_csum_tree(trans, info->csum_root,
12872                                              init_extent_tree);
12873                         err |= !!ret;
12874                         if (ret) {
12875                                 error("checksum tree refilling failed: %d", ret);
12876                                 return -EIO;
12877                         }
12878                 }
12879                 /*
12880                  * Ok now we commit and run the normal fsck, which will add
12881                  * extent entries for all of the items it finds.
12882                  */
12883                 ret = btrfs_commit_transaction(trans, info->extent_root);
12884                 err |= !!ret;
12885                 if (ret)
12886                         goto close_out;
12887         }
12888         if (!extent_buffer_uptodate(info->extent_root->node)) {
12889                 error("critical: extent_root, unable to check the filesystem");
12890                 ret = -EIO;
12891                 err |= !!ret;
12892                 goto close_out;
12893         }
12894         if (!extent_buffer_uptodate(info->csum_root->node)) {
12895                 error("critical: csum_root, unable to check the filesystem");
12896                 ret = -EIO;
12897                 err |= !!ret;
12898                 goto close_out;
12899         }
12900
12901         if (!ctx.progress_enabled)
12902                 fprintf(stderr, "checking extents\n");
12903         if (check_mode == CHECK_MODE_LOWMEM)
12904                 ret = check_chunks_and_extents_v2(root);
12905         else
12906                 ret = check_chunks_and_extents(root);
12907         err |= !!ret;
12908         if (ret)
12909                 error(
12910                 "errors found in extent allocation tree or chunk allocation");
12911
12912         ret = repair_root_items(info);
12913         err |= !!ret;
12914         if (ret < 0)
12915                 goto close_out;
12916         if (repair) {
12917                 fprintf(stderr, "Fixed %d roots.\n", ret);
12918                 ret = 0;
12919         } else if (ret > 0) {
12920                 fprintf(stderr,
12921                        "Found %d roots with an outdated root item.\n",
12922                        ret);
12923                 fprintf(stderr,
12924                         "Please run a filesystem check with the option --repair to fix them.\n");
12925                 ret = 1;
12926                 err |= !!ret;
12927                 goto close_out;
12928         }
12929
12930         if (!ctx.progress_enabled) {
12931                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12932                         fprintf(stderr, "checking free space tree\n");
12933                 else
12934                         fprintf(stderr, "checking free space cache\n");
12935         }
12936         ret = check_space_cache(root);
12937         err |= !!ret;
12938         if (ret)
12939                 goto out;
12940
12941         /*
12942          * We used to have to have these hole extents in between our real
12943          * extents so if we don't have this flag set we need to make sure there
12944          * are no gaps in the file extents for inodes, otherwise we can just
12945          * ignore it when this happens.
12946          */
12947         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12948         if (!ctx.progress_enabled)
12949                 fprintf(stderr, "checking fs roots\n");
12950         if (check_mode == CHECK_MODE_LOWMEM)
12951                 ret = check_fs_roots_v2(root->fs_info);
12952         else
12953                 ret = check_fs_roots(root, &root_cache);
12954         err |= !!ret;
12955         if (ret)
12956                 goto out;
12957
12958         fprintf(stderr, "checking csums\n");
12959         ret = check_csums(root);
12960         err |= !!ret;
12961         if (ret)
12962                 goto out;
12963
12964         fprintf(stderr, "checking root refs\n");
12965         /* For low memory mode, check_fs_roots_v2 handles root refs */
12966         if (check_mode != CHECK_MODE_LOWMEM) {
12967                 ret = check_root_refs(root, &root_cache);
12968                 err |= !!ret;
12969                 if (ret)
12970                         goto out;
12971         }
12972
12973         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12974                 struct extent_buffer *eb;
12975
12976                 eb = list_first_entry(&root->fs_info->recow_ebs,
12977                                       struct extent_buffer, recow);
12978                 list_del_init(&eb->recow);
12979                 ret = recow_extent_buffer(root, eb);
12980                 err |= !!ret;
12981                 if (ret)
12982                         break;
12983         }
12984
12985         while (!list_empty(&delete_items)) {
12986                 struct bad_item *bad;
12987
12988                 bad = list_first_entry(&delete_items, struct bad_item, list);
12989                 list_del_init(&bad->list);
12990                 if (repair) {
12991                         ret = delete_bad_item(root, bad);
12992                         err |= !!ret;
12993                 }
12994                 free(bad);
12995         }
12996
12997         if (info->quota_enabled) {
12998                 fprintf(stderr, "checking quota groups\n");
12999                 ret = qgroup_verify_all(info);
13000                 err |= !!ret;
13001                 if (ret)
13002                         goto out;
13003                 report_qgroups(0);
13004                 ret = repair_qgroups(info, &qgroups_repaired);
13005                 err |= !!ret;
13006                 if (err)
13007                         goto out;
13008                 ret = 0;
13009         }
13010
13011         if (!list_empty(&root->fs_info->recow_ebs)) {
13012                 error("transid errors in file system");
13013                 ret = 1;
13014                 err |= !!ret;
13015         }
13016 out:
13017         if (found_old_backref) { /*
13018                  * There was a disk format change while mixed
13019                  * backref was in the testing tree. The old format
13020                  * existed for about one week.
13021                  */
13022                 printf("\n * Found old mixed backref format. "
13023                        "The old format is not supported! *"
13024                        "\n * Please mount the FS in readonly mode, "
13025                        "backup data and re-format the FS. *\n\n");
13026                 err |= 1;
13027         }
13028         printf("found %llu bytes used err is %d\n",
13029                (unsigned long long)bytes_used, ret);
13030         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13031         printf("total tree bytes: %llu\n",
13032                (unsigned long long)total_btree_bytes);
13033         printf("total fs tree bytes: %llu\n",
13034                (unsigned long long)total_fs_tree_bytes);
13035         printf("total extent tree bytes: %llu\n",
13036                (unsigned long long)total_extent_tree_bytes);
13037         printf("btree space waste bytes: %llu\n",
13038                (unsigned long long)btree_space_waste);
13039         printf("file data blocks allocated: %llu\n referenced %llu\n",
13040                 (unsigned long long)data_bytes_allocated,
13041                 (unsigned long long)data_bytes_referenced);
13042
13043         free_qgroup_counts();
13044         free_root_recs_tree(&root_cache);
13045 close_out:
13046         close_ctree(root);
13047 err_out:
13048         if (ctx.progress_enabled)
13049                 task_deinit(ctx.info);
13050
13051         return err;
13052 }