btrfs-progs: check: use on-stack path buffer in check_fs_root_v2
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phases the progress indicator can report.  print_status_check() uses
 * the value as an index into its table of phase names.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* sentinel: has to stay the last element */
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Checker implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,	/* parse_check_mode() result for unrecognized names */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Error bits for fs tree items; note that bit 0 is not used. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM has no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 /*
140  * Much like data_backref, just removed the undetermined members
141  * and change it to use list_head.
142  * During extent scan, it is stored in root->orphan_data_extent.
143  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
144  */
145 struct orphan_data_extent {
146         struct list_head list;
147         u64 root;
148         u64 objectid;
149         u64 offset;
150         u64 disk_bytenr;
151         u64 disk_len;
152 };
153
154 struct tree_backref {
155         struct extent_backref node;
156         union {
157                 u64 parent;
158                 u64 root;
159         };
160 };
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
/*
 * Explicit initialization value for the two-bit field
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2
};
169
170 struct extent_record {
171         struct list_head backrefs;
172         struct list_head dups;
173         struct list_head list;
174         struct cache_extent cache;
175         struct btrfs_disk_key parent_key;
176         u64 start;
177         u64 max_size;
178         u64 nr;
179         u64 refs;
180         u64 extent_item_refs;
181         u64 generation;
182         u64 parent_generation;
183         u64 info_objectid;
184         u32 num_duplicates;
185         u8 info_level;
186         unsigned int flag_block_full_backref:2;
187         unsigned int found_rec:1;
188         unsigned int content_checked:1;
189         unsigned int owner_ref_checked:1;
190         unsigned int is_root:1;
191         unsigned int metadata:1;
192         unsigned int bad_full_backref:1;
193         unsigned int crossing_stripes:1;
194         unsigned int wrong_chunk_type:1;
195 };
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
202 struct inode_backref {
203         struct list_head list;
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
209         int errors;
210         u64 dir;
211         u64 index;
212         u16 namelen;
213         char name[0];
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
221 struct root_item_record {
222         struct list_head list;
223         u64 objectid;
224         u64 bytenr;
225         u64 last_snapshot;
226         u8 level;
227         u8 drop_level;
228         int level_size;
229         struct btrfs_key drop_key;
230 };
231
/* Error bits for a single back reference, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
245
246 struct file_extent_hole {
247         struct rb_node node;
248         u64 start;
249         u64 len;
250 };
251
252 struct inode_record {
253         struct list_head backrefs;
254         unsigned int checked:1;
255         unsigned int merging:1;
256         unsigned int found_inode_item:1;
257         unsigned int found_dir_item:1;
258         unsigned int found_file_extent:1;
259         unsigned int found_csum_item:1;
260         unsigned int some_csum_missing:1;
261         unsigned int nodatasum:1;
262         int errors;
263
264         u64 ino;
265         u32 nlink;
266         u32 imode;
267         u64 isize;
268         u64 nbytes;
269
270         u32 found_link;
271         u64 found_size;
272         u64 extent_start;
273         u64 extent_end;
274         struct rb_root holes;
275         struct list_head orphan_extents;
276
277         u32 refs;
278 };
279
/* Error bits of inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
295
296 struct root_backref {
297         struct list_head list;
298         unsigned int found_dir_item:1;
299         unsigned int found_dir_index:1;
300         unsigned int found_back_ref:1;
301         unsigned int found_forward_ref:1;
302         unsigned int reachable:1;
303         int errors;
304         u64 ref_root;
305         u64 dir;
306         u64 index;
307         u16 namelen;
308         char name[0];
309 };
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
316 struct root_record {
317         struct list_head backrefs;
318         struct cache_extent cache;
319         unsigned int found_root_item:1;
320         u64 objectid;
321         u32 found_ref;
322 };
323
324 struct ptr_node {
325         struct cache_extent cache;
326         void *data;
327 };
328
329 struct shared_node {
330         struct cache_extent cache;
331         struct cache_tree root_cache;
332         struct cache_tree inode_cache;
333         struct inode_record *current;
334         u32 refs;
335 };
336
337 struct block_info {
338         u64 start;
339         u32 size;
340 };
341
342 struct walk_control {
343         struct cache_tree shared;
344         struct shared_node *nodes[BTRFS_MAX_LEVEL];
345         int active_node;
346         int root_level;
347 };
348
349 struct bad_item {
350         struct btrfs_key key;
351         u64 root_id;
352         struct list_head list;
353 };
354
355 struct extent_entry {
356         u64 bytenr;
357         u64 bytes;
358         int count;
359         int broken;
360         struct list_head list;
361 };
362
363 struct root_item_info {
364         /* level of the root */
365         u8 level;
366         /* number of nodes at this level, must be 1 for a root */
367         int node_count;
368         u64 bytenr;
369         u64 gen;
370         struct cache_extent cache_extent;
371 };
372
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now uses the first free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Counterpart of print_status_check(): emits a newline and flushes stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	printf("\n");
	fflush(stdout);
	return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize, root->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct btrfs_root *root,
1481                             struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(root, eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of the most recently looked up tree block, indexed by tree
 * level.  It is filled by update_nodes_refs() and by the tree walkers so the
 * extent reference count of a block does not have to be looked up again while
 * the same block is still being processed.
 */
struct node_refs {
        /* start (bytenr) of the cached block at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count found for that block */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero if that block has to be checked through the current root */
        int need_check[BTRFS_MAX_LEVEL];
};

/* Defined further down; declared here because they are used before that. */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct btrfs_root *root,
1987                             struct extent_buffer *parent, int slot,
1988                             struct extent_buffer *child)
1989 {
1990         struct btrfs_key parent_key;
1991         struct btrfs_key child_key;
1992         int ret = 0;
1993
1994         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1995         if (btrfs_header_level(child) == 0)
1996                 btrfs_item_key_to_cpu(child, &child_key, 0);
1997         else
1998                 btrfs_node_key_to_cpu(child, &child_key, 0);
1999
2000         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2001                 ret = -EINVAL;
2002                 fprintf(stderr,
2003                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2004                         parent_key.objectid, parent_key.type, parent_key.offset,
2005                         child_key.objectid, child_key.type, child_key.offset);
2006         }
2007         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008                 ret = -EINVAL;
2009                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2010                         btrfs_node_blockptr(parent, slot),
2011                         btrfs_header_bytenr(child));
2012         }
2013         if (btrfs_node_ptr_generation(parent, slot) !=
2014             btrfs_header_generation(child)) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2017                         btrfs_header_generation(child),
2018                         btrfs_node_ptr_generation(parent, slot));
2019         }
2020         return ret;
2021 }
2022
2023 /*
2024  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2025  * in every fs or file tree check. Here we find its all root ids, and only check
2026  * it in the fs or file tree which has the smallest root id.
2027  */
2028 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 {
2030         struct rb_node *node;
2031         struct ulist_node *u;
2032
2033         if (roots->nnodes == 1)
2034                 return 1;
2035
2036         node = rb_first(&roots->root);
2037         u = rb_entry(node, struct ulist_node, rb_node);
2038         /*
2039          * current root id is not smallest, we skip it and let it be checked
2040          * in the fs or file tree who hash the smallest root id.
2041          */
2042         if (root->objectid != u->val)
2043                 return 0;
2044
2045         return 1;
2046 }
2047
2048 /*
2049  * for a tree node or leaf, we record its reference count, so later if we still
2050  * process this node or leaf, don't need to compute its reference count again.
2051  */
2052 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2053                              struct node_refs *nrefs, u64 level)
2054 {
2055         int check, ret;
2056         u64 refs;
2057         struct ulist *roots;
2058
2059         if (nrefs->bytenr[level] != bytenr) {
2060                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2061                                        level, 1, &refs, NULL);
2062                 if (ret < 0)
2063                         return ret;
2064
2065                 nrefs->bytenr[level] = bytenr;
2066                 nrefs->refs[level] = refs;
2067                 if (refs > 1) {
2068                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2069                                                    0, &roots);
2070                         if (ret)
2071                                 return -EIO;
2072
2073                         check = need_check(root, roots);
2074                         ulist_free(roots);
2075                         nrefs->need_check[level] = check;
2076                 } else {
2077                         nrefs->need_check[level] = 1;
2078                 }
2079         }
2080
2081         return 0;
2082 }
2083
2084 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2085                           struct walk_control *wc, int *level,
2086                           struct node_refs *nrefs)
2087 {
2088         enum btrfs_tree_block_status status;
2089         u64 bytenr;
2090         u64 ptr_gen;
2091         struct extent_buffer *next;
2092         struct extent_buffer *cur;
2093         u32 blocksize;
2094         int ret, err = 0;
2095         u64 refs;
2096
2097         WARN_ON(*level < 0);
2098         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099
2100         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2101                 refs = nrefs->refs[*level];
2102                 ret = 0;
2103         } else {
2104                 ret = btrfs_lookup_extent_info(NULL, root,
2105                                        path->nodes[*level]->start,
2106                                        *level, 1, &refs, NULL);
2107                 if (ret < 0) {
2108                         err = ret;
2109                         goto out;
2110                 }
2111                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2112                 nrefs->refs[*level] = refs;
2113         }
2114
2115         if (refs > 1) {
2116                 ret = enter_shared_node(root, path->nodes[*level]->start,
2117                                         refs, wc, *level);
2118                 if (ret > 0) {
2119                         err = ret;
2120                         goto out;
2121                 }
2122         }
2123
2124         while (*level >= 0) {
2125                 WARN_ON(*level < 0);
2126                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2127                 cur = path->nodes[*level];
2128
2129                 if (btrfs_header_level(cur) != *level)
2130                         WARN_ON(1);
2131
2132                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2133                         break;
2134                 if (*level == 0) {
2135                         ret = process_one_leaf(root, cur, wc);
2136                         if (ret < 0)
2137                                 err = ret;
2138                         break;
2139                 }
2140                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2141                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2142                 blocksize = root->nodesize;
2143
2144                 if (bytenr == nrefs->bytenr[*level - 1]) {
2145                         refs = nrefs->refs[*level - 1];
2146                 } else {
2147                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2148                                         *level - 1, 1, &refs, NULL);
2149                         if (ret < 0) {
2150                                 refs = 0;
2151                         } else {
2152                                 nrefs->bytenr[*level - 1] = bytenr;
2153                                 nrefs->refs[*level - 1] = refs;
2154                         }
2155                 }
2156
2157                 if (refs > 1) {
2158                         ret = enter_shared_node(root, bytenr, refs,
2159                                                 wc, *level - 1);
2160                         if (ret > 0) {
2161                                 path->slots[*level]++;
2162                                 continue;
2163                         }
2164                 }
2165
2166                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2167                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2168                         free_extent_buffer(next);
2169                         reada_walk_down(root, cur, path->slots[*level]);
2170                         next = read_tree_block(root, bytenr, blocksize,
2171                                                ptr_gen);
2172                         if (!extent_buffer_uptodate(next)) {
2173                                 struct btrfs_key node_key;
2174
2175                                 btrfs_node_key_to_cpu(path->nodes[*level],
2176                                                       &node_key,
2177                                                       path->slots[*level]);
2178                                 btrfs_add_corrupt_extent_record(root->fs_info,
2179                                                 &node_key,
2180                                                 path->nodes[*level]->start,
2181                                                 root->nodesize, *level);
2182                                 err = -EIO;
2183                                 goto out;
2184                         }
2185                 }
2186
2187                 ret = check_child_node(root, cur, path->slots[*level], next);
2188                 if (ret) {
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2217                              int *level, struct node_refs *nrefs, int ext_ref)
2218 {
2219         enum btrfs_tree_block_status status;
2220         u64 bytenr;
2221         u64 ptr_gen;
2222         struct extent_buffer *next;
2223         struct extent_buffer *cur;
2224         u32 blocksize;
2225         int ret;
2226
2227         WARN_ON(*level < 0);
2228         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229
2230         ret = update_nodes_refs(root, path->nodes[*level]->start,
2231                                 nrefs, *level);
2232         if (ret < 0)
2233                 return ret;
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 /* Don't forgot to check leaf/node validation */
2246                 if (*level == 0) {
2247                         ret = btrfs_check_leaf(root, NULL, cur);
2248                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2249                                 ret = -EIO;
2250                                 break;
2251                         }
2252                         ret = process_one_leaf_v2(root, path, nrefs,
2253                                                   level, ext_ref);
2254                         break;
2255                 } else {
2256                         ret = btrfs_check_node(root, NULL, cur);
2257                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2258                                 ret = -EIO;
2259                                 break;
2260                         }
2261                 }
2262                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2263                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2264                 blocksize = root->nodesize;
2265
2266                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2267                 if (ret)
2268                         break;
2269                 if (!nrefs->need_check[*level - 1]) {
2270                         path->slots[*level]++;
2271                         continue;
2272                 }
2273
2274                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2275                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2276                         free_extent_buffer(next);
2277                         reada_walk_down(root, cur, path->slots[*level]);
2278                         next = read_tree_block(root, bytenr, blocksize,
2279                                                ptr_gen);
2280                         if (!extent_buffer_uptodate(next)) {
2281                                 struct btrfs_key node_key;
2282
2283                                 btrfs_node_key_to_cpu(path->nodes[*level],
2284                                                       &node_key,
2285                                                       path->slots[*level]);
2286                                 btrfs_add_corrupt_extent_record(root->fs_info,
2287                                                 &node_key,
2288                                                 path->nodes[*level]->start,
2289                                                 root->nodesize, *level);
2290                                 ret = -EIO;
2291                                 break;
2292                         }
2293                 }
2294
2295                 ret = check_child_node(root, cur, path->slots[*level], next);
2296                 if (ret < 0) 
2297                         break;
2298
2299                 if (btrfs_is_leaf(next))
2300                         status = btrfs_check_leaf(root, NULL, next);
2301                 else
2302                         status = btrfs_check_node(root, NULL, next);
2303                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2304                         free_extent_buffer(next);
2305                         ret = -EIO;
2306                         break;
2307                 }
2308
2309                 *level = *level - 1;
2310                 free_extent_buffer(path->nodes[*level]);
2311                 path->nodes[*level] = next;
2312                 path->slots[*level] = 0;
2313         }
2314         return ret;
2315 }
2316
2317 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2318                         struct walk_control *wc, int *level)
2319 {
2320         int i;
2321         struct extent_buffer *leaf;
2322
2323         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2324                 leaf = path->nodes[i];
2325                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2326                         path->slots[i]++;
2327                         *level = i;
2328                         return 0;
2329                 } else {
2330                         free_extent_buffer(path->nodes[*level]);
2331                         path->nodes[*level] = NULL;
2332                         BUG_ON(*level > wc->active_node);
2333                         if (*level == wc->active_node)
2334                                 leave_shared_node(root, wc, *level);
2335                         *level = i + 1;
2336                 }
2337         }
2338         return 1;
2339 }
2340
2341 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2342                            int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         *level = i + 1;
2357                 }
2358         }
2359         return 1;
2360 }
2361
2362 static int check_root_dir(struct inode_record *rec)
2363 {
2364         struct inode_backref *backref;
2365         int ret = -1;
2366
2367         if (!rec->found_inode_item || rec->errors)
2368                 goto out;
2369         if (rec->nlink != 1 || rec->found_link != 0)
2370                 goto out;
2371         if (list_empty(&rec->backrefs))
2372                 goto out;
2373         backref = to_inode_backref(rec->backrefs.next);
2374         if (!backref->found_inode_ref)
2375                 goto out;
2376         if (backref->index != 0 || backref->namelen != 2 ||
2377             memcmp(backref->name, "..", 2))
2378                 goto out;
2379         if (backref->found_dir_index || backref->found_dir_item)
2380                 goto out;
2381         ret = 0;
2382 out:
2383         return ret;
2384 }
2385
2386 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2387                               struct btrfs_root *root, struct btrfs_path *path,
2388                               struct inode_record *rec)
2389 {
2390         struct btrfs_inode_item *ei;
2391         struct btrfs_key key;
2392         int ret;
2393
2394         key.objectid = rec->ino;
2395         key.type = BTRFS_INODE_ITEM_KEY;
2396         key.offset = (u64)-1;
2397
2398         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2399         if (ret < 0)
2400                 goto out;
2401         if (ret) {
2402                 if (!path->slots[0]) {
2403                         ret = -ENOENT;
2404                         goto out;
2405                 }
2406                 path->slots[0]--;
2407                 ret = 0;
2408         }
2409         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2410         if (key.objectid != rec->ino) {
2411                 ret = -ENOENT;
2412                 goto out;
2413         }
2414
2415         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2416                             struct btrfs_inode_item);
2417         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2418         btrfs_mark_buffer_dirty(path->nodes[0]);
2419         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2420         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2421                root->root_key.objectid);
2422 out:
2423         btrfs_release_path(path);
2424         return ret;
2425 }
2426
2427 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2428                                     struct btrfs_root *root,
2429                                     struct btrfs_path *path,
2430                                     struct inode_record *rec)
2431 {
2432         int ret;
2433
2434         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2435         btrfs_release_path(path);
2436         if (!ret)
2437                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2438         return ret;
2439 }
2440
2441 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2442                                struct btrfs_root *root,
2443                                struct btrfs_path *path,
2444                                struct inode_record *rec)
2445 {
2446         struct btrfs_inode_item *ei;
2447         struct btrfs_key key;
2448         int ret = 0;
2449
2450         key.objectid = rec->ino;
2451         key.type = BTRFS_INODE_ITEM_KEY;
2452         key.offset = 0;
2453
2454         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2455         if (ret) {
2456                 if (ret > 0)
2457                         ret = -ENOENT;
2458                 goto out;
2459         }
2460
2461         /* Since ret == 0, no need to check anything */
2462         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463                             struct btrfs_inode_item);
2464         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2465         btrfs_mark_buffer_dirty(path->nodes[0]);
2466         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2467         printf("reset nbytes for ino %llu root %llu\n",
2468                rec->ino, root->root_key.objectid);
2469 out:
2470         btrfs_release_path(path);
2471         return ret;
2472 }
2473
2474 static int add_missing_dir_index(struct btrfs_root *root,
2475                                  struct cache_tree *inode_cache,
2476                                  struct inode_record *rec,
2477                                  struct inode_backref *backref)
2478 {
2479         struct btrfs_path path;
2480         struct btrfs_trans_handle *trans;
2481         struct btrfs_dir_item *dir_item;
2482         struct extent_buffer *leaf;
2483         struct btrfs_key key;
2484         struct btrfs_disk_key disk_key;
2485         struct inode_record *dir_rec;
2486         unsigned long name_ptr;
2487         u32 data_size = sizeof(*dir_item) + backref->namelen;
2488         int ret;
2489
2490         trans = btrfs_start_transaction(root, 1);
2491         if (IS_ERR(trans))
2492                 return PTR_ERR(trans);
2493
2494         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2495                 (unsigned long long)rec->ino);
2496
2497         btrfs_init_path(&path);
2498         key.objectid = backref->dir;
2499         key.type = BTRFS_DIR_INDEX_KEY;
2500         key.offset = backref->index;
2501         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2502         BUG_ON(ret);
2503
2504         leaf = path.nodes[0];
2505         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506
2507         disk_key.objectid = cpu_to_le64(rec->ino);
2508         disk_key.type = BTRFS_INODE_ITEM_KEY;
2509         disk_key.offset = 0;
2510
2511         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2512         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2513         btrfs_set_dir_data_len(leaf, dir_item, 0);
2514         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2515         name_ptr = (unsigned long)(dir_item + 1);
2516         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2517         btrfs_mark_buffer_dirty(leaf);
2518         btrfs_release_path(&path);
2519         btrfs_commit_transaction(trans, root);
2520
2521         backref->found_dir_index = 1;
2522         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2523         BUG_ON(IS_ERR(dir_rec));
2524         if (!dir_rec)
2525                 return 0;
2526         dir_rec->found_size += backref->namelen;
2527         if (dir_rec->found_size == dir_rec->isize &&
2528             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2529                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2530         if (dir_rec->found_size != dir_rec->isize)
2531                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532
2533         return 0;
2534 }
2535
2536 static int delete_dir_index(struct btrfs_root *root,
2537                             struct cache_tree *inode_cache,
2538                             struct inode_record *rec,
2539                             struct inode_backref *backref)
2540 {
2541         struct btrfs_trans_handle *trans;
2542         struct btrfs_dir_item *di;
2543         struct btrfs_path path;
2544         int ret = 0;
2545
2546         trans = btrfs_start_transaction(root, 1);
2547         if (IS_ERR(trans))
2548                 return PTR_ERR(trans);
2549
2550         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2551                 (unsigned long long)backref->dir,
2552                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2553                 (unsigned long long)root->objectid);
2554
2555         btrfs_init_path(&path);
2556         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2557                                     backref->name, backref->namelen,
2558                                     backref->index, -1);
2559         if (IS_ERR(di)) {
2560                 ret = PTR_ERR(di);
2561                 btrfs_release_path(&path);
2562                 btrfs_commit_transaction(trans, root);
2563                 if (ret == -ENOENT)
2564                         return 0;
2565                 return ret;
2566         }
2567
2568         if (!di)
2569                 ret = btrfs_del_item(trans, root, &path);
2570         else
2571                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2572         BUG_ON(ret);
2573         btrfs_release_path(&path);
2574         btrfs_commit_transaction(trans, root);
2575         return ret;
2576 }
2577
2578 static int create_inode_item(struct btrfs_root *root,
2579                              struct inode_record *rec,
2580                              struct inode_backref *backref, int root_dir)
2581 {
2582         struct btrfs_trans_handle *trans;
2583         struct btrfs_inode_item inode_item;
2584         time_t now = time(NULL);
2585         int ret;
2586
2587         trans = btrfs_start_transaction(root, 1);
2588         if (IS_ERR(trans)) {
2589                 ret = PTR_ERR(trans);
2590                 return ret;
2591         }
2592
2593         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2594                 "be incomplete, please check permissions and content after "
2595                 "the fsck completes.\n", (unsigned long long)root->objectid,
2596                 (unsigned long long)rec->ino);
2597
2598         memset(&inode_item, 0, sizeof(inode_item));
2599         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2600         if (root_dir)
2601                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2602         else
2603                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2604         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2605         if (rec->found_dir_item) {
2606                 if (rec->found_file_extent)
2607                         fprintf(stderr, "root %llu inode %llu has both a dir "
2608                                 "item and extents, unsure if it is a dir or a "
2609                                 "regular file so setting it as a directory\n",
2610                                 (unsigned long long)root->objectid,
2611                                 (unsigned long long)rec->ino);
2612                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2613                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2614         } else if (!rec->found_dir_item) {
2615                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2616                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2617         }
2618         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2619         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2620         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2621         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2622         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2623         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2624         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2625         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2626
2627         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2628         BUG_ON(ret);
2629         btrfs_commit_transaction(trans, root);
2630         return 0;
2631 }
2632
2633 static int repair_inode_backrefs(struct btrfs_root *root,
2634                                  struct inode_record *rec,
2635                                  struct cache_tree *inode_cache,
2636                                  int delete)
2637 {
2638         struct inode_backref *tmp, *backref;
2639         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2640         int ret = 0;
2641         int repaired = 0;
2642
2643         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2644                 if (!delete && rec->ino == root_dirid) {
2645                         if (!rec->found_inode_item) {
2646                                 ret = create_inode_item(root, rec, backref, 1);
2647                                 if (ret)
2648                                         break;
2649                                 repaired++;
2650                         }
2651                 }
2652
2653                 /* Index 0 for root dir's are special, don't mess with it */
2654                 if (rec->ino == root_dirid && backref->index == 0)
2655                         continue;
2656
2657                 if (delete &&
2658                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2659                      (backref->found_dir_index && backref->found_inode_ref &&
2660                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2661                         ret = delete_dir_index(root, inode_cache, rec, backref);
2662                         if (ret)
2663                                 break;
2664                         repaired++;
2665                         list_del(&backref->list);
2666                         free(backref);
2667                 }
2668
2669                 if (!delete && !backref->found_dir_index &&
2670                     backref->found_dir_item && backref->found_inode_ref) {
2671                         ret = add_missing_dir_index(root, inode_cache, rec,
2672                                                     backref);
2673                         if (ret)
2674                                 break;
2675                         repaired++;
2676                         if (backref->found_dir_item &&
2677                             backref->found_dir_index &&
2678                             backref->found_dir_index) {
2679                                 if (!backref->errors &&
2680                                     backref->found_inode_ref) {
2681                                         list_del(&backref->list);
2682                                         free(backref);
2683                                 }
2684                         }
2685                 }
2686
2687                 if (!delete && (!backref->found_dir_index &&
2688                                 !backref->found_dir_item &&
2689                                 backref->found_inode_ref)) {
2690                         struct btrfs_trans_handle *trans;
2691                         struct btrfs_key location;
2692
2693                         ret = check_dir_conflict(root, backref->name,
2694                                                  backref->namelen,
2695                                                  backref->dir,
2696                                                  backref->index);
2697                         if (ret) {
2698                                 /*
2699                                  * let nlink fixing routine to handle it,
2700                                  * which can do it better.
2701                                  */
2702                                 ret = 0;
2703                                 break;
2704                         }
2705                         location.objectid = rec->ino;
2706                         location.type = BTRFS_INODE_ITEM_KEY;
2707                         location.offset = 0;
2708
2709                         trans = btrfs_start_transaction(root, 1);
2710                         if (IS_ERR(trans)) {
2711                                 ret = PTR_ERR(trans);
2712                                 break;
2713                         }
2714                         fprintf(stderr, "adding missing dir index/item pair "
2715                                 "for inode %llu\n",
2716                                 (unsigned long long)rec->ino);
2717                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2718                                                     backref->namelen,
2719                                                     backref->dir, &location,
2720                                                     imode_to_type(rec->imode),
2721                                                     backref->index);
2722                         BUG_ON(ret);
2723                         btrfs_commit_transaction(trans, root);
2724                         repaired++;
2725                 }
2726
2727                 if (!delete && (backref->found_inode_ref &&
2728                                 backref->found_dir_index &&
2729                                 backref->found_dir_item &&
2730                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2731                                 !rec->found_inode_item)) {
2732                         ret = create_inode_item(root, rec, backref, 0);
2733                         if (ret)
2734                                 break;
2735                         repaired++;
2736                 }
2737
2738         }
2739         return ret ? ret : repaired;
2740 }
2741
2742 /*
2743  * To determine the file type for nlink/inode_item repair
2744  *
2745  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2746  * Return -ENOENT if file type is not found.
2747  */
2748 static int find_file_type(struct inode_record *rec, u8 *type)
2749 {
2750         struct inode_backref *backref;
2751
2752         /* For inode item recovered case */
2753         if (rec->found_inode_item) {
2754                 *type = imode_to_type(rec->imode);
2755                 return 0;
2756         }
2757
2758         list_for_each_entry(backref, &rec->backrefs, list) {
2759                 if (backref->found_dir_index || backref->found_dir_item) {
2760                         *type = backref->filetype;
2761                         return 0;
2762                 }
2763         }
2764         return -ENOENT;
2765 }
2766
2767 /*
2768  * To determine the file name for nlink repair
2769  *
2770  * Return 0 if file name is found, set name and namelen.
2771  * Return -ENOENT if file name is not found.
2772  */
2773 static int find_file_name(struct inode_record *rec,
2774                           char *name, int *namelen)
2775 {
2776         struct inode_backref *backref;
2777
2778         list_for_each_entry(backref, &rec->backrefs, list) {
2779                 if (backref->found_dir_index || backref->found_dir_item ||
2780                     backref->found_inode_ref) {
2781                         memcpy(name, backref->name, backref->namelen);
2782                         *namelen = backref->namelen;
2783                         return 0;
2784                 }
2785         }
2786         return -ENOENT;
2787 }
2788
2789 /* Reset the nlink of the inode to the correct one */
2790 static int reset_nlink(struct btrfs_trans_handle *trans,
2791                        struct btrfs_root *root,
2792                        struct btrfs_path *path,
2793                        struct inode_record *rec)
2794 {
2795         struct inode_backref *backref;
2796         struct inode_backref *tmp;
2797         struct btrfs_key key;
2798         struct btrfs_inode_item *inode_item;
2799         int ret = 0;
2800
2801         /* We don't believe this either, reset it and iterate backref */
2802         rec->found_link = 0;
2803
2804         /* Remove all backref including the valid ones */
2805         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2806                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2807                                    backref->index, backref->name,
2808                                    backref->namelen, 0);
2809                 if (ret < 0)
2810                         goto out;
2811
2812                 /* remove invalid backref, so it won't be added back */
2813                 if (!(backref->found_dir_index &&
2814                       backref->found_dir_item &&
2815                       backref->found_inode_ref)) {
2816                         list_del(&backref->list);
2817                         free(backref);
2818                 } else {
2819                         rec->found_link++;
2820                 }
2821         }
2822
2823         /* Set nlink to 0 */
2824         key.objectid = rec->ino;
2825         key.type = BTRFS_INODE_ITEM_KEY;
2826         key.offset = 0;
2827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2828         if (ret < 0)
2829                 goto out;
2830         if (ret > 0) {
2831                 ret = -ENOENT;
2832                 goto out;
2833         }
2834         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2835                                     struct btrfs_inode_item);
2836         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2837         btrfs_mark_buffer_dirty(path->nodes[0]);
2838         btrfs_release_path(path);
2839
2840         /*
2841          * Add back valid inode_ref/dir_item/dir_index,
2842          * add_link() will handle the nlink inc, so new nlink must be correct
2843          */
2844         list_for_each_entry(backref, &rec->backrefs, list) {
2845                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2846                                      backref->name, backref->namelen,
2847                                      backref->filetype, &backref->index, 1);
2848                 if (ret < 0)
2849                         goto out;
2850         }
2851 out:
2852         btrfs_release_path(path);
2853         return ret;
2854 }
2855
2856 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2857                                struct btrfs_root *root,
2858                                struct btrfs_path *path,
2859                                struct inode_record *rec)
2860 {
2861         char *dir_name = "lost+found";
2862         char namebuf[BTRFS_NAME_LEN] = {0};
2863         u64 lost_found_ino;
2864         u32 mode = 0700;
2865         u8 type = 0;
2866         int namelen = 0;
2867         int name_recovered = 0;
2868         int type_recovered = 0;
2869         int ret = 0;
2870
2871         /*
2872          * Get file name and type first before these invalid inode ref
2873          * are deleted by remove_all_invalid_backref()
2874          */
2875         name_recovered = !find_file_name(rec, namebuf, &namelen);
2876         type_recovered = !find_file_type(rec, &type);
2877
2878         if (!name_recovered) {
2879                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2880                        rec->ino, rec->ino);
2881                 namelen = count_digits(rec->ino);
2882                 sprintf(namebuf, "%llu", rec->ino);
2883                 name_recovered = 1;
2884         }
2885         if (!type_recovered) {
2886                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2887                        rec->ino);
2888                 type = BTRFS_FT_REG_FILE;
2889                 type_recovered = 1;
2890         }
2891
2892         ret = reset_nlink(trans, root, path, rec);
2893         if (ret < 0) {
2894                 fprintf(stderr,
2895                         "Failed to reset nlink for inode %llu: %s\n",
2896                         rec->ino, strerror(-ret));
2897                 goto out;
2898         }
2899
2900         if (rec->found_link == 0) {
2901                 lost_found_ino = root->highest_inode;
2902                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2903                         ret = -EOVERFLOW;
2904                         goto out;
2905                 }
2906                 lost_found_ino++;
2907                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2908                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2909                                   mode);
2910                 if (ret < 0) {
2911                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2912                                 dir_name, strerror(-ret));
2913                         goto out;
2914                 }
2915                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2916                                      namebuf, namelen, type, NULL, 1);
2917                 /*
2918                  * Add ".INO" suffix several times to handle case where
2919                  * "FILENAME.INO" is already taken by another file.
2920                  */
2921                 while (ret == -EEXIST) {
2922                         /*
2923                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2924                          */
2925                         if (namelen + count_digits(rec->ino) + 1 >
2926                             BTRFS_NAME_LEN) {
2927                                 ret = -EFBIG;
2928                                 goto out;
2929                         }
2930                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2931                                  ".%llu", rec->ino);
2932                         namelen += count_digits(rec->ino) + 1;
2933                         ret = btrfs_add_link(trans, root, rec->ino,
2934                                              lost_found_ino, namebuf,
2935                                              namelen, type, NULL, 1);
2936                 }
2937                 if (ret < 0) {
2938                         fprintf(stderr,
2939                                 "Failed to link the inode %llu to %s dir: %s\n",
2940                                 rec->ino, dir_name, strerror(-ret));
2941                         goto out;
2942                 }
2943                 /*
2944                  * Just increase the found_link, don't actually add the
2945                  * backref. This will make things easier and this inode
2946                  * record will be freed after the repair is done.
2947                  * So fsck will not report problem about this inode.
2948                  */
2949                 rec->found_link++;
2950                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2951                        namelen, namebuf, dir_name);
2952         }
2953         printf("Fixed the nlink of inode %llu\n", rec->ino);
2954 out:
2955         /*
2956          * Clear the flag anyway, or we will loop forever for the same inode
2957          * as it will not be removed from the bad inode list and the dead loop
2958          * happens.
2959          */
2960         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2961         btrfs_release_path(path);
2962         return ret;
2963 }
2964
2965 /*
2966  * Check if there is any normal(reg or prealloc) file extent for given
2967  * ino.
2968  * This is used to determine the file type when neither its dir_index/item or
2969  * inode_item exists.
2970  *
2971  * This will *NOT* report error, if any error happens, just consider it does
2972  * not have any normal file extent.
2973  */
2974 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2975 {
2976         struct btrfs_path path;
2977         struct btrfs_key key;
2978         struct btrfs_key found_key;
2979         struct btrfs_file_extent_item *fi;
2980         u8 type;
2981         int ret = 0;
2982
2983         btrfs_init_path(&path);
2984         key.objectid = ino;
2985         key.type = BTRFS_EXTENT_DATA_KEY;
2986         key.offset = 0;
2987
2988         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2989         if (ret < 0) {
2990                 ret = 0;
2991                 goto out;
2992         }
2993         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2994                 ret = btrfs_next_leaf(root, &path);
2995                 if (ret) {
2996                         ret = 0;
2997                         goto out;
2998                 }
2999         }
3000         while (1) {
3001                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3002                                       path.slots[0]);
3003                 if (found_key.objectid != ino ||
3004                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3005                         break;
3006                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3007                                     struct btrfs_file_extent_item);
3008                 type = btrfs_file_extent_type(path.nodes[0], fi);
3009                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3010                         ret = 1;
3011                         goto out;
3012                 }
3013         }
3014 out:
3015         btrfs_release_path(&path);
3016         return ret;
3017 }
3018
3019 static u32 btrfs_type_to_imode(u8 type)
3020 {
3021         static u32 imode_by_btrfs_type[] = {
3022                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3023                 [BTRFS_FT_DIR]          = S_IFDIR,
3024                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3025                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3026                 [BTRFS_FT_FIFO]         = S_IFIFO,
3027                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3028                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3029         };
3030
3031         return imode_by_btrfs_type[(type)];
3032 }
3033
3034 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3035                                 struct btrfs_root *root,
3036                                 struct btrfs_path *path,
3037                                 struct inode_record *rec)
3038 {
3039         u8 filetype;
3040         u32 mode = 0700;
3041         int type_recovered = 0;
3042         int ret = 0;
3043
3044         printf("Trying to rebuild inode:%llu\n", rec->ino);
3045
3046         type_recovered = !find_file_type(rec, &filetype);
3047
3048         /*
3049          * Try to determine inode type if type not found.
3050          *
3051          * For found regular file extent, it must be FILE.
3052          * For found dir_item/index, it must be DIR.
3053          *
3054          * For undetermined one, use FILE as fallback.
3055          *
3056          * TODO:
3057          * 1. If found backref(inode_index/item is already handled) to it,
3058          *    it must be DIR.
3059          *    Need new inode-inode ref structure to allow search for that.
3060          */
3061         if (!type_recovered) {
3062                 if (rec->found_file_extent &&
3063                     find_normal_file_extent(root, rec->ino)) {
3064                         type_recovered = 1;
3065                         filetype = BTRFS_FT_REG_FILE;
3066                 } else if (rec->found_dir_item) {
3067                         type_recovered = 1;
3068                         filetype = BTRFS_FT_DIR;
3069                 } else if (!list_empty(&rec->orphan_extents)) {
3070                         type_recovered = 1;
3071                         filetype = BTRFS_FT_REG_FILE;
3072                 } else{
3073                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3074                                rec->ino);
3075                         type_recovered = 1;
3076                         filetype = BTRFS_FT_REG_FILE;
3077                 }
3078         }
3079
3080         ret = btrfs_new_inode(trans, root, rec->ino,
3081                               mode | btrfs_type_to_imode(filetype));
3082         if (ret < 0)
3083                 goto out;
3084
3085         /*
3086          * Here inode rebuild is done, we only rebuild the inode item,
3087          * don't repair the nlink(like move to lost+found).
3088          * That is the job of nlink repair.
3089          *
3090          * We just fill the record and return
3091          */
3092         rec->found_dir_item = 1;
3093         rec->imode = mode | btrfs_type_to_imode(filetype);
3094         rec->nlink = 0;
3095         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3096         /* Ensure the inode_nlinks repair function will be called */
3097         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3098 out:
3099         return ret;
3100 }
3101
3102 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3103                                       struct btrfs_root *root,
3104                                       struct btrfs_path *path,
3105                                       struct inode_record *rec)
3106 {
3107         struct orphan_data_extent *orphan;
3108         struct orphan_data_extent *tmp;
3109         int ret = 0;
3110
3111         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3112                 /*
3113                  * Check for conflicting file extents
3114                  *
3115                  * Here we don't know whether the extents is compressed or not,
3116                  * so we can only assume it not compressed nor data offset,
3117                  * and use its disk_len as extent length.
3118                  */
3119                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3120                                        orphan->offset, orphan->disk_len, 0);
3121                 btrfs_release_path(path);
3122                 if (ret < 0)
3123                         goto out;
3124                 if (!ret) {
3125                         fprintf(stderr,
3126                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3127                                 orphan->disk_bytenr, orphan->disk_len);
3128                         ret = btrfs_free_extent(trans,
3129                                         root->fs_info->extent_root,
3130                                         orphan->disk_bytenr, orphan->disk_len,
3131                                         0, root->objectid, orphan->objectid,
3132                                         orphan->offset);
3133                         if (ret < 0)
3134                                 goto out;
3135                 }
3136                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3137                                 orphan->offset, orphan->disk_bytenr,
3138                                 orphan->disk_len, orphan->disk_len);
3139                 if (ret < 0)
3140                         goto out;
3141
3142                 /* Update file size info */
3143                 rec->found_size += orphan->disk_len;
3144                 if (rec->found_size == rec->nbytes)
3145                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3146
3147                 /* Update the file extent hole info too */
3148                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3149                                            orphan->disk_len);
3150                 if (ret < 0)
3151                         goto out;
3152                 if (RB_EMPTY_ROOT(&rec->holes))
3153                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3154
3155                 list_del(&orphan->list);
3156                 free(orphan);
3157         }
3158         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3159 out:
3160         return ret;
3161 }
3162
3163 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3164                                         struct btrfs_root *root,
3165                                         struct btrfs_path *path,
3166                                         struct inode_record *rec)
3167 {
3168         struct rb_node *node;
3169         struct file_extent_hole *hole;
3170         int found = 0;
3171         int ret = 0;
3172
3173         node = rb_first(&rec->holes);
3174
3175         while (node) {
3176                 found = 1;
3177                 hole = rb_entry(node, struct file_extent_hole, node);
3178                 ret = btrfs_punch_hole(trans, root, rec->ino,
3179                                        hole->start, hole->len);
3180                 if (ret < 0)
3181                         goto out;
3182                 ret = del_file_extent_hole(&rec->holes, hole->start,
3183                                            hole->len);
3184                 if (ret < 0)
3185                         goto out;
3186                 if (RB_EMPTY_ROOT(&rec->holes))
3187                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3188                 node = rb_first(&rec->holes);
3189         }
3190         /* special case for a file losing all its file extent */
3191         if (!found) {
3192                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3193                                        round_up(rec->isize, root->sectorsize));
3194                 if (ret < 0)
3195                         goto out;
3196         }
3197         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3198                rec->ino, root->objectid);
3199 out:
3200         return ret;
3201 }
3202
3203 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3204 {
3205         struct btrfs_trans_handle *trans;
3206         struct btrfs_path path;
3207         int ret = 0;
3208
3209         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3210                              I_ERR_NO_ORPHAN_ITEM |
3211                              I_ERR_LINK_COUNT_WRONG |
3212                              I_ERR_NO_INODE_ITEM |
3213                              I_ERR_FILE_EXTENT_ORPHAN |
3214                              I_ERR_FILE_EXTENT_DISCOUNT|
3215                              I_ERR_FILE_NBYTES_WRONG)))
3216                 return rec->errors;
3217
3218         /*
3219          * For nlink repair, it may create a dir and add link, so
3220          * 2 for parent(256)'s dir_index and dir_item
3221          * 2 for lost+found dir's inode_item and inode_ref
3222          * 1 for the new inode_ref of the file
3223          * 2 for lost+found dir's dir_index and dir_item for the file
3224          */
3225         trans = btrfs_start_transaction(root, 7);
3226         if (IS_ERR(trans))
3227                 return PTR_ERR(trans);
3228
3229         btrfs_init_path(&path);
3230         if (rec->errors & I_ERR_NO_INODE_ITEM)
3231                 ret = repair_inode_no_item(trans, root, &path, rec);
3232         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3233                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3234         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3235                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3236         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3237                 ret = repair_inode_isize(trans, root, &path, rec);
3238         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3239                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3240         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3241                 ret = repair_inode_nlinks(trans, root, &path, rec);
3242         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3243                 ret = repair_inode_nbytes(trans, root, &path, rec);
3244         btrfs_commit_transaction(trans, root);
3245         btrfs_release_path(&path);
3246         return ret;
3247 }
3248
/*
 * Walk every inode record collected for @root, attempt repairs when the
 * global 'repair' flag is set, and report whatever is still wrong.
 *
 * @root:        fs/subvolume tree the records were gathered from
 * @inode_cache: cache tree of ptr_node entries whose ->data points to an
 *               inode_record
 *
 * On the path that reaches the final reporting loop every record is removed
 * from @inode_cache and freed.
 *
 * Returns:
 *   0        nothing was counted as an error, or the root item has 0 refs
 *  -1        at least one error was counted
 *  -EAGAIN   a backref repair returned > 0, or the missing root dir was
 *            recreated (records were dropped; NOTE(review): presumably the
 *            caller rescans - confirm)
 *  other <0  failure from repair_inode_backrefs() or from starting the
 *            transaction used to recreate the root dir
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
{
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	int stage = 0;
	int ret = 0;
	int err = 0;
	u64 error = 0;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	/* A root with zero refs is not checked; only warn if records exist */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);
		return 0;
	}

	/*
	 * We need to record the highest inode number for later 'lost+found'
	 * dir creation.
	 * We must select an ino not used/referred by any existing inode, or
	 * 'lost+found' ino may be a missing ino in a corrupted leaf,
	 * this may cause 'lost+found' dir has wrong nlinks.
	 */
	cache = last_cache_extent(inode_cache);
	if (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		if (rec->ino > root->highest_inode)
			root->highest_inode = rec->ino;
	}

	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second.  We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid index.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
	/*
	 * Stage 1 and 2 call repair_inode_backrefs() (last argument is 1 only
	 * in stage 1).  Stage 3 runs only when err is set and throws away all
	 * records.  Without 'repair' the inner loop never executes.
	 */
	while (stage < 3) {
		stage++;
		if (stage == 3 && !err)
			break;

		cache = search_cache_extent(inode_cache, 0);
		while (repair && cache) {
			node = container_of(cache, struct ptr_node, cache);
			rec = node->data;
			/* Advance first: the current entry may be removed below */
			cache = next_cache_extent(cache);

			/* Need to free everything up and rescan */
			if (stage == 3) {
				remove_cache_extent(inode_cache, &node->cache);
				free(node);
				free_inode_rec(rec);
				continue;
			}

			if (list_empty(&rec->backrefs))
				continue;

			ret = repair_inode_backrefs(root, rec, inode_cache,
						    stage == 1);
			if (ret < 0) {
				/* Hard failure: jump straight to the stage 3 cleanup */
				err = ret;
				stage = 2;
				break;
			} if (ret > 0) {
				/* Positive return: remember that a rescan is needed */
				err = -EAGAIN;
			}
		}
	}
	if (err)
		return err;

	/* Validate the root directory inode, or recreate it when repairing */
	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
	if (rec) {
		ret = check_root_dir(rec);
		if (ret) {
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			error++;
		}
	} else {
		if (repair) {
			struct btrfs_trans_handle *trans;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				return err;
			}

			fprintf(stderr,
				"root %llu missing its root dir, recreating\n",
				(unsigned long long)root->objectid);

			ret = btrfs_make_root_dir(trans, root, root_dirid);
			BUG_ON(ret);

			btrfs_commit_transaction(trans, root);
			return -EAGAIN;
		}

		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	}

	/* Drain the cache: each record is detached, checked, reported, freed */
	while (1) {
		cache = search_cache_extent(inode_cache, 0);
		if (!cache)
			break;
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		/* The root dir was handled above; the orphan objectid is skipped */
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
			continue;
		}

		/* Clear the "no orphan item" flag if check_orphan_item() returns 0 */
		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			ret = check_orphan_item(root, rec->ino);
			if (ret == 0)
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
		}

		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
		if (repair) {
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
			/* In repair mode a failed repair is printed but not counted */
			ret = 0;
		}

		if (!(repair && ret == 0))
			error++;
		print_inode_error(root, rec);
		/* Mark what each backref is missing, then print it */
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		}
		free_inode_rec(rec);
	}
	return (error > 0) ? -1 : 0;
}
3430
3431 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3432                                         u64 objectid)
3433 {
3434         struct cache_extent *cache;
3435         struct root_record *rec = NULL;
3436         int ret;
3437
3438         cache = lookup_cache_extent(root_cache, objectid, 1);
3439         if (cache) {
3440                 rec = container_of(cache, struct root_record, cache);
3441         } else {
3442                 rec = calloc(1, sizeof(*rec));
3443                 if (!rec)
3444                         return ERR_PTR(-ENOMEM);
3445                 rec->objectid = objectid;
3446                 INIT_LIST_HEAD(&rec->backrefs);
3447                 rec->cache.start = objectid;
3448                 rec->cache.size = 1;
3449
3450                 ret = insert_cache_extent(root_cache, &rec->cache);
3451                 if (ret)
3452                         return ERR_PTR(-EEXIST);
3453         }
3454         return rec;
3455 }
3456
3457 static struct root_backref *get_root_backref(struct root_record *rec,
3458                                              u64 ref_root, u64 dir, u64 index,
3459                                              const char *name, int namelen)
3460 {
3461         struct root_backref *backref;
3462
3463         list_for_each_entry(backref, &rec->backrefs, list) {
3464                 if (backref->ref_root != ref_root || backref->dir != dir ||
3465                     backref->namelen != namelen)
3466                         continue;
3467                 if (memcmp(name, backref->name, namelen))
3468                         continue;
3469                 return backref;
3470         }
3471
3472         backref = calloc(1, sizeof(*backref) + namelen + 1);
3473         if (!backref)
3474                 return NULL;
3475         backref->ref_root = ref_root;
3476         backref->dir = dir;
3477         backref->index = index;
3478         backref->namelen = namelen;
3479         memcpy(backref->name, name, namelen);
3480         backref->name[namelen] = '\0';
3481         list_add_tail(&backref->list, &rec->backrefs);
3482         return backref;
3483 }
3484
3485 static void free_root_record(struct cache_extent *cache)
3486 {
3487         struct root_record *rec;
3488         struct root_backref *backref;
3489
3490         rec = container_of(cache, struct root_record, cache);
3491         while (!list_empty(&rec->backrefs)) {
3492                 backref = to_root_backref(rec->backrefs.next);
3493                 list_del(&backref->list);
3494                 free(backref);
3495         }
3496
3497         free(rec);
3498 }
3499
/*
 * NOTE(review): the macro is defined outside this view; presumably it
 * generates a free_root_recs_tree() helper that applies free_root_record()
 * to every entry of a cache tree - confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3501
3502 static int add_root_backref(struct cache_tree *root_cache,
3503                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3504                             const char *name, int namelen,
3505                             int item_type, int errors)
3506 {
3507         struct root_record *rec;
3508         struct root_backref *backref;
3509
3510         rec = get_root_rec(root_cache, root_id);
3511         BUG_ON(IS_ERR(rec));
3512         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3513         BUG_ON(!backref);
3514
3515         backref->errors |= errors;
3516
3517         if (item_type != BTRFS_DIR_ITEM_KEY) {
3518                 if (backref->found_dir_index || backref->found_back_ref ||
3519                     backref->found_forward_ref) {
3520                         if (backref->index != index)
3521                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3522                 } else {
3523                         backref->index = index;
3524                 }
3525         }
3526
3527         if (item_type == BTRFS_DIR_ITEM_KEY) {
3528                 if (backref->found_forward_ref)
3529                         rec->found_ref++;
3530                 backref->found_dir_item = 1;
3531         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3532                 backref->found_dir_index = 1;
3533         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3534                 if (backref->found_forward_ref)
3535                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3536                 else if (backref->found_dir_item)
3537                         rec->found_ref++;
3538                 backref->found_forward_ref = 1;
3539         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3540                 if (backref->found_back_ref)
3541                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3542                 backref->found_back_ref = 1;
3543         } else {
3544                 BUG_ON(1);
3545         }
3546
3547         if (backref->found_forward_ref && backref->found_dir_item)
3548                 backref->reachable = 1;
3549         return 0;
3550 }
3551
3552 static int merge_root_recs(struct btrfs_root *root,
3553                            struct cache_tree *src_cache,
3554                            struct cache_tree *dst_cache)
3555 {
3556         struct cache_extent *cache;
3557         struct ptr_node *node;
3558         struct inode_record *rec;
3559         struct inode_backref *backref;
3560         int ret = 0;
3561
3562         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3563                 free_inode_recs_tree(src_cache);
3564                 return 0;
3565         }
3566
3567         while (1) {
3568                 cache = search_cache_extent(src_cache, 0);
3569                 if (!cache)
3570                         break;
3571                 node = container_of(cache, struct ptr_node, cache);
3572                 rec = node->data;
3573                 remove_cache_extent(src_cache, &node->cache);
3574                 free(node);
3575
3576                 ret = is_child_root(root, root->objectid, rec->ino);
3577                 if (ret < 0)
3578                         break;
3579                 else if (ret == 0)
3580                         goto skip;
3581
3582                 list_for_each_entry(backref, &rec->backrefs, list) {
3583                         BUG_ON(backref->found_inode_ref);
3584                         if (backref->found_dir_item)
3585                                 add_root_backref(dst_cache, rec->ino,
3586                                         root->root_key.objectid, backref->dir,
3587                                         backref->index, backref->name,
3588                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3589                                         backref->errors);
3590                         if (backref->found_dir_index)
3591                                 add_root_backref(dst_cache, rec->ino,
3592                                         root->root_key.objectid, backref->dir,
3593                                         backref->index, backref->name,
3594                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3595                                         backref->errors);
3596                 }
3597 skip:
3598                 free_inode_rec(rec);
3599         }
3600         if (ret < 0)
3601                 return ret;
3602         return 0;
3603 }
3604
3605 static int check_root_refs(struct btrfs_root *root,
3606                            struct cache_tree *root_cache)
3607 {
3608         struct root_record *rec;
3609         struct root_record *ref_root;
3610         struct root_backref *backref;
3611         struct cache_extent *cache;
3612         int loop = 1;
3613         int ret;
3614         int error;
3615         int errors = 0;
3616
3617         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3618         BUG_ON(IS_ERR(rec));
3619         rec->found_ref = 1;
3620
3621         /* fixme: this can not detect circular references */
3622         while (loop) {
3623                 loop = 0;
3624                 cache = search_cache_extent(root_cache, 0);
3625                 while (1) {
3626                         if (!cache)
3627                                 break;
3628                         rec = container_of(cache, struct root_record, cache);
3629                         cache = next_cache_extent(cache);
3630
3631                         if (rec->found_ref == 0)
3632                                 continue;
3633
3634                         list_for_each_entry(backref, &rec->backrefs, list) {
3635                                 if (!backref->reachable)
3636                                         continue;
3637
3638                                 ref_root = get_root_rec(root_cache,
3639                                                         backref->ref_root);
3640                                 BUG_ON(IS_ERR(ref_root));
3641                                 if (ref_root->found_ref > 0)
3642                                         continue;
3643
3644                                 backref->reachable = 0;
3645                                 rec->found_ref--;
3646                                 if (rec->found_ref == 0)
3647                                         loop = 1;
3648                         }
3649                 }
3650         }
3651
3652         cache = search_cache_extent(root_cache, 0);
3653         while (1) {
3654                 if (!cache)
3655                         break;
3656                 rec = container_of(cache, struct root_record, cache);
3657                 cache = next_cache_extent(cache);
3658
3659                 if (rec->found_ref == 0 &&
3660                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3661                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3662                         ret = check_orphan_item(root->fs_info->tree_root,
3663                                                 rec->objectid);
3664                         if (ret == 0)
3665                                 continue;
3666
3667                         /*
3668                          * If we don't have a root item then we likely just have
3669                          * a dir item in a snapshot for this root but no actual
3670                          * ref key or anything so it's meaningless.
3671                          */
3672                         if (!rec->found_root_item)
3673                                 continue;
3674                         errors++;
3675                         fprintf(stderr, "fs tree %llu not referenced\n",
3676                                 (unsigned long long)rec->objectid);
3677                 }
3678
3679                 error = 0;
3680                 if (rec->found_ref > 0 && !rec->found_root_item)
3681                         error = 1;
3682                 list_for_each_entry(backref, &rec->backrefs, list) {
3683                         if (!backref->found_dir_item)
3684                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3685                         if (!backref->found_dir_index)
3686                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3687                         if (!backref->found_back_ref)
3688                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3689                         if (!backref->found_forward_ref)
3690                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3691                         if (backref->reachable && backref->errors)
3692                                 error = 1;
3693                 }
3694                 if (!error)
3695                         continue;
3696
3697                 errors++;
3698                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3699                         (unsigned long long)rec->objectid, rec->found_ref,
3700                          rec->found_root_item ? "" : "not found");
3701
3702                 list_for_each_entry(backref, &rec->backrefs, list) {
3703                         if (!backref->reachable)
3704                                 continue;
3705                         if (!backref->errors && rec->found_root_item)
3706                                 continue;
3707                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3708                                 " index %llu namelen %u name %s errors %x\n",
3709                                 (unsigned long long)backref->ref_root,
3710                                 (unsigned long long)backref->dir,
3711                                 (unsigned long long)backref->index,
3712                                 backref->namelen, backref->name,
3713                                 backref->errors);
3714                         print_ref_error(backref->errors);
3715                 }
3716         }
3717         return errors > 0 ? 1 : 0;
3718 }
3719
3720 static int process_root_ref(struct extent_buffer *eb, int slot,
3721                             struct btrfs_key *key,
3722                             struct cache_tree *root_cache)
3723 {
3724         u64 dirid;
3725         u64 index;
3726         u32 len;
3727         u32 name_len;
3728         struct btrfs_root_ref *ref;
3729         char namebuf[BTRFS_NAME_LEN];
3730         int error;
3731
3732         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3733
3734         dirid = btrfs_root_ref_dirid(eb, ref);
3735         index = btrfs_root_ref_sequence(eb, ref);
3736         name_len = btrfs_root_ref_name_len(eb, ref);
3737
3738         if (name_len <= BTRFS_NAME_LEN) {
3739                 len = name_len;
3740                 error = 0;
3741         } else {
3742                 len = BTRFS_NAME_LEN;
3743                 error = REF_ERR_NAME_TOO_LONG;
3744         }
3745         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3746
3747         if (key->type == BTRFS_ROOT_REF_KEY) {
3748                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3749                                  index, namebuf, len, key->type, error);
3750         } else {
3751                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3752                                  index, namebuf, len, key->type, error);
3753         }
3754         return 0;
3755 }
3756
3757 static void free_corrupt_block(struct cache_extent *cache)
3758 {
3759         struct btrfs_corrupt_block *corrupt;
3760
3761         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3762         free(corrupt);
3763 }
3764
/*
 * NOTE(review): the macro is defined outside this view; presumably it
 * generates a free_corrupt_blocks_tree() helper that applies
 * free_corrupt_block() to every entry of a cache tree - confirm against the
 * macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3766
3767 /*
3768  * Repair the btree of the given root.
3769  *
3770  * The fix is to remove the node key in corrupt_blocks cache_tree.
3771  * and rebalance the tree.
3772  * After the fix, the btree should be writeable.
3773  */
3774 static int repair_btree(struct btrfs_root *root,
3775                         struct cache_tree *corrupt_blocks)
3776 {
3777         struct btrfs_trans_handle *trans;
3778         struct btrfs_path path;
3779         struct btrfs_corrupt_block *corrupt;
3780         struct cache_extent *cache;
3781         struct btrfs_key key;
3782         u64 offset;
3783         int level;
3784         int ret = 0;
3785
3786         if (cache_tree_empty(corrupt_blocks))
3787                 return 0;
3788
3789         trans = btrfs_start_transaction(root, 1);
3790         if (IS_ERR(trans)) {
3791                 ret = PTR_ERR(trans);
3792                 fprintf(stderr, "Error starting transaction: %s\n",
3793                         strerror(-ret));
3794                 return ret;
3795         }
3796         btrfs_init_path(&path);
3797         cache = first_cache_extent(corrupt_blocks);
3798         while (cache) {
3799                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3800                                        cache);
3801                 level = corrupt->level;
3802                 path.lowest_level = level;
3803                 key.objectid = corrupt->key.objectid;
3804                 key.type = corrupt->key.type;
3805                 key.offset = corrupt->key.offset;
3806
3807                 /*
3808                  * Here we don't want to do any tree balance, since it may
3809                  * cause a balance with corrupted brother leaf/node,
3810                  * so ins_len set to 0 here.
3811                  * Balance will be done after all corrupt node/leaf is deleted.
3812                  */
3813                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3814                 if (ret < 0)
3815                         goto out;
3816                 offset = btrfs_node_blockptr(path.nodes[level],
3817                                              path.slots[level]);
3818
3819                 /* Remove the ptr */
3820                 ret = btrfs_del_ptr(trans, root, &path, level,
3821                                     path.slots[level]);
3822                 if (ret < 0)
3823                         goto out;
3824                 /*
3825                  * Remove the corresponding extent
3826                  * return value is not concerned.
3827                  */
3828                 btrfs_release_path(&path);
3829                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3830                                         0, root->root_key.objectid,
3831                                         level - 1, 0);
3832                 cache = next_cache_extent(cache);
3833         }
3834
3835         /* Balance the btree using btrfs_search_slot() */
3836         cache = first_cache_extent(corrupt_blocks);
3837         while (cache) {
3838                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3839                                        cache);
3840                 memcpy(&key, &corrupt->key, sizeof(key));
3841                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3842                 if (ret < 0)
3843                         goto out;
3844                 /* return will always >0 since it won't find the item */
3845                 ret = 0;
3846                 btrfs_release_path(&path);
3847                 cache = next_cache_extent(cache);
3848         }
3849 out:
3850         btrfs_commit_transaction(trans, root);
3851         btrfs_release_path(&path);
3852         return ret;
3853 }
3854
3855 static int check_fs_root(struct btrfs_root *root,
3856                          struct cache_tree *root_cache,
3857                          struct walk_control *wc)
3858 {
3859         int ret = 0;
3860         int err = 0;
3861         int wret;
3862         int level;
3863         struct btrfs_path path;
3864         struct shared_node root_node;
3865         struct root_record *rec;
3866         struct btrfs_root_item *root_item = &root->root_item;
3867         struct cache_tree corrupt_blocks;
3868         struct orphan_data_extent *orphan;
3869         struct orphan_data_extent *tmp;
3870         enum btrfs_tree_block_status status;
3871         struct node_refs nrefs;
3872
3873         /*
3874          * Reuse the corrupt_block cache tree to record corrupted tree block
3875          *
3876          * Unlike the usage in extent tree check, here we do it in a per
3877          * fs/subvol tree base.
3878          */
3879         cache_tree_init(&corrupt_blocks);
3880         root->fs_info->corrupt_blocks = &corrupt_blocks;
3881
3882         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3883                 rec = get_root_rec(root_cache, root->root_key.objectid);
3884                 BUG_ON(IS_ERR(rec));
3885                 if (btrfs_root_refs(root_item) > 0)
3886                         rec->found_root_item = 1;
3887         }
3888
3889         btrfs_init_path(&path);
3890         memset(&root_node, 0, sizeof(root_node));
3891         cache_tree_init(&root_node.root_cache);
3892         cache_tree_init(&root_node.inode_cache);
3893         memset(&nrefs, 0, sizeof(nrefs));
3894
3895         /* Move the orphan extent record to corresponding inode_record */
3896         list_for_each_entry_safe(orphan, tmp,
3897                                  &root->orphan_data_extents, list) {
3898                 struct inode_record *inode;
3899
3900                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3901                                       1);
3902                 BUG_ON(IS_ERR(inode));
3903                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3904                 list_move(&orphan->list, &inode->orphan_extents);
3905         }
3906
3907         level = btrfs_header_level(root->node);
3908         memset(wc->nodes, 0, sizeof(wc->nodes));
3909         wc->nodes[level] = &root_node;
3910         wc->active_node = level;
3911         wc->root_level = level;
3912
3913         /* We may not have checked the root block, lets do that now */
3914         if (btrfs_is_leaf(root->node))
3915                 status = btrfs_check_leaf(root, NULL, root->node);
3916         else
3917                 status = btrfs_check_node(root, NULL, root->node);
3918         if (status != BTRFS_TREE_BLOCK_CLEAN)
3919                 return -EIO;
3920
3921         if (btrfs_root_refs(root_item) > 0 ||
3922             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3923                 path.nodes[level] = root->node;
3924                 extent_buffer_get(root->node);
3925                 path.slots[level] = 0;
3926         } else {
3927                 struct btrfs_key key;
3928                 struct btrfs_disk_key found_key;
3929
3930                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3931                 level = root_item->drop_level;
3932                 path.lowest_level = level;
3933                 if (level > btrfs_header_level(root->node) ||
3934                     level >= BTRFS_MAX_LEVEL) {
3935                         error("ignoring invalid drop level: %u", level);
3936                         goto skip_walking;
3937                 }
3938                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3939                 if (wret < 0)
3940                         goto skip_walking;
3941                 btrfs_node_key(path.nodes[level], &found_key,
3942                                 path.slots[level]);
3943                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3944                                         sizeof(found_key)));
3945         }
3946
3947         while (1) {
3948                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3949                 if (wret < 0)
3950                         ret = wret;
3951                 if (wret != 0)
3952                         break;
3953
3954                 wret = walk_up_tree(root, &path, wc, &level);
3955                 if (wret < 0)
3956                         ret = wret;
3957                 if (wret != 0)
3958                         break;
3959         }
3960 skip_walking:
3961         btrfs_release_path(&path);
3962
3963         if (!cache_tree_empty(&corrupt_blocks)) {
3964                 struct cache_extent *cache;
3965                 struct btrfs_corrupt_block *corrupt;
3966
3967                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3968                        root->root_key.objectid);
3969                 cache = first_cache_extent(&corrupt_blocks);
3970                 while (cache) {
3971                         corrupt = container_of(cache,
3972                                                struct btrfs_corrupt_block,
3973                                                cache);
3974                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3975                                cache->start, corrupt->level,
3976                                corrupt->key.objectid, corrupt->key.type,
3977                                corrupt->key.offset);
3978                         cache = next_cache_extent(cache);
3979                 }
3980                 if (repair) {
3981                         printf("Try to repair the btree for root %llu\n",
3982                                root->root_key.objectid);
3983                         ret = repair_btree(root, &corrupt_blocks);
3984                         if (ret < 0)
3985                                 fprintf(stderr, "Failed to repair btree: %s\n",
3986                                         strerror(-ret));
3987                         if (!ret)
3988                                 printf("Btree for root %llu is fixed\n",
3989                                        root->root_key.objectid);
3990                 }
3991         }
3992
3993         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3994         if (err < 0)
3995                 ret = err;
3996
3997         if (root_node.current) {
3998                 root_node.current->checked = 1;
3999                 maybe_free_inode_rec(&root_node.inode_cache,
4000                                 root_node.current);
4001         }
4002
4003         err = check_inode_recs(root, &root_node.inode_cache);
4004         if (!ret)
4005                 ret = err;
4006
4007         free_corrupt_blocks_tree(&corrupt_blocks);
4008         root->fs_info->corrupt_blocks = NULL;
4009         free_orphan_data_extents(&root->orphan_data_extents);
4010         return ret;
4011 }
4012
4013 static int fs_root_objectid(u64 objectid)
4014 {
4015         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4016             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4017                 return 1;
4018         return is_fstree(objectid);
4019 }
4020
4021 static int check_fs_roots(struct btrfs_root *root,
4022                           struct cache_tree *root_cache)
4023 {
4024         struct btrfs_path path;
4025         struct btrfs_key key;
4026         struct walk_control wc;
4027         struct extent_buffer *leaf, *tree_node;
4028         struct btrfs_root *tmp_root;
4029         struct btrfs_root *tree_root = root->fs_info->tree_root;
4030         int ret;
4031         int err = 0;
4032
4033         if (ctx.progress_enabled) {
4034                 ctx.tp = TASK_FS_ROOTS;
4035                 task_start(ctx.info);
4036         }
4037
4038         /*
4039          * Just in case we made any changes to the extent tree that weren't
4040          * reflected into the free space cache yet.
4041          */
4042         if (repair)
4043                 reset_cached_block_groups(root->fs_info);
4044         memset(&wc, 0, sizeof(wc));
4045         cache_tree_init(&wc.shared);
4046         btrfs_init_path(&path);
4047
4048 again:
4049         key.offset = 0;
4050         key.objectid = 0;
4051         key.type = BTRFS_ROOT_ITEM_KEY;
4052         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4053         if (ret < 0) {
4054                 err = 1;
4055                 goto out;
4056         }
4057         tree_node = tree_root->node;
4058         while (1) {
4059                 if (tree_node != tree_root->node) {
4060                         free_root_recs_tree(root_cache);
4061                         btrfs_release_path(&path);
4062                         goto again;
4063                 }
4064                 leaf = path.nodes[0];
4065                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4066                         ret = btrfs_next_leaf(tree_root, &path);
4067                         if (ret) {
4068                                 if (ret < 0)
4069                                         err = 1;
4070                                 break;
4071                         }
4072                         leaf = path.nodes[0];
4073                 }
4074                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4075                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4076                     fs_root_objectid(key.objectid)) {
4077                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4078                                 tmp_root = btrfs_read_fs_root_no_cache(
4079                                                 root->fs_info, &key);
4080                         } else {
4081                                 key.offset = (u64)-1;
4082                                 tmp_root = btrfs_read_fs_root(
4083                                                 root->fs_info, &key);
4084                         }
4085                         if (IS_ERR(tmp_root)) {
4086                                 err = 1;
4087                                 goto next;
4088                         }
4089                         ret = check_fs_root(tmp_root, root_cache, &wc);
4090                         if (ret == -EAGAIN) {
4091                                 free_root_recs_tree(root_cache);
4092                                 btrfs_release_path(&path);
4093                                 goto again;
4094                         }
4095                         if (ret)
4096                                 err = 1;
4097                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4098                                 btrfs_free_fs_root(tmp_root);
4099                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4100                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4101                         process_root_ref(leaf, path.slots[0], &key,
4102                                          root_cache);
4103                 }
4104 next:
4105                 path.slots[0]++;
4106         }
4107 out:
4108         btrfs_release_path(&path);
4109         if (err)
4110                 free_extent_cache_tree(&wc.shared);
4111         if (!cache_tree_empty(&wc.shared))
4112                 fprintf(stderr, "warning line %d\n", __LINE__);
4113
4114         task_stop(ctx.info);
4115
4116         return err;
4117 }
4118
4119 /*
4120  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4121  * INODE_REF/INODE_EXTREF match.
4122  *
4123  * @root:       the root of the fs/file tree
4124  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4125  * @key:        the key of the DIR_ITEM/DIR_INDEX
4126  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4127  *              distinguish root_dir between normal dir/file
4128  * @name:       the name in the INODE_REF/INODE_EXTREF
4129  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4130  * @mode:       the st_mode of INODE_ITEM
4131  *
4132  * Return 0 if no error occurred.
4133  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4134  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4135  * dir/file.
4136  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4137  * not match for normal dir/file.
4138  */
4139 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4140                          struct btrfs_key *key, u64 index, char *name,
4141                          u32 namelen, u32 mode)
4142 {
4143         struct btrfs_path path;
4144         struct extent_buffer *node;
4145         struct btrfs_dir_item *di;
4146         struct btrfs_key location;
4147         char namebuf[BTRFS_NAME_LEN] = {0};
4148         u32 total;
4149         u32 cur = 0;
4150         u32 len;
4151         u32 name_len;
4152         u32 data_len;
4153         u8 filetype;
4154         int slot;
4155         int ret;
4156
4157         btrfs_init_path(&path);
4158         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4159         if (ret < 0) {
4160                 ret = DIR_ITEM_MISSING;
4161                 goto out;
4162         }
4163
4164         /* Process root dir and goto out*/
4165         if (index == 0) {
4166                 if (ret == 0) {
4167                         ret = ROOT_DIR_ERROR;
4168                         error(
4169                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4170                                 root->objectid,
4171                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4172                                         "REF" : "EXTREF",
4173                                 ref_key->objectid, ref_key->offset,
4174                                 key->type == BTRFS_DIR_ITEM_KEY ?
4175                                         "DIR_ITEM" : "DIR_INDEX");
4176                 } else {
4177                         ret = 0;
4178                 }
4179
4180                 goto out;
4181         }
4182
4183         /* Process normal file/dir */
4184         if (ret > 0) {
4185                 ret = DIR_ITEM_MISSING;
4186                 error(
4187                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4188                         root->objectid,
4189                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4190                         ref_key->objectid, ref_key->offset,
4191                         key->type == BTRFS_DIR_ITEM_KEY ?
4192                                 "DIR_ITEM" : "DIR_INDEX",
4193                         key->objectid, key->offset, namelen, name,
4194                         imode_to_type(mode));
4195                 goto out;
4196         }
4197
4198         /* Check whether inode_id/filetype/name match */
4199         node = path.nodes[0];
4200         slot = path.slots[0];
4201         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4202         total = btrfs_item_size_nr(node, slot);
4203         while (cur < total) {
4204                 ret = DIR_ITEM_MISMATCH;
4205                 name_len = btrfs_dir_name_len(node, di);
4206                 data_len = btrfs_dir_data_len(node, di);
4207
4208                 btrfs_dir_item_key_to_cpu(node, di, &location);
4209                 if (location.objectid != ref_key->objectid ||
4210                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4211                     location.offset != 0)
4212                         goto next;
4213
4214                 filetype = btrfs_dir_type(node, di);
4215                 if (imode_to_type(mode) != filetype)
4216                         goto next;
4217
4218                 if (name_len <= BTRFS_NAME_LEN) {
4219                         len = name_len;
4220                 } else {
4221                         len = BTRFS_NAME_LEN;
4222                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4223                         root->objectid,
4224                         key->type == BTRFS_DIR_ITEM_KEY ?
4225                         "DIR_ITEM" : "DIR_INDEX",
4226                         key->objectid, key->offset, name_len);
4227                 }
4228                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4229                 if (len != namelen || strncmp(namebuf, name, len))
4230                         goto next;
4231
4232                 ret = 0;
4233                 goto out;
4234 next:
4235                 len = sizeof(*di) + name_len + data_len;
4236                 di = (struct btrfs_dir_item *)((char *)di + len);
4237                 cur += len;
4238         }
4239         if (ret == DIR_ITEM_MISMATCH)
4240                 error(
4241                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4242                         root->objectid,
4243                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4244                         ref_key->objectid, ref_key->offset,
4245                         key->type == BTRFS_DIR_ITEM_KEY ?
4246                                 "DIR_ITEM" : "DIR_INDEX",
4247                         key->objectid, key->offset, namelen, name,
4248                         imode_to_type(mode));
4249 out:
4250         btrfs_release_path(&path);
4251         return ret;
4252 }
4253
4254 /*
4255  * Traverse the given INODE_REF and call find_dir_item() to find related
4256  * DIR_ITEM/DIR_INDEX.
4257  *
4258  * @root:       the root of the fs/file tree
4259  * @ref_key:    the key of the INODE_REF
4260  * @refs:       the count of INODE_REF
4261  * @mode:       the st_mode of INODE_ITEM
4262  *
4263  * Return 0 if no error occurred.
4264  */
4265 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4266                            struct extent_buffer *node, int slot, u64 *refs,
4267                            int mode)
4268 {
4269         struct btrfs_key key;
4270         struct btrfs_inode_ref *ref;
4271         char namebuf[BTRFS_NAME_LEN] = {0};
4272         u32 total;
4273         u32 cur = 0;
4274         u32 len;
4275         u32 name_len;
4276         u64 index;
4277         int ret, err = 0;
4278
4279         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4280         total = btrfs_item_size_nr(node, slot);
4281
4282 next:
4283         /* Update inode ref count */
4284         (*refs)++;
4285
4286         index = btrfs_inode_ref_index(node, ref);
4287         name_len = btrfs_inode_ref_name_len(node, ref);
4288         if (name_len <= BTRFS_NAME_LEN) {
4289                 len = name_len;
4290         } else {
4291                 len = BTRFS_NAME_LEN;
4292                 warning("root %llu INODE_REF[%llu %llu] name too long",
4293                         root->objectid, ref_key->objectid, ref_key->offset);
4294         }
4295
4296         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4297
4298         /* Check root dir ref name */
4299         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4300                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4301                       root->objectid, ref_key->objectid, ref_key->offset,
4302                       namebuf);
4303                 err |= ROOT_DIR_ERROR;
4304         }
4305
4306         /* Find related DIR_INDEX */
4307         key.objectid = ref_key->offset;
4308         key.type = BTRFS_DIR_INDEX_KEY;
4309         key.offset = index;
4310         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4311         err |= ret;
4312
4313         /* Find related dir_item */
4314         key.objectid = ref_key->offset;
4315         key.type = BTRFS_DIR_ITEM_KEY;
4316         key.offset = btrfs_name_hash(namebuf, len);
4317         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4318         err |= ret;
4319
4320         len = sizeof(*ref) + name_len;
4321         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4322         cur += len;
4323         if (cur < total)
4324                 goto next;
4325
4326         return err;
4327 }
4328
4329 /*
4330  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4331  * DIR_ITEM/DIR_INDEX.
4332  *
4333  * @root:       the root of the fs/file tree
4334  * @ref_key:    the key of the INODE_EXTREF
4335  * @refs:       the count of INODE_EXTREF
4336  * @mode:       the st_mode of INODE_ITEM
4337  *
4338  * Return 0 if no error occurred.
4339  */
4340 static int check_inode_extref(struct btrfs_root *root,
4341                               struct btrfs_key *ref_key,
4342                               struct extent_buffer *node, int slot, u64 *refs,
4343                               int mode)
4344 {
4345         struct btrfs_key key;
4346         struct btrfs_inode_extref *extref;
4347         char namebuf[BTRFS_NAME_LEN] = {0};
4348         u32 total;
4349         u32 cur = 0;
4350         u32 len;
4351         u32 name_len;
4352         u64 index;
4353         u64 parent;
4354         int ret;
4355         int err = 0;
4356
4357         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4358         total = btrfs_item_size_nr(node, slot);
4359
4360 next:
4361         /* update inode ref count */
4362         (*refs)++;
4363         name_len = btrfs_inode_extref_name_len(node, extref);
4364         index = btrfs_inode_extref_index(node, extref);
4365         parent = btrfs_inode_extref_parent(node, extref);
4366         if (name_len <= BTRFS_NAME_LEN) {
4367                 len = name_len;
4368         } else {
4369                 len = BTRFS_NAME_LEN;
4370                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4371                         root->objectid, ref_key->objectid, ref_key->offset);
4372         }
4373         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4374
4375         /* Check root dir ref name */
4376         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4377                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4378                       root->objectid, ref_key->objectid, ref_key->offset,
4379                       namebuf);
4380                 err |= ROOT_DIR_ERROR;
4381         }
4382
4383         /* find related dir_index */
4384         key.objectid = parent;
4385         key.type = BTRFS_DIR_INDEX_KEY;
4386         key.offset = index;
4387         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4388         err |= ret;
4389
4390         /* find related dir_item */
4391         key.objectid = parent;
4392         key.type = BTRFS_DIR_ITEM_KEY;
4393         key.offset = btrfs_name_hash(namebuf, len);
4394         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4395         err |= ret;
4396
4397         len = sizeof(*extref) + name_len;
4398         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4399         cur += len;
4400
4401         if (cur < total)
4402                 goto next;
4403
4404         return err;
4405 }
4406
4407 /*
4408  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4409  * DIR_ITEM/DIR_INDEX match.
4410  *
4411  * @root:       the root of the fs/file tree
4412  * @key:        the key of the INODE_REF/INODE_EXTREF
4413  * @name:       the name in the INODE_REF/INODE_EXTREF
4414  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4415  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4416  * to (u64)-1
4417  * @ext_ref:    the EXTENDED_IREF feature
4418  *
4419  * Return 0 if no error occurred.
4420  * Return >0 for error bitmap
4421  */
4422 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4423                           char *name, int namelen, u64 index,
4424                           unsigned int ext_ref)
4425 {
4426         struct btrfs_path path;
4427         struct btrfs_inode_ref *ref;
4428         struct btrfs_inode_extref *extref;
4429         struct extent_buffer *node;
4430         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4431         u32 total;
4432         u32 cur = 0;
4433         u32 len;
4434         u32 ref_namelen;
4435         u64 ref_index;
4436         u64 parent;
4437         u64 dir_id;
4438         int slot;
4439         int ret;
4440
4441         btrfs_init_path(&path);
4442         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4443         if (ret) {
4444                 ret = INODE_REF_MISSING;
4445                 goto extref;
4446         }
4447
4448         node = path.nodes[0];
4449         slot = path.slots[0];
4450
4451         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4452         total = btrfs_item_size_nr(node, slot);
4453
4454         /* Iterate all entry of INODE_REF */
4455         while (cur < total) {
4456                 ret = INODE_REF_MISSING;
4457
4458                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4459                 ref_index = btrfs_inode_ref_index(node, ref);
4460                 if (index != (u64)-1 && index != ref_index)
4461                         goto next_ref;
4462
4463                 if (ref_namelen <= BTRFS_NAME_LEN) {
4464                         len = ref_namelen;
4465                 } else {
4466                         len = BTRFS_NAME_LEN;
4467                         warning("root %llu INODE %s[%llu %llu] name too long",
4468                                 root->objectid,
4469                                 key->type == BTRFS_INODE_REF_KEY ?
4470                                         "REF" : "EXTREF",
4471                                 key->objectid, key->offset);
4472                 }
4473                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4474                                    len);
4475
4476                 if (len != namelen || strncmp(ref_namebuf, name, len))
4477                         goto next_ref;
4478
4479                 ret = 0;
4480                 goto out;
4481 next_ref:
4482                 len = sizeof(*ref) + ref_namelen;
4483                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4484                 cur += len;
4485         }
4486
4487 extref:
4488         /* Skip if not support EXTENDED_IREF feature */
4489         if (!ext_ref)
4490                 goto out;
4491
4492         btrfs_release_path(&path);
4493         btrfs_init_path(&path);
4494
4495         dir_id = key->offset;
4496         key->type = BTRFS_INODE_EXTREF_KEY;
4497         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4498
4499         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4500         if (ret) {
4501                 ret = INODE_REF_MISSING;
4502                 goto out;
4503         }
4504
4505         node = path.nodes[0];
4506         slot = path.slots[0];
4507
4508         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4509         cur = 0;
4510         total = btrfs_item_size_nr(node, slot);
4511
4512         /* Iterate all entry of INODE_EXTREF */
4513         while (cur < total) {
4514                 ret = INODE_REF_MISSING;
4515
4516                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4517                 ref_index = btrfs_inode_extref_index(node, extref);
4518                 parent = btrfs_inode_extref_parent(node, extref);
4519                 if (index != (u64)-1 && index != ref_index)
4520                         goto next_extref;
4521
4522                 if (parent != dir_id)
4523                         goto next_extref;
4524
4525                 if (ref_namelen <= BTRFS_NAME_LEN) {
4526                         len = ref_namelen;
4527                 } else {
4528                         len = BTRFS_NAME_LEN;
4529                         warning("root %llu INODE %s[%llu %llu] name too long",
4530                                 root->objectid,
4531                                 key->type == BTRFS_INODE_REF_KEY ?
4532                                         "REF" : "EXTREF",
4533                                 key->objectid, key->offset);
4534                 }
4535                 read_extent_buffer(node, ref_namebuf,
4536                                    (unsigned long)(extref + 1), len);
4537
4538                 if (len != namelen || strncmp(ref_namebuf, name, len))
4539                         goto next_extref;
4540
4541                 ret = 0;
4542                 goto out;
4543
4544 next_extref:
4545                 len = sizeof(*extref) + ref_namelen;
4546                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4547                 cur += len;
4548
4549         }
4550 out:
4551         btrfs_release_path(&path);
4552         return ret;
4553 }
4554
4555 /*
4556  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4557  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4558  *
4559  * @root:       the root of the fs/file tree
4560  * @key:        the key of the INODE_REF/INODE_EXTREF
4561  * @size:       the st_size of the INODE_ITEM
4562  * @ext_ref:    the EXTENDED_IREF feature
4563  *
4564  * Return 0 if no error occurred.
4565  */
4566 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4567                           struct extent_buffer *node, int slot, u64 *size,
4568                           unsigned int ext_ref)
4569 {
4570         struct btrfs_dir_item *di;
4571         struct btrfs_inode_item *ii;
4572         struct btrfs_path path;
4573         struct btrfs_key location;
4574         char namebuf[BTRFS_NAME_LEN] = {0};
4575         u32 total;
4576         u32 cur = 0;
4577         u32 len;
4578         u32 name_len;
4579         u32 data_len;
4580         u8 filetype;
4581         u32 mode;
4582         u64 index;
4583         int ret;
4584         int err = 0;
4585
4586         /*
4587          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4588          * ignore index check.
4589          */
4590         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4591
4592         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4593         total = btrfs_item_size_nr(node, slot);
4594
4595         while (cur < total) {
4596                 data_len = btrfs_dir_data_len(node, di);
4597                 if (data_len)
4598                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4599                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4600                               "DIR_ITEM" : "DIR_INDEX",
4601                               key->objectid, key->offset, data_len);
4602
4603                 name_len = btrfs_dir_name_len(node, di);
4604                 if (name_len <= BTRFS_NAME_LEN) {
4605                         len = name_len;
4606                 } else {
4607                         len = BTRFS_NAME_LEN;
4608                         warning("root %llu %s[%llu %llu] name too long",
4609                                 root->objectid,
4610                                 key->type == BTRFS_DIR_ITEM_KEY ?
4611                                 "DIR_ITEM" : "DIR_INDEX",
4612                                 key->objectid, key->offset);
4613                 }
4614                 (*size) += name_len;
4615
4616                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4617                 filetype = btrfs_dir_type(node, di);
4618
4619                 btrfs_init_path(&path);
4620                 btrfs_dir_item_key_to_cpu(node, di, &location);
4621
4622                 /* Ignore related ROOT_ITEM check */
4623                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4624                         goto next;
4625
4626                 /* Check relative INODE_ITEM(existence/filetype) */
4627                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4628                 if (ret) {
4629                         err |= INODE_ITEM_MISSING;
4630                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4631                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4632                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4633                               key->offset, location.objectid, name_len,
4634                               namebuf, filetype);
4635                         goto next;
4636                 }
4637
4638                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4639                                     struct btrfs_inode_item);
4640                 mode = btrfs_inode_mode(path.nodes[0], ii);
4641
4642                 if (imode_to_type(mode) != filetype) {
4643                         err |= INODE_ITEM_MISMATCH;
4644                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4645                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4646                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4647                               key->offset, name_len, namebuf, filetype);
4648                 }
4649
4650                 /* Check relative INODE_REF/INODE_EXTREF */
4651                 location.type = BTRFS_INODE_REF_KEY;
4652                 location.offset = key->objectid;
4653                 ret = find_inode_ref(root, &location, namebuf, len,
4654                                        index, ext_ref);
4655                 err |= ret;
4656                 if (ret & INODE_REF_MISSING)
4657                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4658                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4659                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4660                               key->offset, name_len, namebuf, filetype);
4661
4662 next:
4663                 btrfs_release_path(&path);
4664                 len = sizeof(*di) + name_len + data_len;
4665                 di = (struct btrfs_dir_item *)((char *)di + len);
4666                 cur += len;
4667
4668                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4669                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4670                               root->objectid, key->objectid, key->offset);
4671                         break;
4672                 }
4673         }
4674
4675         return err;
4676 }
4677
4678 /*
4679  * Check file extent datasum/hole, update the size of the file extents,
4680  * check and update the last offset of the file extent.
4681  *
4682  * @root:       the root of fs/file tree.
4683  * @fkey:       the key of the file extent.
4684  * @nodatasum:  INODE_NODATASUM feature.
4685  * @size:       the sum of all EXTENT_DATA items size for this inode.
4686  * @end:        the offset of the last extent.
4687  *
4688  * Return 0 if no error occurred.
4689  */
4690 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4691                              struct extent_buffer *node, int slot,
4692                              unsigned int nodatasum, u64 *size, u64 *end)
4693 {
4694         struct btrfs_file_extent_item *fi;
4695         u64 disk_bytenr;
4696         u64 disk_num_bytes;
4697         u64 extent_num_bytes;
4698         u64 found;
4699         unsigned int extent_type;
4700         unsigned int is_hole;
4701         int ret;
4702         int err = 0;
4703
4704         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4705
4706         extent_type = btrfs_file_extent_type(node, fi);
4707         /* Skip if file extent is inline */
4708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4709                 struct btrfs_item *e = btrfs_item_nr(slot);
4710                 u32 item_inline_len;
4711
4712                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4713                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4714                 if (extent_num_bytes == 0 ||
4715                     extent_num_bytes != item_inline_len)
4716                         err |= FILE_EXTENT_ERROR;
4717                 *size += extent_num_bytes;
4718                 return err;
4719         }
4720
4721         /* Check extent type */
4722         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4723                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4724                 err |= FILE_EXTENT_ERROR;
4725                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4726                       root->objectid, fkey->objectid, fkey->offset);
4727                 return err;
4728         }
4729
4730         /* Check REG_EXTENT/PREALLOC_EXTENT */
4731         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4732         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4733         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4734         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4735
4736         /* Check EXTENT_DATA datasum */
4737         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4738         if (found > 0 && nodatasum) {
4739                 err |= ODD_CSUM_ITEM;
4740                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4741                       root->objectid, fkey->objectid, fkey->offset);
4742         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4743                    !is_hole &&
4744                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4745                 err |= CSUM_ITEM_MISSING;
4746                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4747                       root->objectid, fkey->objectid, fkey->offset);
4748         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4749                 err |= ODD_CSUM_ITEM;
4750                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         }
4753
4754         /* Check EXTENT_DATA hole */
4755         if (no_holes && is_hole) {
4756                 err |= FILE_EXTENT_ERROR;
4757                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4758                       root->objectid, fkey->objectid, fkey->offset);
4759         } else if (!no_holes && *end != fkey->offset) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         }
4764
4765         *end += extent_num_bytes;
4766         if (!is_hole)
4767                 *size += extent_num_bytes;
4768
4769         return err;
4770 }
4771
4772 /*
4773  * Check INODE_ITEM and related ITEMs (the same inode number)
4774  * 1. check link count
4775  * 2. check inode ref/extref
4776  * 3. check dir item/index
4777  *
4778  * @ext_ref:    the EXTENDED_IREF feature
4779  *
4780  * Return 0 if no error occurred.
4781  * Return >0 for error or hit the traversal is done(by error bitmap)
4782  */
4783 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4784                             unsigned int ext_ref)
4785 {
4786         struct extent_buffer *node;
4787         struct btrfs_inode_item *ii;
4788         struct btrfs_key key;
4789         u64 inode_id;
4790         u32 mode;
4791         u64 nlink;
4792         u64 nbytes;
4793         u64 isize;
4794         u64 size = 0;
4795         u64 refs = 0;
4796         u64 extent_end = 0;
4797         u64 extent_size = 0;
4798         unsigned int dir;
4799         unsigned int nodatasum;
4800         int slot;
4801         int ret;
4802         int err = 0;
4803
4804         node = path->nodes[0];
4805         slot = path->slots[0];
4806
4807         btrfs_item_key_to_cpu(node, &key, slot);
4808         inode_id = key.objectid;
4809
4810         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4811                 ret = btrfs_next_item(root, path);
4812                 if (ret > 0)
4813                         err |= LAST_ITEM;
4814                 return err;
4815         }
4816
4817         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4818         isize = btrfs_inode_size(node, ii);
4819         nbytes = btrfs_inode_nbytes(node, ii);
4820         mode = btrfs_inode_mode(node, ii);
4821         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4822         nlink = btrfs_inode_nlink(node, ii);
4823         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4824
4825         while (1) {
4826                 ret = btrfs_next_item(root, path);
4827                 if (ret < 0) {
4828                         /* out will fill 'err' rusing current statistics */
4829                         goto out;
4830                 } else if (ret > 0) {
4831                         err |= LAST_ITEM;
4832                         goto out;
4833                 }
4834
4835                 node = path->nodes[0];
4836                 slot = path->slots[0];
4837                 btrfs_item_key_to_cpu(node, &key, slot);
4838                 if (key.objectid != inode_id)
4839                         goto out;
4840
4841                 switch (key.type) {
4842                 case BTRFS_INODE_REF_KEY:
4843                         ret = check_inode_ref(root, &key, node, slot, &refs,
4844                                               mode);
4845                         err |= ret;
4846                         break;
4847                 case BTRFS_INODE_EXTREF_KEY:
4848                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4849                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4850                                         root->objectid, key.objectid,
4851                                         key.offset);
4852                         ret = check_inode_extref(root, &key, node, slot, &refs,
4853                                                  mode);
4854                         err |= ret;
4855                         break;
4856                 case BTRFS_DIR_ITEM_KEY:
4857                 case BTRFS_DIR_INDEX_KEY:
4858                         if (!dir) {
4859                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4860                                         root->objectid, inode_id,
4861                                         imode_to_type(mode), key.objectid,
4862                                         key.offset);
4863                         }
4864                         ret = check_dir_item(root, &key, node, slot, &size,
4865                                              ext_ref);
4866                         err |= ret;
4867                         break;
4868                 case BTRFS_EXTENT_DATA_KEY:
4869                         if (dir) {
4870                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4871                                         root->objectid, inode_id, key.objectid,
4872                                         key.offset);
4873                         }
4874                         ret = check_file_extent(root, &key, node, slot,
4875                                                 nodatasum, &extent_size,
4876                                                 &extent_end);
4877                         err |= ret;
4878                         break;
4879                 case BTRFS_XATTR_ITEM_KEY:
4880                         break;
4881                 default:
4882                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4883                               key.objectid, key.type, key.offset);
4884                 }
4885         }
4886
4887 out:
4888         /* verify INODE_ITEM nlink/isize/nbytes */
4889         if (dir) {
4890                 if (nlink != 1) {
4891                         err |= LINK_COUNT_ERROR;
4892                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4893                               root->objectid, inode_id, nlink);
4894                 }
4895
4896                 /*
4897                  * Just a warning, as dir inode nbytes is just an
4898                  * instructive value.
4899                  */
4900                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4901                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4902                                 root->objectid, inode_id, root->nodesize);
4903                 }
4904
4905                 if (isize != size) {
4906                         err |= ISIZE_ERROR;
4907                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4908                               root->objectid, inode_id, isize, size);
4909                 }
4910         } else {
4911                 if (nlink != refs) {
4912                         err |= LINK_COUNT_ERROR;
4913                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4914                               root->objectid, inode_id, nlink, refs);
4915                 } else if (!nlink) {
4916                         err |= ORPHAN_ITEM;
4917                 }
4918
4919                 if (!nbytes && !no_holes && extent_end < isize) {
4920                         err |= NBYTES_ERROR;
4921                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4922                               root->objectid, inode_id, isize);
4923                 }
4924
4925                 if (nbytes != extent_size) {
4926                         err |= NBYTES_ERROR;
4927                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4928                               root->objectid, inode_id, nbytes, extent_size);
4929                 }
4930         }
4931
4932         return err;
4933 }
4934
4935 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4936 {
4937         struct btrfs_path path;
4938         struct btrfs_key key;
4939         int err = 0;
4940         int ret;
4941
4942         btrfs_init_path(&path);
4943         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4944         key.type = BTRFS_INODE_ITEM_KEY;
4945         key.offset = 0;
4946
4947         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4948         if (ret < 0)
4949                 goto out;
4950         if (ret > 0) {
4951                 ret = 0;
4952                 err |= INODE_ITEM_MISSING;
4953         }
4954
4955         err |= check_inode_item(root, &path, ext_ref);
4956         err &= ~LAST_ITEM;
4957         if (err && !ret)
4958                 ret = -EIO;
4959 out:
4960         btrfs_release_path(&path);
4961         return ret;
4962 }
4963
4964 /*
4965  * Iterate all item on the tree and call check_inode_item() to check.
4966  *
4967  * @root:       the root of the tree to be checked.
4968  * @ext_ref:    the EXTENDED_IREF feature
4969  *
4970  * Return 0 if no error found.
4971  * Return <0 for error.
4972  */
4973 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4974 {
4975         struct btrfs_path path;
4976         struct node_refs nrefs;
4977         struct btrfs_root_item *root_item = &root->root_item;
4978         int ret, wret;
4979         int level;
4980
4981         /*
4982          * We need to manually check the first inode item(256)
4983          * As the following traversal function will only start from
4984          * the first inode item in the leaf, if inode item(256) is missing
4985          * we will just skip it forever.
4986          */
4987         ret = check_fs_first_inode(root, ext_ref);
4988         if (ret < 0)
4989                 return ret;
4990
4991         memset(&nrefs, 0, sizeof(nrefs));
4992         level = btrfs_header_level(root->node);
4993         btrfs_init_path(&path);
4994
4995         if (btrfs_root_refs(root_item) > 0 ||
4996             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4997                 path.nodes[level] = root->node;
4998                 path.slots[level] = 0;
4999                 extent_buffer_get(root->node);
5000         } else {
5001                 struct btrfs_key key;
5002
5003                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5004                 level = root_item->drop_level;
5005                 path.lowest_level = level;
5006                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5007                 if (ret < 0)
5008                         goto out;
5009                 ret = 0;
5010         }
5011
5012         while (1) {
5013                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5014                 if (wret < 0)
5015                         ret = wret;
5016                 if (wret != 0)
5017                         break;
5018
5019                 wret = walk_up_tree_v2(root, &path, &level);
5020                 if (wret < 0)
5021                         ret = wret;
5022                 if (wret != 0)
5023                         break;
5024         }
5025
5026 out:
5027         btrfs_release_path(&path);
5028         return ret;
5029 }
5030
5031 /*
5032  * Find the relative ref for root_ref and root_backref.
5033  *
5034  * @root:       the root of the root tree.
5035  * @ref_key:    the key of the root ref.
5036  *
5037  * Return 0 if no error occurred.
5038  */
5039 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5040                           struct extent_buffer *node, int slot)
5041 {
5042         struct btrfs_path path;
5043         struct btrfs_key key;
5044         struct btrfs_root_ref *ref;
5045         struct btrfs_root_ref *backref;
5046         char ref_name[BTRFS_NAME_LEN] = {0};
5047         char backref_name[BTRFS_NAME_LEN] = {0};
5048         u64 ref_dirid;
5049         u64 ref_seq;
5050         u32 ref_namelen;
5051         u64 backref_dirid;
5052         u64 backref_seq;
5053         u32 backref_namelen;
5054         u32 len;
5055         int ret;
5056         int err = 0;
5057
5058         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5059         ref_dirid = btrfs_root_ref_dirid(node, ref);
5060         ref_seq = btrfs_root_ref_sequence(node, ref);
5061         ref_namelen = btrfs_root_ref_name_len(node, ref);
5062
5063         if (ref_namelen <= BTRFS_NAME_LEN) {
5064                 len = ref_namelen;
5065         } else {
5066                 len = BTRFS_NAME_LEN;
5067                 warning("%s[%llu %llu] ref_name too long",
5068                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5069                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5070                         ref_key->offset);
5071         }
5072         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5073
5074         /* Find relative root_ref */
5075         key.objectid = ref_key->offset;
5076         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5077         key.offset = ref_key->objectid;
5078
5079         btrfs_init_path(&path);
5080         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5081         if (ret) {
5082                 err |= ROOT_REF_MISSING;
5083                 error("%s[%llu %llu] couldn't find relative ref",
5084                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5085                       "ROOT_REF" : "ROOT_BACKREF",
5086                       ref_key->objectid, ref_key->offset);
5087                 goto out;
5088         }
5089
5090         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5091                                  struct btrfs_root_ref);
5092         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5093         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5094         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5095
5096         if (backref_namelen <= BTRFS_NAME_LEN) {
5097                 len = backref_namelen;
5098         } else {
5099                 len = BTRFS_NAME_LEN;
5100                 warning("%s[%llu %llu] ref_name too long",
5101                         key.type == BTRFS_ROOT_REF_KEY ?
5102                         "ROOT_REF" : "ROOT_BACKREF",
5103                         key.objectid, key.offset);
5104         }
5105         read_extent_buffer(path.nodes[0], backref_name,
5106                            (unsigned long)(backref + 1), len);
5107
5108         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5109             ref_namelen != backref_namelen ||
5110             strncmp(ref_name, backref_name, len)) {
5111                 err |= ROOT_REF_MISMATCH;
5112                 error("%s[%llu %llu] mismatch relative ref",
5113                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5114                       "ROOT_REF" : "ROOT_BACKREF",
5115                       ref_key->objectid, ref_key->offset);
5116         }
5117 out:
5118         btrfs_release_path(&path);
5119         return err;
5120 }
5121
5122 /*
5123  * Check all fs/file tree in low_memory mode.
5124  *
5125  * 1. for fs tree root item, call check_fs_root_v2()
5126  * 2. for fs tree root ref/backref, call check_root_ref()
5127  *
5128  * Return 0 if no error occurred.
5129  */
5130 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5131 {
5132         struct btrfs_root *tree_root = fs_info->tree_root;
5133         struct btrfs_root *cur_root = NULL;
5134         struct btrfs_path *path;
5135         struct btrfs_key key;
5136         struct extent_buffer *node;
5137         unsigned int ext_ref;
5138         int slot;
5139         int ret;
5140         int err = 0;
5141
5142         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5143
5144         path = btrfs_alloc_path();
5145         if (!path)
5146                 return -ENOMEM;
5147
5148         key.objectid = BTRFS_FS_TREE_OBJECTID;
5149         key.offset = 0;
5150         key.type = BTRFS_ROOT_ITEM_KEY;
5151
5152         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
5153         if (ret < 0) {
5154                 err = ret;
5155                 goto out;
5156         } else if (ret > 0) {
5157                 err = -ENOENT;
5158                 goto out;
5159         }
5160
5161         while (1) {
5162                 node = path->nodes[0];
5163                 slot = path->slots[0];
5164                 btrfs_item_key_to_cpu(node, &key, slot);
5165                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5166                         goto out;
5167                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5168                     fs_root_objectid(key.objectid)) {
5169                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5170                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5171                                                                        &key);
5172                         } else {
5173                                 key.offset = (u64)-1;
5174                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5175                         }
5176
5177                         if (IS_ERR(cur_root)) {
5178                                 error("Fail to read fs/subvol tree: %lld",
5179                                       key.objectid);
5180                                 err = -EIO;
5181                                 goto next;
5182                         }
5183
5184                         ret = check_fs_root_v2(cur_root, ext_ref);
5185                         err |= ret;
5186
5187                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5188                                 btrfs_free_fs_root(cur_root);
5189                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5190                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5191                         ret = check_root_ref(tree_root, &key, node, slot);
5192                         err |= ret;
5193                 }
5194 next:
5195                 ret = btrfs_next_item(tree_root, path);
5196                 if (ret > 0)
5197                         goto out;
5198                 if (ret < 0) {
5199                         err = ret;
5200                         goto out;
5201                 }
5202         }
5203
5204 out:
5205         btrfs_free_path(path);
5206         return err;
5207 }
5208
5209 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5210 {
5211         struct list_head *cur = rec->backrefs.next;
5212         struct extent_backref *back;
5213         struct tree_backref *tback;
5214         struct data_backref *dback;
5215         u64 found = 0;
5216         int err = 0;
5217
5218         while(cur != &rec->backrefs) {
5219                 back = to_extent_backref(cur);
5220                 cur = cur->next;
5221                 if (!back->found_extent_tree) {
5222                         err = 1;
5223                         if (!print_errs)
5224                                 goto out;
5225                         if (back->is_data) {
5226                                 dback = to_data_backref(back);
5227                                 fprintf(stderr, "Backref %llu %s %llu"
5228                                         " owner %llu offset %llu num_refs %lu"
5229                                         " not found in extent tree\n",
5230                                         (unsigned long long)rec->start,
5231                                         back->full_backref ?
5232                                         "parent" : "root",
5233                                         back->full_backref ?
5234                                         (unsigned long long)dback->parent:
5235                                         (unsigned long long)dback->root,
5236                                         (unsigned long long)dback->owner,
5237                                         (unsigned long long)dback->offset,
5238                                         (unsigned long)dback->num_refs);
5239                         } else {
5240                                 tback = to_tree_backref(back);
5241                                 fprintf(stderr, "Backref %llu parent %llu"
5242                                         " root %llu not found in extent tree\n",
5243                                         (unsigned long long)rec->start,
5244                                         (unsigned long long)tback->parent,
5245                                         (unsigned long long)tback->root);
5246                         }
5247                 }
5248                 if (!back->is_data && !back->found_ref) {
5249                         err = 1;
5250                         if (!print_errs)
5251                                 goto out;
5252                         tback = to_tree_backref(back);
5253                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5254                                 (unsigned long long)rec->start,
5255                                 back->full_backref ? "parent" : "root",
5256                                 back->full_backref ?
5257                                 (unsigned long long)tback->parent :
5258                                 (unsigned long long)tback->root, back);
5259                 }
5260                 if (back->is_data) {
5261                         dback = to_data_backref(back);
5262                         if (dback->found_ref != dback->num_refs) {
5263                                 err = 1;
5264                                 if (!print_errs)
5265                                         goto out;
5266                                 fprintf(stderr, "Incorrect local backref count"
5267                                         " on %llu %s %llu owner %llu"
5268                                         " offset %llu found %u wanted %u back %p\n",
5269                                         (unsigned long long)rec->start,
5270                                         back->full_backref ?
5271                                         "parent" : "root",
5272                                         back->full_backref ?
5273                                         (unsigned long long)dback->parent:
5274                                         (unsigned long long)dback->root,
5275                                         (unsigned long long)dback->owner,
5276                                         (unsigned long long)dback->offset,
5277                                         dback->found_ref, dback->num_refs, back);
5278                         }
5279                         if (dback->disk_bytenr != rec->start) {
5280                                 err = 1;
5281                                 if (!print_errs)
5282                                         goto out;
5283                                 fprintf(stderr, "Backref disk bytenr does not"
5284                                         " match extent record, bytenr=%llu, "
5285                                         "ref bytenr=%llu\n",
5286                                         (unsigned long long)rec->start,
5287                                         (unsigned long long)dback->disk_bytenr);
5288                         }
5289
5290                         if (dback->bytes != rec->nr) {
5291                                 err = 1;
5292                                 if (!print_errs)
5293                                         goto out;
5294                                 fprintf(stderr, "Backref bytes do not match "
5295                                         "extent backref, bytenr=%llu, ref "
5296                                         "bytes=%llu, backref bytes=%llu\n",
5297                                         (unsigned long long)rec->start,
5298                                         (unsigned long long)rec->nr,
5299                                         (unsigned long long)dback->bytes);
5300                         }
5301                 }
5302                 if (!back->is_data) {
5303                         found += 1;
5304                 } else {
5305                         dback = to_data_backref(back);
5306                         found += dback->found_ref;
5307                 }
5308         }
5309         if (found != rec->refs) {
5310                 err = 1;
5311                 if (!print_errs)
5312                         goto out;
5313                 fprintf(stderr, "Incorrect global backref count "
5314                         "on %llu found %llu wanted %llu\n",
5315                         (unsigned long long)rec->start,
5316                         (unsigned long long)found,
5317                         (unsigned long long)rec->refs);
5318         }
5319 out:
5320         return err;
5321 }
5322
5323 static int free_all_extent_backrefs(struct extent_record *rec)
5324 {
5325         struct extent_backref *back;
5326         struct list_head *cur;
5327         while (!list_empty(&rec->backrefs)) {
5328                 cur = rec->backrefs.next;
5329                 back = to_extent_backref(cur);
5330                 list_del(cur);
5331                 free(back);
5332         }
5333         return 0;
5334 }
5335
5336 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5337                                      struct cache_tree *extent_cache)
5338 {
5339         struct cache_extent *cache;
5340         struct extent_record *rec;
5341
5342         while (1) {
5343                 cache = first_cache_extent(extent_cache);
5344                 if (!cache)
5345                         break;
5346                 rec = container_of(cache, struct extent_record, cache);
5347                 remove_cache_extent(extent_cache, cache);
5348                 free_all_extent_backrefs(rec);
5349                 free(rec);
5350         }
5351 }
5352
5353 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5354                                  struct extent_record *rec)
5355 {
5356         if (rec->content_checked && rec->owner_ref_checked &&
5357             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5358             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5359             !rec->bad_full_backref && !rec->crossing_stripes &&
5360             !rec->wrong_chunk_type) {
5361                 remove_cache_extent(extent_cache, &rec->cache);
5362                 free_all_extent_backrefs(rec);
5363                 list_del_init(&rec->list);
5364                 free(rec);
5365         }
5366         return 0;
5367 }
5368
5369 static int check_owner_ref(struct btrfs_root *root,
5370                             struct extent_record *rec,
5371                             struct extent_buffer *buf)
5372 {
5373         struct extent_backref *node;
5374         struct tree_backref *back;
5375         struct btrfs_root *ref_root;
5376         struct btrfs_key key;
5377         struct btrfs_path path;
5378         struct extent_buffer *parent;
5379         int level;
5380         int found = 0;
5381         int ret;
5382
5383         list_for_each_entry(node, &rec->backrefs, list) {
5384                 if (node->is_data)
5385                         continue;
5386                 if (!node->found_ref)
5387                         continue;
5388                 if (node->full_backref)
5389                         continue;
5390                 back = to_tree_backref(node);
5391                 if (btrfs_header_owner(buf) == back->root)
5392                         return 0;
5393         }
5394         BUG_ON(rec->is_root);
5395
5396         /* try to find the block by search corresponding fs tree */
5397         key.objectid = btrfs_header_owner(buf);
5398         key.type = BTRFS_ROOT_ITEM_KEY;
5399         key.offset = (u64)-1;
5400
5401         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5402         if (IS_ERR(ref_root))
5403                 return 1;
5404
5405         level = btrfs_header_level(buf);
5406         if (level == 0)
5407                 btrfs_item_key_to_cpu(buf, &key, 0);
5408         else
5409                 btrfs_node_key_to_cpu(buf, &key, 0);
5410
5411         btrfs_init_path(&path);
5412         path.lowest_level = level + 1;
5413         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5414         if (ret < 0)
5415                 return 0;
5416
5417         parent = path.nodes[level + 1];
5418         if (parent && buf->start == btrfs_node_blockptr(parent,
5419                                                         path.slots[level + 1]))
5420                 found = 1;
5421
5422         btrfs_release_path(&path);
5423         return found ? 0 : 1;
5424 }
5425
5426 static int is_extent_tree_record(struct extent_record *rec)
5427 {
5428         struct list_head *cur = rec->backrefs.next;
5429         struct extent_backref *node;
5430         struct tree_backref *back;
5431         int is_extent = 0;
5432
5433         while(cur != &rec->backrefs) {
5434                 node = to_extent_backref(cur);
5435                 cur = cur->next;
5436                 if (node->is_data)
5437                         return 0;
5438                 back = to_tree_backref(node);
5439                 if (node->full_backref)
5440                         return 0;
5441                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5442                         is_extent = 1;
5443         }
5444         return is_extent;
5445 }
5446
5447
5448 static int record_bad_block_io(struct btrfs_fs_info *info,
5449                                struct cache_tree *extent_cache,
5450                                u64 start, u64 len)
5451 {
5452         struct extent_record *rec;
5453         struct cache_extent *cache;
5454         struct btrfs_key key;
5455
5456         cache = lookup_cache_extent(extent_cache, start, len);
5457         if (!cache)
5458                 return 0;
5459
5460         rec = container_of(cache, struct extent_record, cache);
5461         if (!is_extent_tree_record(rec))
5462                 return 0;
5463
5464         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5465         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5466 }
5467
5468 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5469                        struct extent_buffer *buf, int slot)
5470 {
5471         if (btrfs_header_level(buf)) {
5472                 struct btrfs_key_ptr ptr1, ptr2;
5473
5474                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5475                                    sizeof(struct btrfs_key_ptr));
5476                 read_extent_buffer(buf, &ptr2,
5477                                    btrfs_node_key_ptr_offset(slot + 1),
5478                                    sizeof(struct btrfs_key_ptr));
5479                 write_extent_buffer(buf, &ptr1,
5480                                     btrfs_node_key_ptr_offset(slot + 1),
5481                                     sizeof(struct btrfs_key_ptr));
5482                 write_extent_buffer(buf, &ptr2,
5483                                     btrfs_node_key_ptr_offset(slot),
5484                                     sizeof(struct btrfs_key_ptr));
5485                 if (slot == 0) {
5486                         struct btrfs_disk_key key;
5487                         btrfs_node_key(buf, &key, 0);
5488                         btrfs_fixup_low_keys(root, path, &key,
5489                                              btrfs_header_level(buf) + 1);
5490                 }
5491         } else {
5492                 struct btrfs_item *item1, *item2;
5493                 struct btrfs_key k1, k2;
5494                 char *item1_data, *item2_data;
5495                 u32 item1_offset, item2_offset, item1_size, item2_size;
5496
5497                 item1 = btrfs_item_nr(slot);
5498                 item2 = btrfs_item_nr(slot + 1);
5499                 btrfs_item_key_to_cpu(buf, &k1, slot);
5500                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5501                 item1_offset = btrfs_item_offset(buf, item1);
5502                 item2_offset = btrfs_item_offset(buf, item2);
5503                 item1_size = btrfs_item_size(buf, item1);
5504                 item2_size = btrfs_item_size(buf, item2);
5505
5506                 item1_data = malloc(item1_size);
5507                 if (!item1_data)
5508                         return -ENOMEM;
5509                 item2_data = malloc(item2_size);
5510                 if (!item2_data) {
5511                         free(item1_data);
5512                         return -ENOMEM;
5513                 }
5514
5515                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5516                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5517
5518                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5519                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5520                 free(item1_data);
5521                 free(item2_data);
5522
5523                 btrfs_set_item_offset(buf, item1, item2_offset);
5524                 btrfs_set_item_offset(buf, item2, item1_offset);
5525                 btrfs_set_item_size(buf, item1, item2_size);
5526                 btrfs_set_item_size(buf, item2, item1_size);
5527
5528                 path->slots[0] = slot;
5529                 btrfs_set_item_key_unsafe(root, path, &k2);
5530                 path->slots[0] = slot + 1;
5531                 btrfs_set_item_key_unsafe(root, path, &k1);
5532         }
5533         return 0;
5534 }
5535
5536 static int fix_key_order(struct btrfs_trans_handle *trans,
5537                          struct btrfs_root *root,
5538                          struct btrfs_path *path)
5539 {
5540         struct extent_buffer *buf;
5541         struct btrfs_key k1, k2;
5542         int i;
5543         int level = path->lowest_level;
5544         int ret = -EIO;
5545
5546         buf = path->nodes[level];
5547         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5548                 if (level) {
5549                         btrfs_node_key_to_cpu(buf, &k1, i);
5550                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5551                 } else {
5552                         btrfs_item_key_to_cpu(buf, &k1, i);
5553                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5554                 }
5555                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5556                         continue;
5557                 ret = swap_values(root, path, buf, i);
5558                 if (ret)
5559                         break;
5560                 btrfs_mark_buffer_dirty(buf);
5561                 i = 0;
5562         }
5563         return ret;
5564 }
5565
5566 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5567                              struct btrfs_root *root,
5568                              struct btrfs_path *path,
5569                              struct extent_buffer *buf, int slot)
5570 {
5571         struct btrfs_key key;
5572         int nritems = btrfs_header_nritems(buf);
5573
5574         btrfs_item_key_to_cpu(buf, &key, slot);
5575
5576         /* These are all the keys we can deal with missing. */
5577         if (key.type != BTRFS_DIR_INDEX_KEY &&
5578             key.type != BTRFS_EXTENT_ITEM_KEY &&
5579             key.type != BTRFS_METADATA_ITEM_KEY &&
5580             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5581             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5582                 return -1;
5583
5584         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5585                (unsigned long long)key.objectid, key.type,
5586                (unsigned long long)key.offset, slot, buf->start);
5587         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5588                               btrfs_item_nr_offset(slot + 1),
5589                               sizeof(struct btrfs_item) *
5590                               (nritems - slot - 1));
5591         btrfs_set_header_nritems(buf, nritems - 1);
5592         if (slot == 0) {
5593                 struct btrfs_disk_key disk_key;
5594
5595                 btrfs_item_key(buf, &disk_key, 0);
5596                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5597         }
5598         btrfs_mark_buffer_dirty(buf);
5599         return 0;
5600 }
5601
5602 static int fix_item_offset(struct btrfs_trans_handle *trans,
5603                            struct btrfs_root *root,
5604                            struct btrfs_path *path)
5605 {
5606         struct extent_buffer *buf;
5607         int i;
5608         int ret = 0;
5609
5610         /* We should only get this for leaves */
5611         BUG_ON(path->lowest_level);
5612         buf = path->nodes[0];
5613 again:
5614         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5615                 unsigned int shift = 0, offset;
5616
5617                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5618                     BTRFS_LEAF_DATA_SIZE(root)) {
5619                         if (btrfs_item_end_nr(buf, i) >
5620                             BTRFS_LEAF_DATA_SIZE(root)) {
5621                                 ret = delete_bogus_item(trans, root, path,
5622                                                         buf, i);
5623                                 if (!ret)
5624                                         goto again;
5625                                 fprintf(stderr, "item is off the end of the "
5626                                         "leaf, can't fix\n");
5627                                 ret = -EIO;
5628                                 break;
5629                         }
5630                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5631                                 btrfs_item_end_nr(buf, i);
5632                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5633                            btrfs_item_offset_nr(buf, i - 1)) {
5634                         if (btrfs_item_end_nr(buf, i) >
5635                             btrfs_item_offset_nr(buf, i - 1)) {
5636                                 ret = delete_bogus_item(trans, root, path,
5637                                                         buf, i);
5638                                 if (!ret)
5639                                         goto again;
5640                                 fprintf(stderr, "items overlap, can't fix\n");
5641                                 ret = -EIO;
5642                                 break;
5643                         }
5644                         shift = btrfs_item_offset_nr(buf, i - 1) -
5645                                 btrfs_item_end_nr(buf, i);
5646                 }
5647                 if (!shift)
5648                         continue;
5649
5650                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5651                        i, shift, (unsigned long long)buf->start);
5652                 offset = btrfs_item_offset_nr(buf, i);
5653                 memmove_extent_buffer(buf,
5654                                       btrfs_leaf_data(buf) + offset + shift,
5655                                       btrfs_leaf_data(buf) + offset,
5656                                       btrfs_item_size_nr(buf, i));
5657                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5658                                       offset + shift);
5659                 btrfs_mark_buffer_dirty(buf);
5660         }
5661
5662         /*
5663          * We may have moved things, in which case we want to exit so we don't
5664          * write those changes out.  Once we have proper abort functionality in
5665          * progs this can be changed to something nicer.
5666          */
5667         BUG_ON(ret);
5668         return ret;
5669 }
5670
5671 /*
5672  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5673  * then just return -EIO.
5674  */
5675 static int try_to_fix_bad_block(struct btrfs_root *root,
5676                                 struct extent_buffer *buf,
5677                                 enum btrfs_tree_block_status status)
5678 {
5679         struct btrfs_trans_handle *trans;
5680         struct ulist *roots;
5681         struct ulist_node *node;
5682         struct btrfs_root *search_root;
5683         struct btrfs_path path;
5684         struct ulist_iterator iter;
5685         struct btrfs_key root_key, key;
5686         int ret;
5687
5688         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5689             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5690                 return -EIO;
5691
5692         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5693         if (ret)
5694                 return -EIO;
5695
5696         btrfs_init_path(&path);
5697         ULIST_ITER_INIT(&iter);
5698         while ((node = ulist_next(roots, &iter))) {
5699                 root_key.objectid = node->val;
5700                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5701                 root_key.offset = (u64)-1;
5702
5703                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5704                 if (IS_ERR(root)) {
5705                         ret = -EIO;
5706                         break;
5707                 }
5708
5709
5710                 trans = btrfs_start_transaction(search_root, 0);
5711                 if (IS_ERR(trans)) {
5712                         ret = PTR_ERR(trans);
5713                         break;
5714                 }
5715
5716                 path.lowest_level = btrfs_header_level(buf);
5717                 path.skip_check_block = 1;
5718                 if (path.lowest_level)
5719                         btrfs_node_key_to_cpu(buf, &key, 0);
5720                 else
5721                         btrfs_item_key_to_cpu(buf, &key, 0);
5722                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5723                 if (ret) {
5724                         ret = -EIO;
5725                         btrfs_commit_transaction(trans, search_root);
5726                         break;
5727                 }
5728                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5729                         ret = fix_key_order(trans, search_root, &path);
5730                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5731                         ret = fix_item_offset(trans, search_root, &path);
5732                 if (ret) {
5733                         btrfs_commit_transaction(trans, search_root);
5734                         break;
5735                 }
5736                 btrfs_release_path(&path);
5737                 btrfs_commit_transaction(trans, search_root);
5738         }
5739         ulist_free(roots);
5740         btrfs_release_path(&path);
5741         return ret;
5742 }
5743
5744 static int check_block(struct btrfs_root *root,
5745                        struct cache_tree *extent_cache,
5746                        struct extent_buffer *buf, u64 flags)
5747 {
5748         struct extent_record *rec;
5749         struct cache_extent *cache;
5750         struct btrfs_key key;
5751         enum btrfs_tree_block_status status;
5752         int ret = 0;
5753         int level;
5754
5755         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5756         if (!cache)
5757                 return 1;
5758         rec = container_of(cache, struct extent_record, cache);
5759         rec->generation = btrfs_header_generation(buf);
5760
5761         level = btrfs_header_level(buf);
5762         if (btrfs_header_nritems(buf) > 0) {
5763
5764                 if (level == 0)
5765                         btrfs_item_key_to_cpu(buf, &key, 0);
5766                 else
5767                         btrfs_node_key_to_cpu(buf, &key, 0);
5768
5769                 rec->info_objectid = key.objectid;
5770         }
5771         rec->info_level = level;
5772
5773         if (btrfs_is_leaf(buf))
5774                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5775         else
5776                 status = btrfs_check_node(root, &rec->parent_key, buf);
5777
5778         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5779                 if (repair)
5780                         status = try_to_fix_bad_block(root, buf, status);
5781                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5782                         ret = -EIO;
5783                         fprintf(stderr, "bad block %llu\n",
5784                                 (unsigned long long)buf->start);
5785                 } else {
5786                         /*
5787                          * Signal to callers we need to start the scan over
5788                          * again since we'll have cowed blocks.
5789                          */
5790                         ret = -EAGAIN;
5791                 }
5792         } else {
5793                 rec->content_checked = 1;
5794                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5795                         rec->owner_ref_checked = 1;
5796                 else {
5797                         ret = check_owner_ref(root, rec, buf);
5798                         if (!ret)
5799                                 rec->owner_ref_checked = 1;
5800                 }
5801         }
5802         if (!ret)
5803                 maybe_free_extent_rec(extent_cache, rec);
5804         return ret;
5805 }
5806
5807 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5808                                                 u64 parent, u64 root)
5809 {
5810         struct list_head *cur = rec->backrefs.next;
5811         struct extent_backref *node;
5812         struct tree_backref *back;
5813
5814         while(cur != &rec->backrefs) {
5815                 node = to_extent_backref(cur);
5816                 cur = cur->next;
5817                 if (node->is_data)
5818                         continue;
5819                 back = to_tree_backref(node);
5820                 if (parent > 0) {
5821                         if (!node->full_backref)
5822                                 continue;
5823                         if (parent == back->parent)
5824                                 return back;
5825                 } else {
5826                         if (node->full_backref)
5827                                 continue;
5828                         if (back->root == root)
5829                                 return back;
5830                 }
5831         }
5832         return NULL;
5833 }
5834
5835 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5836                                                 u64 parent, u64 root)
5837 {
5838         struct tree_backref *ref = malloc(sizeof(*ref));
5839
5840         if (!ref)
5841                 return NULL;
5842         memset(&ref->node, 0, sizeof(ref->node));
5843         if (parent > 0) {
5844                 ref->parent = parent;
5845                 ref->node.full_backref = 1;
5846         } else {
5847                 ref->root = root;
5848                 ref->node.full_backref = 0;
5849         }
5850         list_add_tail(&ref->node.list, &rec->backrefs);
5851
5852         return ref;
5853 }
5854
5855 static struct data_backref *find_data_backref(struct extent_record *rec,
5856                                                 u64 parent, u64 root,
5857                                                 u64 owner, u64 offset,
5858                                                 int found_ref,
5859                                                 u64 disk_bytenr, u64 bytes)
5860 {
5861         struct list_head *cur = rec->backrefs.next;
5862         struct extent_backref *node;
5863         struct data_backref *back;
5864
5865         while(cur != &rec->backrefs) {
5866                 node = to_extent_backref(cur);
5867                 cur = cur->next;
5868                 if (!node->is_data)
5869                         continue;
5870                 back = to_data_backref(node);
5871                 if (parent > 0) {
5872                         if (!node->full_backref)
5873                                 continue;
5874                         if (parent == back->parent)
5875                                 return back;
5876                 } else {
5877                         if (node->full_backref)
5878                                 continue;
5879                         if (back->root == root && back->owner == owner &&
5880                             back->offset == offset) {
5881                                 if (found_ref && node->found_ref &&
5882                                     (back->bytes != bytes ||
5883                                     back->disk_bytenr != disk_bytenr))
5884                                         continue;
5885                                 return back;
5886                         }
5887                 }
5888         }
5889         return NULL;
5890 }
5891
5892 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5893                                                 u64 parent, u64 root,
5894                                                 u64 owner, u64 offset,
5895                                                 u64 max_size)
5896 {
5897         struct data_backref *ref = malloc(sizeof(*ref));
5898
5899         if (!ref)
5900                 return NULL;
5901         memset(&ref->node, 0, sizeof(ref->node));
5902         ref->node.is_data = 1;
5903
5904         if (parent > 0) {
5905                 ref->parent = parent;
5906                 ref->owner = 0;
5907                 ref->offset = 0;
5908                 ref->node.full_backref = 1;
5909         } else {
5910                 ref->root = root;
5911                 ref->owner = owner;
5912                 ref->offset = offset;
5913                 ref->node.full_backref = 0;
5914         }
5915         ref->bytes = max_size;
5916         ref->found_ref = 0;
5917         ref->num_refs = 0;
5918         list_add_tail(&ref->node.list, &rec->backrefs);
5919         if (max_size > rec->max_size)
5920                 rec->max_size = max_size;
5921         return ref;
5922 }
5923
5924 /* Check if the type of extent matches with its chunk */
5925 static void check_extent_type(struct extent_record *rec)
5926 {
5927         struct btrfs_block_group_cache *bg_cache;
5928
5929         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5930         if (!bg_cache)
5931                 return;
5932
5933         /* data extent, check chunk directly*/
5934         if (!rec->metadata) {
5935                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5936                         rec->wrong_chunk_type = 1;
5937                 return;
5938         }
5939
5940         /* metadata extent, check the obvious case first */
5941         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5942                                  BTRFS_BLOCK_GROUP_METADATA))) {
5943                 rec->wrong_chunk_type = 1;
5944                 return;
5945         }
5946
5947         /*
5948          * Check SYSTEM extent, as it's also marked as metadata, we can only
5949          * make sure it's a SYSTEM extent by its backref
5950          */
5951         if (!list_empty(&rec->backrefs)) {
5952                 struct extent_backref *node;
5953                 struct tree_backref *tback;
5954                 u64 bg_type;
5955
5956                 node = to_extent_backref(rec->backrefs.next);
5957                 if (node->is_data) {
5958                         /* tree block shouldn't have data backref */
5959                         rec->wrong_chunk_type = 1;
5960                         return;
5961                 }
5962                 tback = container_of(node, struct tree_backref, node);
5963
5964                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5965                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5966                 else
5967                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5968                 if (!(bg_cache->flags & bg_type))
5969                         rec->wrong_chunk_type = 1;
5970         }
5971 }
5972
5973 /*
5974  * Allocate a new extent record, fill default values from @tmpl and insert int
5975  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5976  * the cache, otherwise it fails.
5977  */
5978 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5979                 struct extent_record *tmpl)
5980 {
5981         struct extent_record *rec;
5982         int ret = 0;
5983
5984         rec = malloc(sizeof(*rec));
5985         if (!rec)
5986                 return -ENOMEM;
5987         rec->start = tmpl->start;
5988         rec->max_size = tmpl->max_size;
5989         rec->nr = max(tmpl->nr, tmpl->max_size);
5990         rec->found_rec = tmpl->found_rec;
5991         rec->content_checked = tmpl->content_checked;
5992         rec->owner_ref_checked = tmpl->owner_ref_checked;
5993         rec->num_duplicates = 0;
5994         rec->metadata = tmpl->metadata;
5995         rec->flag_block_full_backref = FLAG_UNSET;
5996         rec->bad_full_backref = 0;
5997         rec->crossing_stripes = 0;
5998         rec->wrong_chunk_type = 0;
5999         rec->is_root = tmpl->is_root;
6000         rec->refs = tmpl->refs;
6001         rec->extent_item_refs = tmpl->extent_item_refs;
6002         rec->parent_generation = tmpl->parent_generation;
6003         INIT_LIST_HEAD(&rec->backrefs);
6004         INIT_LIST_HEAD(&rec->dups);
6005         INIT_LIST_HEAD(&rec->list);
6006         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6007         rec->cache.start = tmpl->start;
6008         rec->cache.size = tmpl->nr;
6009         ret = insert_cache_extent(extent_cache, &rec->cache);
6010         if (ret) {
6011                 free(rec);
6012                 return ret;
6013         }
6014         bytes_used += rec->nr;
6015
6016         if (tmpl->metadata)
6017                 rec->crossing_stripes = check_crossing_stripes(global_info,
6018                                 rec->start, global_info->tree_root->nodesize);
6019         check_extent_type(rec);
6020         return ret;
6021 }
6022
6023 /*
6024  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6025  * some are hints:
6026  * - refs              - if found, increase refs
6027  * - is_root           - if found, set
6028  * - content_checked   - if found, set
6029  * - owner_ref_checked - if found, set
6030  *
6031  * If not found, create a new one, initialize and insert.
6032  */
6033 static int add_extent_rec(struct cache_tree *extent_cache,
6034                 struct extent_record *tmpl)
6035 {
6036         struct extent_record *rec;
6037         struct cache_extent *cache;
6038         int ret = 0;
6039         int dup = 0;
6040
6041         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6042         if (cache) {
6043                 rec = container_of(cache, struct extent_record, cache);
6044                 if (tmpl->refs)
6045                         rec->refs++;
6046                 if (rec->nr == 1)
6047                         rec->nr = max(tmpl->nr, tmpl->max_size);
6048
6049                 /*
6050                  * We need to make sure to reset nr to whatever the extent
6051                  * record says was the real size, this way we can compare it to
6052                  * the backrefs.
6053                  */
6054                 if (tmpl->found_rec) {
6055                         if (tmpl->start != rec->start || rec->found_rec) {
6056                                 struct extent_record *tmp;
6057
6058                                 dup = 1;
6059                                 if (list_empty(&rec->list))
6060                                         list_add_tail(&rec->list,
6061                                                       &duplicate_extents);
6062
6063                                 /*
6064                                  * We have to do this song and dance in case we
6065                                  * find an extent record that falls inside of
6066                                  * our current extent record but does not have
6067                                  * the same objectid.
6068                                  */
6069                                 tmp = malloc(sizeof(*tmp));
6070                                 if (!tmp)
6071                                         return -ENOMEM;
6072                                 tmp->start = tmpl->start;
6073                                 tmp->max_size = tmpl->max_size;
6074                                 tmp->nr = tmpl->nr;
6075                                 tmp->found_rec = 1;
6076                                 tmp->metadata = tmpl->metadata;
6077                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6078                                 INIT_LIST_HEAD(&tmp->list);
6079                                 list_add_tail(&tmp->list, &rec->dups);
6080                                 rec->num_duplicates++;
6081                         } else {
6082                                 rec->nr = tmpl->nr;
6083                                 rec->found_rec = 1;
6084                         }
6085                 }
6086
6087                 if (tmpl->extent_item_refs && !dup) {
6088                         if (rec->extent_item_refs) {
6089                                 fprintf(stderr, "block %llu rec "
6090                                         "extent_item_refs %llu, passed %llu\n",
6091                                         (unsigned long long)tmpl->start,
6092                                         (unsigned long long)
6093                                                         rec->extent_item_refs,
6094                                         (unsigned long long)tmpl->extent_item_refs);
6095                         }
6096                         rec->extent_item_refs = tmpl->extent_item_refs;
6097                 }
6098                 if (tmpl->is_root)
6099                         rec->is_root = 1;
6100                 if (tmpl->content_checked)
6101                         rec->content_checked = 1;
6102                 if (tmpl->owner_ref_checked)
6103                         rec->owner_ref_checked = 1;
6104                 memcpy(&rec->parent_key, &tmpl->parent_key,
6105                                 sizeof(tmpl->parent_key));
6106                 if (tmpl->parent_generation)
6107                         rec->parent_generation = tmpl->parent_generation;
6108                 if (rec->max_size < tmpl->max_size)
6109                         rec->max_size = tmpl->max_size;
6110
6111                 /*
6112                  * A metadata extent can't cross stripe_len boundary, otherwise
6113                  * kernel scrub won't be able to handle it.
6114                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6115                  * it.
6116                  */
6117                 if (tmpl->metadata)
6118                         rec->crossing_stripes = check_crossing_stripes(
6119                                         global_info, rec->start,
6120                                         global_info->tree_root->nodesize);
6121                 check_extent_type(rec);
6122                 maybe_free_extent_rec(extent_cache, rec);
6123                 return ret;
6124         }
6125
6126         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6127
6128         return ret;
6129 }
6130
6131 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6132                             u64 parent, u64 root, int found_ref)
6133 {
6134         struct extent_record *rec;
6135         struct tree_backref *back;
6136         struct cache_extent *cache;
6137         int ret;
6138
6139         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6140         if (!cache) {
6141                 struct extent_record tmpl;
6142
6143                 memset(&tmpl, 0, sizeof(tmpl));
6144                 tmpl.start = bytenr;
6145                 tmpl.nr = 1;
6146                 tmpl.metadata = 1;
6147
6148                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6149                 if (ret)
6150                         return ret;
6151
6152                 /* really a bug in cache_extent implement now */
6153                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6154                 if (!cache)
6155                         return -ENOENT;
6156         }
6157
6158         rec = container_of(cache, struct extent_record, cache);
6159         if (rec->start != bytenr) {
6160                 /*
6161                  * Several cause, from unaligned bytenr to over lapping extents
6162                  */
6163                 return -EEXIST;
6164         }
6165
6166         back = find_tree_backref(rec, parent, root);
6167         if (!back) {
6168                 back = alloc_tree_backref(rec, parent, root);
6169                 if (!back)
6170                         return -ENOMEM;
6171         }
6172
6173         if (found_ref) {
6174                 if (back->node.found_ref) {
6175                         fprintf(stderr, "Extent back ref already exists "
6176                                 "for %llu parent %llu root %llu \n",
6177                                 (unsigned long long)bytenr,
6178                                 (unsigned long long)parent,
6179                                 (unsigned long long)root);
6180                 }
6181                 back->node.found_ref = 1;
6182         } else {
6183                 if (back->node.found_extent_tree) {
6184                         fprintf(stderr, "Extent back ref already exists "
6185                                 "for %llu parent %llu root %llu \n",
6186                                 (unsigned long long)bytenr,
6187                                 (unsigned long long)parent,
6188                                 (unsigned long long)root);
6189                 }
6190                 back->node.found_extent_tree = 1;
6191         }
6192         check_extent_type(rec);
6193         maybe_free_extent_rec(extent_cache, rec);
6194         return 0;
6195 }
6196
6197 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6198                             u64 parent, u64 root, u64 owner, u64 offset,
6199                             u32 num_refs, int found_ref, u64 max_size)
6200 {
6201         struct extent_record *rec;
6202         struct data_backref *back;
6203         struct cache_extent *cache;
6204         int ret;
6205
6206         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6207         if (!cache) {
6208                 struct extent_record tmpl;
6209
6210                 memset(&tmpl, 0, sizeof(tmpl));
6211                 tmpl.start = bytenr;
6212                 tmpl.nr = 1;
6213                 tmpl.max_size = max_size;
6214
6215                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6216                 if (ret)
6217                         return ret;
6218
6219                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6220                 if (!cache)
6221                         abort();
6222         }
6223
6224         rec = container_of(cache, struct extent_record, cache);
6225         if (rec->max_size < max_size)
6226                 rec->max_size = max_size;
6227
6228         /*
6229          * If found_ref is set then max_size is the real size and must match the
6230          * existing refs.  So if we have already found a ref then we need to
6231          * make sure that this ref matches the existing one, otherwise we need
6232          * to add a new backref so we can notice that the backrefs don't match
6233          * and we need to figure out who is telling the truth.  This is to
6234          * account for that awful fsync bug I introduced where we'd end up with
6235          * a btrfs_file_extent_item that would have its length include multiple
6236          * prealloc extents or point inside of a prealloc extent.
6237          */
6238         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6239                                  bytenr, max_size);
6240         if (!back) {
6241                 back = alloc_data_backref(rec, parent, root, owner, offset,
6242                                           max_size);
6243                 BUG_ON(!back);
6244         }
6245
6246         if (found_ref) {
6247                 BUG_ON(num_refs != 1);
6248                 if (back->node.found_ref)
6249                         BUG_ON(back->bytes != max_size);
6250                 back->node.found_ref = 1;
6251                 back->found_ref += 1;
6252                 back->bytes = max_size;
6253                 back->disk_bytenr = bytenr;
6254                 rec->refs += 1;
6255                 rec->content_checked = 1;
6256                 rec->owner_ref_checked = 1;
6257         } else {
6258                 if (back->node.found_extent_tree) {
6259                         fprintf(stderr, "Extent back ref already exists "
6260                                 "for %llu parent %llu root %llu "
6261                                 "owner %llu offset %llu num_refs %lu\n",
6262                                 (unsigned long long)bytenr,
6263                                 (unsigned long long)parent,
6264                                 (unsigned long long)root,
6265                                 (unsigned long long)owner,
6266                                 (unsigned long long)offset,
6267                                 (unsigned long)num_refs);
6268                 }
6269                 back->num_refs = num_refs;
6270                 back->node.found_extent_tree = 1;
6271         }
6272         maybe_free_extent_rec(extent_cache, rec);
6273         return 0;
6274 }
6275
6276 static int add_pending(struct cache_tree *pending,
6277                        struct cache_tree *seen, u64 bytenr, u32 size)
6278 {
6279         int ret;
6280         ret = add_cache_extent(seen, bytenr, size);
6281         if (ret)
6282                 return ret;
6283         add_cache_extent(pending, bytenr, size);
6284         return 0;
6285 }
6286
6287 static int pick_next_pending(struct cache_tree *pending,
6288                         struct cache_tree *reada,
6289                         struct cache_tree *nodes,
6290                         u64 last, struct block_info *bits, int bits_nr,
6291                         int *reada_bits)
6292 {
6293         unsigned long node_start = last;
6294         struct cache_extent *cache;
6295         int ret;
6296
6297         cache = search_cache_extent(reada, 0);
6298         if (cache) {
6299                 bits[0].start = cache->start;
6300                 bits[0].size = cache->size;
6301                 *reada_bits = 1;
6302                 return 1;
6303         }
6304         *reada_bits = 0;
6305         if (node_start > 32768)
6306                 node_start -= 32768;
6307
6308         cache = search_cache_extent(nodes, node_start);
6309         if (!cache)
6310                 cache = search_cache_extent(nodes, 0);
6311
6312         if (!cache) {
6313                  cache = search_cache_extent(pending, 0);
6314                  if (!cache)
6315                          return 0;
6316                  ret = 0;
6317                  do {
6318                          bits[ret].start = cache->start;
6319                          bits[ret].size = cache->size;
6320                          cache = next_cache_extent(cache);
6321                          ret++;
6322                  } while (cache && ret < bits_nr);
6323                  return ret;
6324         }
6325
6326         ret = 0;
6327         do {
6328                 bits[ret].start = cache->start;
6329                 bits[ret].size = cache->size;
6330                 cache = next_cache_extent(cache);
6331                 ret++;
6332         } while (cache && ret < bits_nr);
6333
6334         if (bits_nr - ret > 8) {
6335                 u64 lookup = bits[0].start + bits[0].size;
6336                 struct cache_extent *next;
6337                 next = search_cache_extent(pending, lookup);
6338                 while(next) {
6339                         if (next->start - lookup > 32768)
6340                                 break;
6341                         bits[ret].start = next->start;
6342                         bits[ret].size = next->size;
6343                         lookup = next->start + next->size;
6344                         ret++;
6345                         if (ret == bits_nr)
6346                                 break;
6347                         next = next_cache_extent(next);
6348                         if (!next)
6349                                 break;
6350                 }
6351         }
6352         return ret;
6353 }
6354
6355 static void free_chunk_record(struct cache_extent *cache)
6356 {
6357         struct chunk_record *rec;
6358
6359         rec = container_of(cache, struct chunk_record, cache);
6360         list_del_init(&rec->list);
6361         list_del_init(&rec->dextents);
6362         free(rec);
6363 }
6364
6365 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6366 {
6367         cache_tree_free_extents(chunk_cache, free_chunk_record);
6368 }
6369
6370 static void free_device_record(struct rb_node *node)
6371 {
6372         struct device_record *rec;
6373
6374         rec = container_of(node, struct device_record, node);
6375         free(rec);
6376 }
6377
6378 FREE_RB_BASED_TREE(device_cache, free_device_record);
6379
6380 int insert_block_group_record(struct block_group_tree *tree,
6381                               struct block_group_record *bg_rec)
6382 {
6383         int ret;
6384
6385         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6386         if (ret)
6387                 return ret;
6388
6389         list_add_tail(&bg_rec->list, &tree->block_groups);
6390         return 0;
6391 }
6392
6393 static void free_block_group_record(struct cache_extent *cache)
6394 {
6395         struct block_group_record *rec;
6396
6397         rec = container_of(cache, struct block_group_record, cache);
6398         list_del_init(&rec->list);
6399         free(rec);
6400 }
6401
6402 void free_block_group_tree(struct block_group_tree *tree)
6403 {
6404         cache_tree_free_extents(&tree->tree, free_block_group_record);
6405 }
6406
6407 int insert_device_extent_record(struct device_extent_tree *tree,
6408                                 struct device_extent_record *de_rec)
6409 {
6410         int ret;
6411
6412         /*
6413          * Device extent is a bit different from the other extents, because
6414          * the extents which belong to the different devices may have the
6415          * same start and size, so we need use the special extent cache
6416          * search/insert functions.
6417          */
6418         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6419         if (ret)
6420                 return ret;
6421
6422         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6423         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6424         return 0;
6425 }
6426
6427 static void free_device_extent_record(struct cache_extent *cache)
6428 {
6429         struct device_extent_record *rec;
6430
6431         rec = container_of(cache, struct device_extent_record, cache);
6432         if (!list_empty(&rec->chunk_list))
6433                 list_del_init(&rec->chunk_list);
6434         if (!list_empty(&rec->device_list))
6435                 list_del_init(&rec->device_list);
6436         free(rec);
6437 }
6438
6439 void free_device_extent_tree(struct device_extent_tree *tree)
6440 {
6441         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6442 }
6443
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref at @slot of @leaf into a backref in @extent_cache.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the v0 ref count.
 * In both cases the key's objectid is the extent bytenr and its offset is
 * passed as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
6464
6465 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6466                                             struct btrfs_key *key,
6467                                             int slot)
6468 {
6469         struct btrfs_chunk *ptr;
6470         struct chunk_record *rec;
6471         int num_stripes, i;
6472
6473         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6474         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6475
6476         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6477         if (!rec) {
6478                 fprintf(stderr, "memory allocation failed\n");
6479                 exit(-1);
6480         }
6481
6482         INIT_LIST_HEAD(&rec->list);
6483         INIT_LIST_HEAD(&rec->dextents);
6484         rec->bg_rec = NULL;
6485
6486         rec->cache.start = key->offset;
6487         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6488
6489         rec->generation = btrfs_header_generation(leaf);
6490
6491         rec->objectid = key->objectid;
6492         rec->type = key->type;
6493         rec->offset = key->offset;
6494
6495         rec->length = rec->cache.size;
6496         rec->owner = btrfs_chunk_owner(leaf, ptr);
6497         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6498         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6499         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6500         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6501         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6502         rec->num_stripes = num_stripes;
6503         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6504
6505         for (i = 0; i < rec->num_stripes; ++i) {
6506                 rec->stripes[i].devid =
6507                         btrfs_stripe_devid_nr(leaf, ptr, i);
6508                 rec->stripes[i].offset =
6509                         btrfs_stripe_offset_nr(leaf, ptr, i);
6510                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6511                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6512                                 BTRFS_UUID_SIZE);
6513         }
6514
6515         return rec;
6516 }
6517
6518 static int process_chunk_item(struct cache_tree *chunk_cache,
6519                               struct btrfs_key *key, struct extent_buffer *eb,
6520                               int slot)
6521 {
6522         struct chunk_record *rec;
6523         struct btrfs_chunk *chunk;
6524         int ret = 0;
6525
6526         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6527         /*
6528          * Do extra check for this chunk item,
6529          *
6530          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6531          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6532          * and owner<->key_type check.
6533          */
6534         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6535                                       key->offset);
6536         if (ret < 0) {
6537                 error("chunk(%llu, %llu) is not valid, ignore it",
6538                       key->offset, btrfs_chunk_length(eb, chunk));
6539                 return 0;
6540         }
6541         rec = btrfs_new_chunk_record(eb, key, slot);
6542         ret = insert_cache_extent(chunk_cache, &rec->cache);
6543         if (ret) {
6544                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6545                         rec->offset, rec->length);
6546                 free(rec);
6547         }
6548
6549         return ret;
6550 }
6551
6552 static int process_device_item(struct rb_root *dev_cache,
6553                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6554 {
6555         struct btrfs_dev_item *ptr;
6556         struct device_record *rec;
6557         int ret = 0;
6558
6559         ptr = btrfs_item_ptr(eb,
6560                 slot, struct btrfs_dev_item);
6561
6562         rec = malloc(sizeof(*rec));
6563         if (!rec) {
6564                 fprintf(stderr, "memory allocation failed\n");
6565                 return -ENOMEM;
6566         }
6567
6568         rec->devid = key->offset;
6569         rec->generation = btrfs_header_generation(eb);
6570
6571         rec->objectid = key->objectid;
6572         rec->type = key->type;
6573         rec->offset = key->offset;
6574
6575         rec->devid = btrfs_device_id(eb, ptr);
6576         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6577         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6578
6579         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6580         if (ret) {
6581                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6582                 free(rec);
6583         }
6584
6585         return ret;
6586 }
6587
6588 struct block_group_record *
6589 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6590                              int slot)
6591 {
6592         struct btrfs_block_group_item *ptr;
6593         struct block_group_record *rec;
6594
6595         rec = calloc(1, sizeof(*rec));
6596         if (!rec) {
6597                 fprintf(stderr, "memory allocation failed\n");
6598                 exit(-1);
6599         }
6600
6601         rec->cache.start = key->objectid;
6602         rec->cache.size = key->offset;
6603
6604         rec->generation = btrfs_header_generation(leaf);
6605
6606         rec->objectid = key->objectid;
6607         rec->type = key->type;
6608         rec->offset = key->offset;
6609
6610         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6611         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6612
6613         INIT_LIST_HEAD(&rec->list);
6614
6615         return rec;
6616 }
6617
6618 static int process_block_group_item(struct block_group_tree *block_group_cache,
6619                                     struct btrfs_key *key,
6620                                     struct extent_buffer *eb, int slot)
6621 {
6622         struct block_group_record *rec;
6623         int ret = 0;
6624
6625         rec = btrfs_new_block_group_record(eb, key, slot);
6626         ret = insert_block_group_record(block_group_cache, rec);
6627         if (ret) {
6628                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6629                         rec->objectid, rec->offset);
6630                 free(rec);
6631         }
6632
6633         return ret;
6634 }
6635
6636 struct device_extent_record *
6637 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6638                                struct btrfs_key *key, int slot)
6639 {
6640         struct device_extent_record *rec;
6641         struct btrfs_dev_extent *ptr;
6642
6643         rec = calloc(1, sizeof(*rec));
6644         if (!rec) {
6645                 fprintf(stderr, "memory allocation failed\n");
6646                 exit(-1);
6647         }
6648
6649         rec->cache.objectid = key->objectid;
6650         rec->cache.start = key->offset;
6651
6652         rec->generation = btrfs_header_generation(leaf);
6653
6654         rec->objectid = key->objectid;
6655         rec->type = key->type;
6656         rec->offset = key->offset;
6657
6658         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6659         rec->chunk_objecteid =
6660                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6661         rec->chunk_offset =
6662                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6663         rec->length = btrfs_dev_extent_length(leaf, ptr);
6664         rec->cache.size = rec->length;
6665
6666         INIT_LIST_HEAD(&rec->chunk_list);
6667         INIT_LIST_HEAD(&rec->device_list);
6668
6669         return rec;
6670 }
6671
6672 static int
6673 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6674                            struct btrfs_key *key, struct extent_buffer *eb,
6675                            int slot)
6676 {
6677         struct device_extent_record *rec;
6678         int ret;
6679
6680         rec = btrfs_new_device_extent_record(eb, key, slot);
6681         ret = insert_device_extent_record(dev_extent_cache, rec);
6682         if (ret) {
6683                 fprintf(stderr,
6684                         "Device extent[%llu, %llu, %llu] existed.\n",
6685                         rec->objectid, rec->offset, rec->length);
6686                 free(rec);
6687         }
6688
6689         return ret;
6690 }
6691
6692 static int process_extent_item(struct btrfs_root *root,
6693                                struct cache_tree *extent_cache,
6694                                struct extent_buffer *eb, int slot)
6695 {
6696         struct btrfs_extent_item *ei;
6697         struct btrfs_extent_inline_ref *iref;
6698         struct btrfs_extent_data_ref *dref;
6699         struct btrfs_shared_data_ref *sref;
6700         struct btrfs_key key;
6701         struct extent_record tmpl;
6702         unsigned long end;
6703         unsigned long ptr;
6704         int ret;
6705         int type;
6706         u32 item_size = btrfs_item_size_nr(eb, slot);
6707         u64 refs = 0;
6708         u64 offset;
6709         u64 num_bytes;
6710         int metadata = 0;
6711
6712         btrfs_item_key_to_cpu(eb, &key, slot);
6713
6714         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6715                 metadata = 1;
6716                 num_bytes = root->nodesize;
6717         } else {
6718                 num_bytes = key.offset;
6719         }
6720
6721         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6722                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6723                       key.objectid, root->sectorsize);
6724                 return -EIO;
6725         }
6726         if (item_size < sizeof(*ei)) {
6727 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6728                 struct btrfs_extent_item_v0 *ei0;
6729                 BUG_ON(item_size != sizeof(*ei0));
6730                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6731                 refs = btrfs_extent_refs_v0(eb, ei0);
6732 #else
6733                 BUG();
6734 #endif
6735                 memset(&tmpl, 0, sizeof(tmpl));
6736                 tmpl.start = key.objectid;
6737                 tmpl.nr = num_bytes;
6738                 tmpl.extent_item_refs = refs;
6739                 tmpl.metadata = metadata;
6740                 tmpl.found_rec = 1;
6741                 tmpl.max_size = num_bytes;
6742
6743                 return add_extent_rec(extent_cache, &tmpl);
6744         }
6745
6746         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6747         refs = btrfs_extent_refs(eb, ei);
6748         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6749                 metadata = 1;
6750         else
6751                 metadata = 0;
6752         if (metadata && num_bytes != root->nodesize) {
6753                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6754                       num_bytes, root->nodesize);
6755                 return -EIO;
6756         }
6757         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6758                 error("ignore invalid data extent, length %llu is not aligned to %u",
6759                       num_bytes, root->sectorsize);
6760                 return -EIO;
6761         }
6762
6763         memset(&tmpl, 0, sizeof(tmpl));
6764         tmpl.start = key.objectid;
6765         tmpl.nr = num_bytes;
6766         tmpl.extent_item_refs = refs;
6767         tmpl.metadata = metadata;
6768         tmpl.found_rec = 1;
6769         tmpl.max_size = num_bytes;
6770         add_extent_rec(extent_cache, &tmpl);
6771
6772         ptr = (unsigned long)(ei + 1);
6773         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6774             key.type == BTRFS_EXTENT_ITEM_KEY)
6775                 ptr += sizeof(struct btrfs_tree_block_info);
6776
6777         end = (unsigned long)ei + item_size;
6778         while (ptr < end) {
6779                 iref = (struct btrfs_extent_inline_ref *)ptr;
6780                 type = btrfs_extent_inline_ref_type(eb, iref);
6781                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6782                 switch (type) {
6783                 case BTRFS_TREE_BLOCK_REF_KEY:
6784                         ret = add_tree_backref(extent_cache, key.objectid,
6785                                         0, offset, 0);
6786                         if (ret < 0)
6787                                 error("add_tree_backref failed: %s",
6788                                       strerror(-ret));
6789                         break;
6790                 case BTRFS_SHARED_BLOCK_REF_KEY:
6791                         ret = add_tree_backref(extent_cache, key.objectid,
6792                                         offset, 0, 0);
6793                         if (ret < 0)
6794                                 error("add_tree_backref failed: %s",
6795                                       strerror(-ret));
6796                         break;
6797                 case BTRFS_EXTENT_DATA_REF_KEY:
6798                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6799                         add_data_backref(extent_cache, key.objectid, 0,
6800                                         btrfs_extent_data_ref_root(eb, dref),
6801                                         btrfs_extent_data_ref_objectid(eb,
6802                                                                        dref),
6803                                         btrfs_extent_data_ref_offset(eb, dref),
6804                                         btrfs_extent_data_ref_count(eb, dref),
6805                                         0, num_bytes);
6806                         break;
6807                 case BTRFS_SHARED_DATA_REF_KEY:
6808                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6809                         add_data_backref(extent_cache, key.objectid, offset,
6810                                         0, 0, 0,
6811                                         btrfs_shared_data_ref_count(eb, sref),
6812                                         0, num_bytes);
6813                         break;
6814                 default:
6815                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6816                                 key.objectid, key.type, num_bytes);
6817                         goto out;
6818                 }
6819                 ptr += btrfs_extent_inline_ref_size(type);
6820         }
6821         WARN_ON(ptr > end);
6822 out:
6823         return 0;
6824 }
6825
6826 static int check_cache_range(struct btrfs_root *root,
6827                              struct btrfs_block_group_cache *cache,
6828                              u64 offset, u64 bytes)
6829 {
6830         struct btrfs_free_space *entry;
6831         u64 *logical;
6832         u64 bytenr;
6833         int stripe_len;
6834         int i, nr, ret;
6835
6836         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6837                 bytenr = btrfs_sb_offset(i);
6838                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6839                                        cache->key.objectid, bytenr, 0,
6840                                        &logical, &nr, &stripe_len);
6841                 if (ret)
6842                         return ret;
6843
6844                 while (nr--) {
6845                         if (logical[nr] + stripe_len <= offset)
6846                                 continue;
6847                         if (offset + bytes <= logical[nr])
6848                                 continue;
6849                         if (logical[nr] == offset) {
6850                                 if (stripe_len >= bytes) {
6851                                         free(logical);
6852                                         return 0;
6853                                 }
6854                                 bytes -= stripe_len;
6855                                 offset += stripe_len;
6856                         } else if (logical[nr] < offset) {
6857                                 if (logical[nr] + stripe_len >=
6858                                     offset + bytes) {
6859                                         free(logical);
6860                                         return 0;
6861                                 }
6862                                 bytes = (offset + bytes) -
6863                                         (logical[nr] + stripe_len);
6864                                 offset = logical[nr] + stripe_len;
6865                         } else {
6866                                 /*
6867                                  * Could be tricky, the super may land in the
6868                                  * middle of the area we're checking.  First
6869                                  * check the easiest case, it's at the end.
6870                                  */
6871                                 if (logical[nr] + stripe_len >=
6872                                     bytes + offset) {
6873                                         bytes = logical[nr] - offset;
6874                                         continue;
6875                                 }
6876
6877                                 /* Check the left side */
6878                                 ret = check_cache_range(root, cache,
6879                                                         offset,
6880                                                         logical[nr] - offset);
6881                                 if (ret) {
6882                                         free(logical);
6883                                         return ret;
6884                                 }
6885
6886                                 /* Now we continue with the right side */
6887                                 bytes = (offset + bytes) -
6888                                         (logical[nr] + stripe_len);
6889                                 offset = logical[nr] + stripe_len;
6890                         }
6891                 }
6892
6893                 free(logical);
6894         }
6895
6896         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6897         if (!entry) {
6898                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6899                         offset, offset+bytes);
6900                 return -EINVAL;
6901         }
6902
6903         if (entry->offset != offset) {
6904                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6905                         entry->offset);
6906                 return -EINVAL;
6907         }
6908
6909         if (entry->bytes != bytes) {
6910                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6911                         bytes, entry->bytes, offset);
6912                 return -EINVAL;
6913         }
6914
6915         unlink_free_space(cache->free_space_ctl, entry);
6916         free(entry);
6917         return 0;
6918 }
6919
6920 static int verify_space_cache(struct btrfs_root *root,
6921                               struct btrfs_block_group_cache *cache)
6922 {
6923         struct btrfs_path path;
6924         struct extent_buffer *leaf;
6925         struct btrfs_key key;
6926         u64 last;
6927         int ret = 0;
6928
6929         root = root->fs_info->extent_root;
6930
6931         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6932
6933         btrfs_init_path(&path);
6934         key.objectid = last;
6935         key.offset = 0;
6936         key.type = BTRFS_EXTENT_ITEM_KEY;
6937         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6938         if (ret < 0)
6939                 goto out;
6940         ret = 0;
6941         while (1) {
6942                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6943                         ret = btrfs_next_leaf(root, &path);
6944                         if (ret < 0)
6945                                 goto out;
6946                         if (ret > 0) {
6947                                 ret = 0;
6948                                 break;
6949                         }
6950                 }
6951                 leaf = path.nodes[0];
6952                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6953                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6954                         break;
6955                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6956                     key.type != BTRFS_METADATA_ITEM_KEY) {
6957                         path.slots[0]++;
6958                         continue;
6959                 }
6960
6961                 if (last == key.objectid) {
6962                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6963                                 last = key.objectid + key.offset;
6964                         else
6965                                 last = key.objectid + root->nodesize;
6966                         path.slots[0]++;
6967                         continue;
6968                 }
6969
6970                 ret = check_cache_range(root, cache, last,
6971                                         key.objectid - last);
6972                 if (ret)
6973                         break;
6974                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6975                         last = key.objectid + key.offset;
6976                 else
6977                         last = key.objectid + root->nodesize;
6978                 path.slots[0]++;
6979         }
6980
6981         if (last < cache->key.objectid + cache->key.offset)
6982                 ret = check_cache_range(root, cache, last,
6983                                         cache->key.objectid +
6984                                         cache->key.offset - last);
6985
6986 out:
6987         btrfs_release_path(&path);
6988
6989         if (!ret &&
6990             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6991                 fprintf(stderr, "There are still entries left in the space "
6992                         "cache\n");
6993                 ret = -EINVAL;
6994         }
6995
6996         return ret;
6997 }
6998
6999 static int check_space_cache(struct btrfs_root *root)
7000 {
7001         struct btrfs_block_group_cache *cache;
7002         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7003         int ret;
7004         int error = 0;
7005
7006         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7007             btrfs_super_generation(root->fs_info->super_copy) !=
7008             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7009                 printf("cache and super generation don't match, space cache "
7010                        "will be invalidated\n");
7011                 return 0;
7012         }
7013
7014         if (ctx.progress_enabled) {
7015                 ctx.tp = TASK_FREE_SPACE;
7016                 task_start(ctx.info);
7017         }
7018
7019         while (1) {
7020                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7021                 if (!cache)
7022                         break;
7023
7024                 start = cache->key.objectid + cache->key.offset;
7025                 if (!cache->free_space_ctl) {
7026                         if (btrfs_init_free_space_ctl(cache,
7027                                                       root->sectorsize)) {
7028                                 ret = -ENOMEM;
7029                                 break;
7030                         }
7031                 } else {
7032                         btrfs_remove_free_space_cache(cache);
7033                 }
7034
7035                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7036                         ret = exclude_super_stripes(root, cache);
7037                         if (ret) {
7038                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7039                                         strerror(-ret));
7040                                 error++;
7041                                 continue;
7042                         }
7043                         ret = load_free_space_tree(root->fs_info, cache);
7044                         free_excluded_extents(root, cache);
7045                         if (ret < 0) {
7046                                 fprintf(stderr, "could not load free space tree: %s\n",
7047                                         strerror(-ret));
7048                                 error++;
7049                                 continue;
7050                         }
7051                         error += ret;
7052                 } else {
7053                         ret = load_free_space_cache(root->fs_info, cache);
7054                         if (!ret)
7055                                 continue;
7056                 }
7057
7058                 ret = verify_space_cache(root, cache);
7059                 if (ret) {
7060                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7061                                 cache->key.objectid);
7062                         error++;
7063                 }
7064         }
7065
7066         task_stop(ctx.info);
7067
7068         return error ? -EINVAL : 0;
7069 }
7070
7071 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7072                         u64 num_bytes, unsigned long leaf_offset,
7073                         struct extent_buffer *eb) {
7074
7075         u64 offset = 0;
7076         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7077         char *data;
7078         unsigned long csum_offset;
7079         u32 csum;
7080         u32 csum_expected;
7081         u64 read_len;
7082         u64 data_checked = 0;
7083         u64 tmp;
7084         int ret = 0;
7085         int mirror;
7086         int num_copies;
7087
7088         if (num_bytes % root->sectorsize)
7089                 return -EINVAL;
7090
7091         data = malloc(num_bytes);
7092         if (!data)
7093                 return -ENOMEM;
7094
7095         while (offset < num_bytes) {
7096                 mirror = 0;
7097 again:
7098                 read_len = num_bytes - offset;
7099                 /* read as much space once a time */
7100                 ret = read_extent_data(root, data + offset,
7101                                 bytenr + offset, &read_len, mirror);
7102                 if (ret)
7103                         goto out;
7104                 data_checked = 0;
7105                 /* verify every 4k data's checksum */
7106                 while (data_checked < read_len) {
7107                         csum = ~(u32)0;
7108                         tmp = offset + data_checked;
7109
7110                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
7111                                                csum, root->sectorsize);
7112                         btrfs_csum_final(csum, (u8 *)&csum);
7113
7114                         csum_offset = leaf_offset +
7115                                  tmp / root->sectorsize * csum_size;
7116                         read_extent_buffer(eb, (char *)&csum_expected,
7117                                            csum_offset, csum_size);
7118                         /* try another mirror */
7119                         if (csum != csum_expected) {
7120                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7121                                                 mirror, bytenr + tmp,
7122                                                 csum, csum_expected);
7123                                 num_copies = btrfs_num_copies(
7124                                                 &root->fs_info->mapping_tree,
7125                                                 bytenr, num_bytes);
7126                                 if (mirror < num_copies - 1) {
7127                                         mirror += 1;
7128                                         goto again;
7129                                 }
7130                         }
7131                         data_checked += root->sectorsize;
7132                 }
7133                 offset += read_len;
7134         }
7135 out:
7136         free(data);
7137         return ret;
7138 }
7139
7140 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7141                                u64 num_bytes)
7142 {
7143         struct btrfs_path path;
7144         struct extent_buffer *leaf;
7145         struct btrfs_key key;
7146         int ret;
7147
7148         btrfs_init_path(&path);
7149         key.objectid = bytenr;
7150         key.type = BTRFS_EXTENT_ITEM_KEY;
7151         key.offset = (u64)-1;
7152
7153 again:
7154         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7155                                 0, 0);
7156         if (ret < 0) {
7157                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7158                 btrfs_release_path(&path);
7159                 return ret;
7160         } else if (ret) {
7161                 if (path.slots[0] > 0) {
7162                         path.slots[0]--;
7163                 } else {
7164                         ret = btrfs_prev_leaf(root, &path);
7165                         if (ret < 0) {
7166                                 goto out;
7167                         } else if (ret > 0) {
7168                                 ret = 0;
7169                                 goto out;
7170                         }
7171                 }
7172         }
7173
7174         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7175
7176         /*
7177          * Block group items come before extent items if they have the same
7178          * bytenr, so walk back one more just in case.  Dear future traveller,
7179          * first congrats on mastering time travel.  Now if it's not too much
7180          * trouble could you go back to 2006 and tell Chris to make the
7181          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7182          * EXTENT_ITEM_KEY please?
7183          */
7184         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7185                 if (path.slots[0] > 0) {
7186                         path.slots[0]--;
7187                 } else {
7188                         ret = btrfs_prev_leaf(root, &path);
7189                         if (ret < 0) {
7190                                 goto out;
7191                         } else if (ret > 0) {
7192                                 ret = 0;
7193                                 goto out;
7194                         }
7195                 }
7196                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7197         }
7198
7199         while (num_bytes) {
7200                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7201                         ret = btrfs_next_leaf(root, &path);
7202                         if (ret < 0) {
7203                                 fprintf(stderr, "Error going to next leaf "
7204                                         "%d\n", ret);
7205                                 btrfs_release_path(&path);
7206                                 return ret;
7207                         } else if (ret) {
7208                                 break;
7209                         }
7210                 }
7211                 leaf = path.nodes[0];
7212                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7213                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7214                         path.slots[0]++;
7215                         continue;
7216                 }
7217                 if (key.objectid + key.offset < bytenr) {
7218                         path.slots[0]++;
7219                         continue;
7220                 }
7221                 if (key.objectid > bytenr + num_bytes)
7222                         break;
7223
7224                 if (key.objectid == bytenr) {
7225                         if (key.offset >= num_bytes) {
7226                                 num_bytes = 0;
7227                                 break;
7228                         }
7229                         num_bytes -= key.offset;
7230                         bytenr += key.offset;
7231                 } else if (key.objectid < bytenr) {
7232                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7233                                 num_bytes = 0;
7234                                 break;
7235                         }
7236                         num_bytes = (bytenr + num_bytes) -
7237                                 (key.objectid + key.offset);
7238                         bytenr = key.objectid + key.offset;
7239                 } else {
7240                         if (key.objectid + key.offset < bytenr + num_bytes) {
7241                                 u64 new_start = key.objectid + key.offset;
7242                                 u64 new_bytes = bytenr + num_bytes - new_start;
7243
7244                                 /*
7245                                  * Weird case, the extent is in the middle of
7246                                  * our range, we'll have to search one side
7247                                  * and then the other.  Not sure if this happens
7248                                  * in real life, but no harm in coding it up
7249                                  * anyway just in case.
7250                                  */
7251                                 btrfs_release_path(&path);
7252                                 ret = check_extent_exists(root, new_start,
7253                                                           new_bytes);
7254                                 if (ret) {
7255                                         fprintf(stderr, "Right section didn't "
7256                                                 "have a record\n");
7257                                         break;
7258                                 }
7259                                 num_bytes = key.objectid - bytenr;
7260                                 goto again;
7261                         }
7262                         num_bytes = key.objectid - bytenr;
7263                 }
7264                 path.slots[0]++;
7265         }
7266         ret = 0;
7267
7268 out:
7269         if (num_bytes && !ret) {
7270                 fprintf(stderr, "There are no extents for csum range "
7271                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7272                 ret = 1;
7273         }
7274
7275         btrfs_release_path(&path);
7276         return ret;
7277 }
7278
7279 static int check_csums(struct btrfs_root *root)
7280 {
7281         struct btrfs_path path;
7282         struct extent_buffer *leaf;
7283         struct btrfs_key key;
7284         u64 offset = 0, num_bytes = 0;
7285         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7286         int errors = 0;
7287         int ret;
7288         u64 data_len;
7289         unsigned long leaf_offset;
7290
7291         root = root->fs_info->csum_root;
7292         if (!extent_buffer_uptodate(root->node)) {
7293                 fprintf(stderr, "No valid csum tree found\n");
7294                 return -ENOENT;
7295         }
7296
7297         btrfs_init_path(&path);
7298         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7299         key.type = BTRFS_EXTENT_CSUM_KEY;
7300         key.offset = 0;
7301         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7302         if (ret < 0) {
7303                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7304                 btrfs_release_path(&path);
7305                 return ret;
7306         }
7307
7308         if (ret > 0 && path.slots[0])
7309                 path.slots[0]--;
7310         ret = 0;
7311
7312         while (1) {
7313                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7314                         ret = btrfs_next_leaf(root, &path);
7315                         if (ret < 0) {
7316                                 fprintf(stderr, "Error going to next leaf "
7317                                         "%d\n", ret);
7318                                 break;
7319                         }
7320                         if (ret)
7321                                 break;
7322                 }
7323                 leaf = path.nodes[0];
7324
7325                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7326                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7327                         path.slots[0]++;
7328                         continue;
7329                 }
7330
7331                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7332                               csum_size) * root->sectorsize;
7333                 if (!check_data_csum)
7334                         goto skip_csum_check;
7335                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7336                 ret = check_extent_csums(root, key.offset, data_len,
7337                                          leaf_offset, leaf);
7338                 if (ret)
7339                         break;
7340 skip_csum_check:
7341                 if (!num_bytes) {
7342                         offset = key.offset;
7343                 } else if (key.offset != offset + num_bytes) {
7344                         ret = check_extent_exists(root, offset, num_bytes);
7345                         if (ret) {
7346                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7347                                         "there is no extent record\n",
7348                                         offset, offset+num_bytes);
7349                                 errors++;
7350                         }
7351                         offset = key.offset;
7352                         num_bytes = 0;
7353                 }
7354                 num_bytes += data_len;
7355                 path.slots[0]++;
7356         }
7357
7358         btrfs_release_path(&path);
7359         return errors;
7360 }
7361
7362 static int is_dropped_key(struct btrfs_key *key,
7363                           struct btrfs_key *drop_key) {
7364         if (key->objectid < drop_key->objectid)
7365                 return 1;
7366         else if (key->objectid == drop_key->objectid) {
7367                 if (key->type < drop_key->type)
7368                         return 1;
7369                 else if (key->type == drop_key->type) {
7370                         if (key->offset < drop_key->offset)
7371                                 return 1;
7372                 }
7373         }
7374         return 0;
7375 }
7376
7377 /*
7378  * Here are the rules for FULL_BACKREF.
7379  *
7380  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7381  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7382  *      FULL_BACKREF set.
7383  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7384  *    if it happened after the relocation occurred since we'll have dropped the
7385  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7386  *    have no real way to know for sure.
7387  *
7388  * We process the blocks one root at a time, and we start from the lowest root
7389  * objectid and go to the highest.  So we can just lookup the owner backref for
7390  * the record and if we don't find it then we know it doesn't exist and we have
7391  * a FULL BACKREF.
7392  *
7393  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7394  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7395  * be set or not and then we can check later once we've gathered all the refs.
7396  */
7397 static int calc_extent_flag(struct btrfs_root *root,
7398                            struct cache_tree *extent_cache,
7399                            struct extent_buffer *buf,
7400                            struct root_item_record *ri,
7401                            u64 *flags)
7402 {
7403         struct extent_record *rec;
7404         struct cache_extent *cache;
7405         struct tree_backref *tback;
7406         u64 owner = 0;
7407
7408         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7409         /* we have added this extent before */
7410         if (!cache)
7411                 return -ENOENT;
7412
7413         rec = container_of(cache, struct extent_record, cache);
7414
7415         /*
7416          * Except file/reloc tree, we can not have
7417          * FULL BACKREF MODE
7418          */
7419         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7420                 goto normal;
7421         /*
7422          * root node
7423          */
7424         if (buf->start == ri->bytenr)
7425                 goto normal;
7426
7427         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7428                 goto full_backref;
7429
7430         owner = btrfs_header_owner(buf);
7431         if (owner == ri->objectid)
7432                 goto normal;
7433
7434         tback = find_tree_backref(rec, 0, owner);
7435         if (!tback)
7436                 goto full_backref;
7437 normal:
7438         *flags = 0;
7439         if (rec->flag_block_full_backref != FLAG_UNSET &&
7440             rec->flag_block_full_backref != 0)
7441                 rec->bad_full_backref = 1;
7442         return 0;
7443 full_backref:
7444         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7445         if (rec->flag_block_full_backref != FLAG_UNSET &&
7446             rec->flag_block_full_backref != 1)
7447                 rec->bad_full_backref = 1;
7448         return 0;
7449 }
7450
7451 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7452 {
7453         fprintf(stderr, "Invalid key type(");
7454         print_key_type(stderr, 0, key_type);
7455         fprintf(stderr, ") found in root(");
7456         print_objectid(stderr, rootid, 0);
7457         fprintf(stderr, ")\n");
7458 }
7459
7460 /*
7461  * Check if the key is valid with its extent buffer.
7462  *
7463  * This is a early check in case invalid key exists in a extent buffer
7464  * This is not comprehensive yet, but should prevent wrong key/item passed
7465  * further
7466  */
7467 static int check_type_with_root(u64 rootid, u8 key_type)
7468 {
7469         switch (key_type) {
7470         /* Only valid in chunk tree */
7471         case BTRFS_DEV_ITEM_KEY:
7472         case BTRFS_CHUNK_ITEM_KEY:
7473                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7474                         goto err;
7475                 break;
7476         /* valid in csum and log tree */
7477         case BTRFS_CSUM_TREE_OBJECTID:
7478                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7479                       is_fstree(rootid)))
7480                         goto err;
7481                 break;
7482         case BTRFS_EXTENT_ITEM_KEY:
7483         case BTRFS_METADATA_ITEM_KEY:
7484         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7485                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7486                         goto err;
7487                 break;
7488         case BTRFS_ROOT_ITEM_KEY:
7489                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7490                         goto err;
7491                 break;
7492         case BTRFS_DEV_EXTENT_KEY:
7493                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7494                         goto err;
7495                 break;
7496         }
7497         return 0;
7498 err:
7499         report_mismatch_key_root(key_type, rootid);
7500         return -EINVAL;
7501 }
7502
7503 static int run_next_block(struct btrfs_root *root,
7504                           struct block_info *bits,
7505                           int bits_nr,
7506                           u64 *last,
7507                           struct cache_tree *pending,
7508                           struct cache_tree *seen,
7509                           struct cache_tree *reada,
7510                           struct cache_tree *nodes,
7511                           struct cache_tree *extent_cache,
7512                           struct cache_tree *chunk_cache,
7513                           struct rb_root *dev_cache,
7514                           struct block_group_tree *block_group_cache,
7515                           struct device_extent_tree *dev_extent_cache,
7516                           struct root_item_record *ri)
7517 {
7518         struct extent_buffer *buf;
7519         struct extent_record *rec = NULL;
7520         u64 bytenr;
7521         u32 size;
7522         u64 parent;
7523         u64 owner;
7524         u64 flags;
7525         u64 ptr;
7526         u64 gen = 0;
7527         int ret = 0;
7528         int i;
7529         int nritems;
7530         struct btrfs_key key;
7531         struct cache_extent *cache;
7532         int reada_bits;
7533
7534         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7535                                     bits_nr, &reada_bits);
7536         if (nritems == 0)
7537                 return 1;
7538
7539         if (!reada_bits) {
7540                 for(i = 0; i < nritems; i++) {
7541                         ret = add_cache_extent(reada, bits[i].start,
7542                                                bits[i].size);
7543                         if (ret == -EEXIST)
7544                                 continue;
7545
7546                         /* fixme, get the parent transid */
7547                         readahead_tree_block(root, bits[i].start,
7548                                              bits[i].size, 0);
7549                 }
7550         }
7551         *last = bits[0].start;
7552         bytenr = bits[0].start;
7553         size = bits[0].size;
7554
7555         cache = lookup_cache_extent(pending, bytenr, size);
7556         if (cache) {
7557                 remove_cache_extent(pending, cache);
7558                 free(cache);
7559         }
7560         cache = lookup_cache_extent(reada, bytenr, size);
7561         if (cache) {
7562                 remove_cache_extent(reada, cache);
7563                 free(cache);
7564         }
7565         cache = lookup_cache_extent(nodes, bytenr, size);
7566         if (cache) {
7567                 remove_cache_extent(nodes, cache);
7568                 free(cache);
7569         }
7570         cache = lookup_cache_extent(extent_cache, bytenr, size);
7571         if (cache) {
7572                 rec = container_of(cache, struct extent_record, cache);
7573                 gen = rec->parent_generation;
7574         }
7575
7576         /* fixme, get the real parent transid */
7577         buf = read_tree_block(root, bytenr, size, gen);
7578         if (!extent_buffer_uptodate(buf)) {
7579                 record_bad_block_io(root->fs_info,
7580                                     extent_cache, bytenr, size);
7581                 goto out;
7582         }
7583
7584         nritems = btrfs_header_nritems(buf);
7585
7586         flags = 0;
7587         if (!init_extent_tree) {
7588                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7589                                        btrfs_header_level(buf), 1, NULL,
7590                                        &flags);
7591                 if (ret < 0) {
7592                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7593                         if (ret < 0) {
7594                                 fprintf(stderr, "Couldn't calc extent flags\n");
7595                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7596                         }
7597                 }
7598         } else {
7599                 flags = 0;
7600                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7601                 if (ret < 0) {
7602                         fprintf(stderr, "Couldn't calc extent flags\n");
7603                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7604                 }
7605         }
7606
7607         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7608                 if (ri != NULL &&
7609                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7610                     ri->objectid == btrfs_header_owner(buf)) {
7611                         /*
7612                          * Ok we got to this block from it's original owner and
7613                          * we have FULL_BACKREF set.  Relocation can leave
7614                          * converted blocks over so this is altogether possible,
7615                          * however it's not possible if the generation > the
7616                          * last snapshot, so check for this case.
7617                          */
7618                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7619                             btrfs_header_generation(buf) > ri->last_snapshot) {
7620                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7621                                 rec->bad_full_backref = 1;
7622                         }
7623                 }
7624         } else {
7625                 if (ri != NULL &&
7626                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7627                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7628                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7629                         rec->bad_full_backref = 1;
7630                 }
7631         }
7632
7633         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7634                 rec->flag_block_full_backref = 1;
7635                 parent = bytenr;
7636                 owner = 0;
7637         } else {
7638                 rec->flag_block_full_backref = 0;
7639                 parent = 0;
7640                 owner = btrfs_header_owner(buf);
7641         }
7642
7643         ret = check_block(root, extent_cache, buf, flags);
7644         if (ret)
7645                 goto out;
7646
7647         if (btrfs_is_leaf(buf)) {
7648                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7649                 for (i = 0; i < nritems; i++) {
7650                         struct btrfs_file_extent_item *fi;
7651                         btrfs_item_key_to_cpu(buf, &key, i);
7652                         /*
7653                          * Check key type against the leaf owner.
7654                          * Could filter quite a lot of early error if
7655                          * owner is correct
7656                          */
7657                         if (check_type_with_root(btrfs_header_owner(buf),
7658                                                  key.type)) {
7659                                 fprintf(stderr, "ignoring invalid key\n");
7660                                 continue;
7661                         }
7662                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7663                                 process_extent_item(root, extent_cache, buf,
7664                                                     i);
7665                                 continue;
7666                         }
7667                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7668                                 process_extent_item(root, extent_cache, buf,
7669                                                     i);
7670                                 continue;
7671                         }
7672                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7673                                 total_csum_bytes +=
7674                                         btrfs_item_size_nr(buf, i);
7675                                 continue;
7676                         }
7677                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7678                                 process_chunk_item(chunk_cache, &key, buf, i);
7679                                 continue;
7680                         }
7681                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7682                                 process_device_item(dev_cache, &key, buf, i);
7683                                 continue;
7684                         }
7685                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7686                                 process_block_group_item(block_group_cache,
7687                                         &key, buf, i);
7688                                 continue;
7689                         }
7690                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7691                                 process_device_extent_item(dev_extent_cache,
7692                                         &key, buf, i);
7693                                 continue;
7694
7695                         }
7696                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7697 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7698                                 process_extent_ref_v0(extent_cache, buf, i);
7699 #else
7700                                 BUG();
7701 #endif
7702                                 continue;
7703                         }
7704
7705                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7706                                 ret = add_tree_backref(extent_cache,
7707                                                 key.objectid, 0, key.offset, 0);
7708                                 if (ret < 0)
7709                                         error("add_tree_backref failed: %s",
7710                                               strerror(-ret));
7711                                 continue;
7712                         }
7713                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7714                                 ret = add_tree_backref(extent_cache,
7715                                                 key.objectid, key.offset, 0, 0);
7716                                 if (ret < 0)
7717                                         error("add_tree_backref failed: %s",
7718                                               strerror(-ret));
7719                                 continue;
7720                         }
7721                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7722                                 struct btrfs_extent_data_ref *ref;
7723                                 ref = btrfs_item_ptr(buf, i,
7724                                                 struct btrfs_extent_data_ref);
7725                                 add_data_backref(extent_cache,
7726                                         key.objectid, 0,
7727                                         btrfs_extent_data_ref_root(buf, ref),
7728                                         btrfs_extent_data_ref_objectid(buf,
7729                                                                        ref),
7730                                         btrfs_extent_data_ref_offset(buf, ref),
7731                                         btrfs_extent_data_ref_count(buf, ref),
7732                                         0, root->sectorsize);
7733                                 continue;
7734                         }
7735                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7736                                 struct btrfs_shared_data_ref *ref;
7737                                 ref = btrfs_item_ptr(buf, i,
7738                                                 struct btrfs_shared_data_ref);
7739                                 add_data_backref(extent_cache,
7740                                         key.objectid, key.offset, 0, 0, 0,
7741                                         btrfs_shared_data_ref_count(buf, ref),
7742                                         0, root->sectorsize);
7743                                 continue;
7744                         }
7745                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7746                                 struct bad_item *bad;
7747
7748                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7749                                         continue;
7750                                 if (!owner)
7751                                         continue;
7752                                 bad = malloc(sizeof(struct bad_item));
7753                                 if (!bad)
7754                                         continue;
7755                                 INIT_LIST_HEAD(&bad->list);
7756                                 memcpy(&bad->key, &key,
7757                                        sizeof(struct btrfs_key));
7758                                 bad->root_id = owner;
7759                                 list_add_tail(&bad->list, &delete_items);
7760                                 continue;
7761                         }
7762                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7763                                 continue;
7764                         fi = btrfs_item_ptr(buf, i,
7765                                             struct btrfs_file_extent_item);
7766                         if (btrfs_file_extent_type(buf, fi) ==
7767                             BTRFS_FILE_EXTENT_INLINE)
7768                                 continue;
7769                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7770                                 continue;
7771
7772                         data_bytes_allocated +=
7773                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7774                         if (data_bytes_allocated < root->sectorsize) {
7775                                 abort();
7776                         }
7777                         data_bytes_referenced +=
7778                                 btrfs_file_extent_num_bytes(buf, fi);
7779                         add_data_backref(extent_cache,
7780                                 btrfs_file_extent_disk_bytenr(buf, fi),
7781                                 parent, owner, key.objectid, key.offset -
7782                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7783                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7784                 }
7785         } else {
7786                 int level;
7787                 struct btrfs_key first_key;
7788
7789                 first_key.objectid = 0;
7790
7791                 if (nritems > 0)
7792                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7793                 level = btrfs_header_level(buf);
7794                 for (i = 0; i < nritems; i++) {
7795                         struct extent_record tmpl;
7796
7797                         ptr = btrfs_node_blockptr(buf, i);
7798                         size = root->nodesize;
7799                         btrfs_node_key_to_cpu(buf, &key, i);
7800                         if (ri != NULL) {
7801                                 if ((level == ri->drop_level)
7802                                     && is_dropped_key(&key, &ri->drop_key)) {
7803                                         continue;
7804                                 }
7805                         }
7806
7807                         memset(&tmpl, 0, sizeof(tmpl));
7808                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7809                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7810                         tmpl.start = ptr;
7811                         tmpl.nr = size;
7812                         tmpl.refs = 1;
7813                         tmpl.metadata = 1;
7814                         tmpl.max_size = size;
7815                         ret = add_extent_rec(extent_cache, &tmpl);
7816                         if (ret < 0)
7817                                 goto out;
7818
7819                         ret = add_tree_backref(extent_cache, ptr, parent,
7820                                         owner, 1);
7821                         if (ret < 0) {
7822                                 error("add_tree_backref failed: %s",
7823                                       strerror(-ret));
7824                                 continue;
7825                         }
7826
7827                         if (level > 1) {
7828                                 add_pending(nodes, seen, ptr, size);
7829                         } else {
7830                                 add_pending(pending, seen, ptr, size);
7831                         }
7832                 }
7833                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7834                                       nritems) * sizeof(struct btrfs_key_ptr);
7835         }
7836         total_btree_bytes += buf->len;
7837         if (fs_root_objectid(btrfs_header_owner(buf)))
7838                 total_fs_tree_bytes += buf->len;
7839         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7840                 total_extent_tree_bytes += buf->len;
7841         if (!found_old_backref &&
7842             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7843             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7844             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7845                 found_old_backref = 1;
7846 out:
7847         free_extent_buffer(buf);
7848         return ret;
7849 }
7850
7851 static int add_root_to_pending(struct extent_buffer *buf,
7852                                struct cache_tree *extent_cache,
7853                                struct cache_tree *pending,
7854                                struct cache_tree *seen,
7855                                struct cache_tree *nodes,
7856                                u64 objectid)
7857 {
7858         struct extent_record tmpl;
7859         int ret;
7860
7861         if (btrfs_header_level(buf) > 0)
7862                 add_pending(nodes, seen, buf->start, buf->len);
7863         else
7864                 add_pending(pending, seen, buf->start, buf->len);
7865
7866         memset(&tmpl, 0, sizeof(tmpl));
7867         tmpl.start = buf->start;
7868         tmpl.nr = buf->len;
7869         tmpl.is_root = 1;
7870         tmpl.refs = 1;
7871         tmpl.metadata = 1;
7872         tmpl.max_size = buf->len;
7873         add_extent_rec(extent_cache, &tmpl);
7874
7875         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7876             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7877                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7878                                 0, 1);
7879         else
7880                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7881                                 1);
7882         return ret;
7883 }
7884
7885 /* as we fix the tree, we might be deleting blocks that
7886  * we're tracking for repair.  This hook makes sure we
7887  * remove any backrefs for blocks as we are fixing them.
7888  */
7889 static int free_extent_hook(struct btrfs_trans_handle *trans,
7890                             struct btrfs_root *root,
7891                             u64 bytenr, u64 num_bytes, u64 parent,
7892                             u64 root_objectid, u64 owner, u64 offset,
7893                             int refs_to_drop)
7894 {
7895         struct extent_record *rec;
7896         struct cache_extent *cache;
7897         int is_data;
7898         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7899
7900         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7901         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7902         if (!cache)
7903                 return 0;
7904
7905         rec = container_of(cache, struct extent_record, cache);
7906         if (is_data) {
7907                 struct data_backref *back;
7908                 back = find_data_backref(rec, parent, root_objectid, owner,
7909                                          offset, 1, bytenr, num_bytes);
7910                 if (!back)
7911                         goto out;
7912                 if (back->node.found_ref) {
7913                         back->found_ref -= refs_to_drop;
7914                         if (rec->refs)
7915                                 rec->refs -= refs_to_drop;
7916                 }
7917                 if (back->node.found_extent_tree) {
7918                         back->num_refs -= refs_to_drop;
7919                         if (rec->extent_item_refs)
7920                                 rec->extent_item_refs -= refs_to_drop;
7921                 }
7922                 if (back->found_ref == 0)
7923                         back->node.found_ref = 0;
7924                 if (back->num_refs == 0)
7925                         back->node.found_extent_tree = 0;
7926
7927                 if (!back->node.found_extent_tree && back->node.found_ref) {
7928                         list_del(&back->node.list);
7929                         free(back);
7930                 }
7931         } else {
7932                 struct tree_backref *back;
7933                 back = find_tree_backref(rec, parent, root_objectid);
7934                 if (!back)
7935                         goto out;
7936                 if (back->node.found_ref) {
7937                         if (rec->refs)
7938                                 rec->refs--;
7939                         back->node.found_ref = 0;
7940                 }
7941                 if (back->node.found_extent_tree) {
7942                         if (rec->extent_item_refs)
7943                                 rec->extent_item_refs--;
7944                         back->node.found_extent_tree = 0;
7945                 }
7946                 if (!back->node.found_extent_tree && back->node.found_ref) {
7947                         list_del(&back->node.list);
7948                         free(back);
7949                 }
7950         }
7951         maybe_free_extent_rec(extent_cache, rec);
7952 out:
7953         return 0;
7954 }
7955
7956 static int delete_extent_records(struct btrfs_trans_handle *trans,
7957                                  struct btrfs_root *root,
7958                                  struct btrfs_path *path,
7959                                  u64 bytenr, u64 new_len)
7960 {
7961         struct btrfs_key key;
7962         struct btrfs_key found_key;
7963         struct extent_buffer *leaf;
7964         int ret;
7965         int slot;
7966
7967
7968         key.objectid = bytenr;
7969         key.type = (u8)-1;
7970         key.offset = (u64)-1;
7971
7972         while(1) {
7973                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7974                                         &key, path, 0, 1);
7975                 if (ret < 0)
7976                         break;
7977
7978                 if (ret > 0) {
7979                         ret = 0;
7980                         if (path->slots[0] == 0)
7981                                 break;
7982                         path->slots[0]--;
7983                 }
7984                 ret = 0;
7985
7986                 leaf = path->nodes[0];
7987                 slot = path->slots[0];
7988
7989                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7990                 if (found_key.objectid != bytenr)
7991                         break;
7992
7993                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7994                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7995                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7996                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7997                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7998                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7999                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8000                         btrfs_release_path(path);
8001                         if (found_key.type == 0) {
8002                                 if (found_key.offset == 0)
8003                                         break;
8004                                 key.offset = found_key.offset - 1;
8005                                 key.type = found_key.type;
8006                         }
8007                         key.type = found_key.type - 1;
8008                         key.offset = (u64)-1;
8009                         continue;
8010                 }
8011
8012                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8013                         found_key.objectid, found_key.type, found_key.offset);
8014
8015                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8016                 if (ret)
8017                         break;
8018                 btrfs_release_path(path);
8019
8020                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8021                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8022                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8023                                 found_key.offset : root->nodesize;
8024
8025                         ret = btrfs_update_block_group(trans, root, bytenr,
8026                                                        bytes, 0, 0);
8027                         if (ret)
8028                                 break;
8029                 }
8030         }
8031
8032         btrfs_release_path(path);
8033         return ret;
8034 }
8035
8036 /*
8037  * for a single backref, this will allocate a new extent
8038  * and add the backref to it.
8039  */
8040 static int record_extent(struct btrfs_trans_handle *trans,
8041                          struct btrfs_fs_info *info,
8042                          struct btrfs_path *path,
8043                          struct extent_record *rec,
8044                          struct extent_backref *back,
8045                          int allocated, u64 flags)
8046 {
8047         int ret;
8048         struct btrfs_root *extent_root = info->extent_root;
8049         struct extent_buffer *leaf;
8050         struct btrfs_key ins_key;
8051         struct btrfs_extent_item *ei;
8052         struct data_backref *dback;
8053         struct btrfs_tree_block_info *bi;
8054
8055         if (!back->is_data)
8056                 rec->max_size = max_t(u64, rec->max_size,
8057                                     info->extent_root->nodesize);
8058
8059         if (!allocated) {
8060                 u32 item_size = sizeof(*ei);
8061
8062                 if (!back->is_data)
8063                         item_size += sizeof(*bi);
8064
8065                 ins_key.objectid = rec->start;
8066                 ins_key.offset = rec->max_size;
8067                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8068
8069                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8070                                         &ins_key, item_size);
8071                 if (ret)
8072                         goto fail;
8073
8074                 leaf = path->nodes[0];
8075                 ei = btrfs_item_ptr(leaf, path->slots[0],
8076                                     struct btrfs_extent_item);
8077
8078                 btrfs_set_extent_refs(leaf, ei, 0);
8079                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8080
8081                 if (back->is_data) {
8082                         btrfs_set_extent_flags(leaf, ei,
8083                                                BTRFS_EXTENT_FLAG_DATA);
8084                 } else {
8085                         struct btrfs_disk_key copy_key;;
8086
8087                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8088                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8089                                              sizeof(*bi));
8090
8091                         btrfs_set_disk_key_objectid(&copy_key,
8092                                                     rec->info_objectid);
8093                         btrfs_set_disk_key_type(&copy_key, 0);
8094                         btrfs_set_disk_key_offset(&copy_key, 0);
8095
8096                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8097                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8098
8099                         btrfs_set_extent_flags(leaf, ei,
8100                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8101                 }
8102
8103                 btrfs_mark_buffer_dirty(leaf);
8104                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8105                                                rec->max_size, 1, 0);
8106                 if (ret)
8107                         goto fail;
8108                 btrfs_release_path(path);
8109         }
8110
8111         if (back->is_data) {
8112                 u64 parent;
8113                 int i;
8114
8115                 dback = to_data_backref(back);
8116                 if (back->full_backref)
8117                         parent = dback->parent;
8118                 else
8119                         parent = 0;
8120
8121                 for (i = 0; i < dback->found_ref; i++) {
8122                         /* if parent != 0, we're doing a full backref
8123                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8124                          * just makes the backref allocator create a data
8125                          * backref
8126                          */
8127                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8128                                                    rec->start, rec->max_size,
8129                                                    parent,
8130                                                    dback->root,
8131                                                    parent ?
8132                                                    BTRFS_FIRST_FREE_OBJECTID :
8133                                                    dback->owner,
8134                                                    dback->offset);
8135                         if (ret)
8136                                 break;
8137                 }
8138                 fprintf(stderr, "adding new data backref"
8139                                 " on %llu %s %llu owner %llu"
8140                                 " offset %llu found %d\n",
8141                                 (unsigned long long)rec->start,
8142                                 back->full_backref ?
8143                                 "parent" : "root",
8144                                 back->full_backref ?
8145                                 (unsigned long long)parent :
8146                                 (unsigned long long)dback->root,
8147                                 (unsigned long long)dback->owner,
8148                                 (unsigned long long)dback->offset,
8149                                 dback->found_ref);
8150         } else {
8151                 u64 parent;
8152                 struct tree_backref *tback;
8153
8154                 tback = to_tree_backref(back);
8155                 if (back->full_backref)
8156                         parent = tback->parent;
8157                 else
8158                         parent = 0;
8159
8160                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8161                                            rec->start, rec->max_size,
8162                                            parent, tback->root, 0, 0);
8163                 fprintf(stderr, "adding new tree backref on "
8164                         "start %llu len %llu parent %llu root %llu\n",
8165                         rec->start, rec->max_size, parent, tback->root);
8166         }
8167 fail:
8168         btrfs_release_path(path);
8169         return ret;
8170 }
8171
8172 static struct extent_entry *find_entry(struct list_head *entries,
8173                                        u64 bytenr, u64 bytes)
8174 {
8175         struct extent_entry *entry = NULL;
8176
8177         list_for_each_entry(entry, entries, list) {
8178                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8179                         return entry;
8180         }
8181
8182         return NULL;
8183 }
8184
8185 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8186 {
8187         struct extent_entry *entry, *best = NULL, *prev = NULL;
8188
8189         list_for_each_entry(entry, entries, list) {
8190                 /*
8191                  * If there are as many broken entries as entries then we know
8192                  * not to trust this particular entry.
8193                  */
8194                 if (entry->broken == entry->count)
8195                         continue;
8196
8197                 /*
8198                  * Special case, when there are only two entries and 'best' is
8199                  * the first one
8200                  */
8201                 if (!prev) {
8202                         best = entry;
8203                         prev = entry;
8204                         continue;
8205                 }
8206
8207                 /*
8208                  * If our current entry == best then we can't be sure our best
8209                  * is really the best, so we need to keep searching.
8210                  */
8211                 if (best && best->count == entry->count) {
8212                         prev = entry;
8213                         best = NULL;
8214                         continue;
8215                 }
8216
8217                 /* Prev == entry, not good enough, have to keep searching */
8218                 if (!prev->broken && prev->count == entry->count)
8219                         continue;
8220
8221                 if (!best)
8222                         best = (prev->count > entry->count) ? prev : entry;
8223                 else if (best->count < entry->count)
8224                         best = entry;
8225                 prev = entry;
8226         }
8227
8228         return best;
8229 }
8230
8231 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8232                       struct data_backref *dback, struct extent_entry *entry)
8233 {
8234         struct btrfs_trans_handle *trans;
8235         struct btrfs_root *root;
8236         struct btrfs_file_extent_item *fi;
8237         struct extent_buffer *leaf;
8238         struct btrfs_key key;
8239         u64 bytenr, bytes;
8240         int ret, err;
8241
8242         key.objectid = dback->root;
8243         key.type = BTRFS_ROOT_ITEM_KEY;
8244         key.offset = (u64)-1;
8245         root = btrfs_read_fs_root(info, &key);
8246         if (IS_ERR(root)) {
8247                 fprintf(stderr, "Couldn't find root for our ref\n");
8248                 return -EINVAL;
8249         }
8250
8251         /*
8252          * The backref points to the original offset of the extent if it was
8253          * split, so we need to search down to the offset we have and then walk
8254          * forward until we find the backref we're looking for.
8255          */
8256         key.objectid = dback->owner;
8257         key.type = BTRFS_EXTENT_DATA_KEY;
8258         key.offset = dback->offset;
8259         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8260         if (ret < 0) {
8261                 fprintf(stderr, "Error looking up ref %d\n", ret);
8262                 return ret;
8263         }
8264
8265         while (1) {
8266                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8267                         ret = btrfs_next_leaf(root, path);
8268                         if (ret) {
8269                                 fprintf(stderr, "Couldn't find our ref, next\n");
8270                                 return -EINVAL;
8271                         }
8272                 }
8273                 leaf = path->nodes[0];
8274                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8275                 if (key.objectid != dback->owner ||
8276                     key.type != BTRFS_EXTENT_DATA_KEY) {
8277                         fprintf(stderr, "Couldn't find our ref, search\n");
8278                         return -EINVAL;
8279                 }
8280                 fi = btrfs_item_ptr(leaf, path->slots[0],
8281                                     struct btrfs_file_extent_item);
8282                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8283                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8284
8285                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8286                         break;
8287                 path->slots[0]++;
8288         }
8289
8290         btrfs_release_path(path);
8291
8292         trans = btrfs_start_transaction(root, 1);
8293         if (IS_ERR(trans))
8294                 return PTR_ERR(trans);
8295
8296         /*
8297          * Ok we have the key of the file extent we want to fix, now we can cow
8298          * down to the thing and fix it.
8299          */
8300         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8301         if (ret < 0) {
8302                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8303                         key.objectid, key.type, key.offset, ret);
8304                 goto out;
8305         }
8306         if (ret > 0) {
8307                 fprintf(stderr, "Well that's odd, we just found this key "
8308                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8309                         key.offset);
8310                 ret = -EINVAL;
8311                 goto out;
8312         }
8313         leaf = path->nodes[0];
8314         fi = btrfs_item_ptr(leaf, path->slots[0],
8315                             struct btrfs_file_extent_item);
8316
8317         if (btrfs_file_extent_compression(leaf, fi) &&
8318             dback->disk_bytenr != entry->bytenr) {
8319                 fprintf(stderr, "Ref doesn't match the record start and is "
8320                         "compressed, please take a btrfs-image of this file "
8321                         "system and send it to a btrfs developer so they can "
8322                         "complete this functionality for bytenr %Lu\n",
8323                         dback->disk_bytenr);
8324                 ret = -EINVAL;
8325                 goto out;
8326         }
8327
8328         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8329                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8330         } else if (dback->disk_bytenr > entry->bytenr) {
8331                 u64 off_diff, offset;
8332
8333                 off_diff = dback->disk_bytenr - entry->bytenr;
8334                 offset = btrfs_file_extent_offset(leaf, fi);
8335                 if (dback->disk_bytenr + offset +
8336                     btrfs_file_extent_num_bytes(leaf, fi) >
8337                     entry->bytenr + entry->bytes) {
8338                         fprintf(stderr, "Ref is past the entry end, please "
8339                                 "take a btrfs-image of this file system and "
8340                                 "send it to a btrfs developer, ref %Lu\n",
8341                                 dback->disk_bytenr);
8342                         ret = -EINVAL;
8343                         goto out;
8344                 }
8345                 offset += off_diff;
8346                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8347                 btrfs_set_file_extent_offset(leaf, fi, offset);
8348         } else if (dback->disk_bytenr < entry->bytenr) {
8349                 u64 offset;
8350
8351                 offset = btrfs_file_extent_offset(leaf, fi);
8352                 if (dback->disk_bytenr + offset < entry->bytenr) {
8353                         fprintf(stderr, "Ref is before the entry start, please"
8354                                 " take a btrfs-image of this file system and "
8355                                 "send it to a btrfs developer, ref %Lu\n",
8356                                 dback->disk_bytenr);
8357                         ret = -EINVAL;
8358                         goto out;
8359                 }
8360
8361                 offset += dback->disk_bytenr;
8362                 offset -= entry->bytenr;
8363                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8364                 btrfs_set_file_extent_offset(leaf, fi, offset);
8365         }
8366
8367         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8368
8369         /*
8370          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8371          * only do this if we aren't using compression, otherwise it's a
8372          * trickier case.
8373          */
8374         if (!btrfs_file_extent_compression(leaf, fi))
8375                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8376         else
8377                 printf("ram bytes may be wrong?\n");
8378         btrfs_mark_buffer_dirty(leaf);
8379 out:
8380         err = btrfs_commit_transaction(trans, root);
8381         btrfs_release_path(path);
8382         return ret ? ret : err;
8383 }
8384
8385 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8386                            struct extent_record *rec)
8387 {
8388         struct extent_backref *back;
8389         struct data_backref *dback;
8390         struct extent_entry *entry, *best = NULL;
8391         LIST_HEAD(entries);
8392         int nr_entries = 0;
8393         int broken_entries = 0;
8394         int ret = 0;
8395         short mismatch = 0;
8396
8397         /*
8398          * Metadata is easy and the backrefs should always agree on bytenr and
8399          * size, if not we've got bigger issues.
8400          */
8401         if (rec->metadata)
8402                 return 0;
8403
8404         list_for_each_entry(back, &rec->backrefs, list) {
8405                 if (back->full_backref || !back->is_data)
8406                         continue;
8407
8408                 dback = to_data_backref(back);
8409
8410                 /*
8411                  * We only pay attention to backrefs that we found a real
8412                  * backref for.
8413                  */
8414                 if (dback->found_ref == 0)
8415                         continue;
8416
8417                 /*
8418                  * For now we only catch when the bytes don't match, not the
8419                  * bytenr.  We can easily do this at the same time, but I want
8420                  * to have a fs image to test on before we just add repair
8421                  * functionality willy-nilly so we know we won't screw up the
8422                  * repair.
8423                  */
8424
8425                 entry = find_entry(&entries, dback->disk_bytenr,
8426                                    dback->bytes);
8427                 if (!entry) {
8428                         entry = malloc(sizeof(struct extent_entry));
8429                         if (!entry) {
8430                                 ret = -ENOMEM;
8431                                 goto out;
8432                         }
8433                         memset(entry, 0, sizeof(*entry));
8434                         entry->bytenr = dback->disk_bytenr;
8435                         entry->bytes = dback->bytes;
8436                         list_add_tail(&entry->list, &entries);
8437                         nr_entries++;
8438                 }
8439
8440                 /*
8441                  * If we only have on entry we may think the entries agree when
8442                  * in reality they don't so we have to do some extra checking.
8443                  */
8444                 if (dback->disk_bytenr != rec->start ||
8445                     dback->bytes != rec->nr || back->broken)
8446                         mismatch = 1;
8447
8448                 if (back->broken) {
8449                         entry->broken++;
8450                         broken_entries++;
8451                 }
8452
8453                 entry->count++;
8454         }
8455
8456         /* Yay all the backrefs agree, carry on good sir */
8457         if (nr_entries <= 1 && !mismatch)
8458                 goto out;
8459
8460         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8461                 "%Lu\n", rec->start);
8462
8463         /*
8464          * First we want to see if the backrefs can agree amongst themselves who
8465          * is right, so figure out which one of the entries has the highest
8466          * count.
8467          */
8468         best = find_most_right_entry(&entries);
8469
8470         /*
8471          * Ok so we may have an even split between what the backrefs think, so
8472          * this is where we use the extent ref to see what it thinks.
8473          */
8474         if (!best) {
8475                 entry = find_entry(&entries, rec->start, rec->nr);
8476                 if (!entry && (!broken_entries || !rec->found_rec)) {
8477                         fprintf(stderr, "Backrefs don't agree with each other "
8478                                 "and extent record doesn't agree with anybody,"
8479                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8480                                 rec->start, rec->nr);
8481                         ret = -EINVAL;
8482                         goto out;
8483                 } else if (!entry) {
8484                         /*
8485                          * Ok our backrefs were broken, we'll assume this is the
8486                          * correct value and add an entry for this range.
8487                          */
8488                         entry = malloc(sizeof(struct extent_entry));
8489                         if (!entry) {
8490                                 ret = -ENOMEM;
8491                                 goto out;
8492                         }
8493                         memset(entry, 0, sizeof(*entry));
8494                         entry->bytenr = rec->start;
8495                         entry->bytes = rec->nr;
8496                         list_add_tail(&entry->list, &entries);
8497                         nr_entries++;
8498                 }
8499                 entry->count++;
8500                 best = find_most_right_entry(&entries);
8501                 if (!best) {
8502                         fprintf(stderr, "Backrefs and extent record evenly "
8503                                 "split on who is right, this is going to "
8504                                 "require user input to fix bytenr %Lu bytes "
8505                                 "%Lu\n", rec->start, rec->nr);
8506                         ret = -EINVAL;
8507                         goto out;
8508                 }
8509         }
8510
8511         /*
8512          * I don't think this can happen currently as we'll abort() if we catch
8513          * this case higher up, but in case somebody removes that we still can't
8514          * deal with it properly here yet, so just bail out of that's the case.
8515          */
8516         if (best->bytenr != rec->start) {
8517                 fprintf(stderr, "Extent start and backref starts don't match, "
8518                         "please use btrfs-image on this file system and send "
8519                         "it to a btrfs developer so they can make fsck fix "
8520                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8521                         rec->start, rec->nr);
8522                 ret = -EINVAL;
8523                 goto out;
8524         }
8525
8526         /*
8527          * Ok great we all agreed on an extent record, let's go find the real
8528          * references and fix up the ones that don't match.
8529          */
8530         list_for_each_entry(back, &rec->backrefs, list) {
8531                 if (back->full_backref || !back->is_data)
8532                         continue;
8533
8534                 dback = to_data_backref(back);
8535
8536                 /*
8537                  * Still ignoring backrefs that don't have a real ref attached
8538                  * to them.
8539                  */
8540                 if (dback->found_ref == 0)
8541                         continue;
8542
8543                 if (dback->bytes == best->bytes &&
8544                     dback->disk_bytenr == best->bytenr)
8545                         continue;
8546
8547                 ret = repair_ref(info, path, dback, best);
8548                 if (ret)
8549                         goto out;
8550         }
8551
8552         /*
8553          * Ok we messed with the actual refs, which means we need to drop our
8554          * entire cache and go back and rescan.  I know this is a huge pain and
8555          * adds a lot of extra work, but it's the only way to be safe.  Once all
8556          * the backrefs agree we may not need to do anything to the extent
8557          * record itself.
8558          */
8559         ret = -EAGAIN;
8560 out:
8561         while (!list_empty(&entries)) {
8562                 entry = list_entry(entries.next, struct extent_entry, list);
8563                 list_del_init(&entry->list);
8564                 free(entry);
8565         }
8566         return ret;
8567 }
8568
8569 static int process_duplicates(struct btrfs_root *root,
8570                               struct cache_tree *extent_cache,
8571                               struct extent_record *rec)
8572 {
8573         struct extent_record *good, *tmp;
8574         struct cache_extent *cache;
8575         int ret;
8576
8577         /*
8578          * If we found a extent record for this extent then return, or if we
8579          * have more than one duplicate we are likely going to need to delete
8580          * something.
8581          */
8582         if (rec->found_rec || rec->num_duplicates > 1)
8583                 return 0;
8584
8585         /* Shouldn't happen but just in case */
8586         BUG_ON(!rec->num_duplicates);
8587
8588         /*
8589          * So this happens if we end up with a backref that doesn't match the
8590          * actual extent entry.  So either the backref is bad or the extent
8591          * entry is bad.  Either way we want to have the extent_record actually
8592          * reflect what we found in the extent_tree, so we need to take the
8593          * duplicate out and use that as the extent_record since the only way we
8594          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8595          */
8596         remove_cache_extent(extent_cache, &rec->cache);
8597
8598         good = to_extent_record(rec->dups.next);
8599         list_del_init(&good->list);
8600         INIT_LIST_HEAD(&good->backrefs);
8601         INIT_LIST_HEAD(&good->dups);
8602         good->cache.start = good->start;
8603         good->cache.size = good->nr;
8604         good->content_checked = 0;
8605         good->owner_ref_checked = 0;
8606         good->num_duplicates = 0;
8607         good->refs = rec->refs;
8608         list_splice_init(&rec->backrefs, &good->backrefs);
8609         while (1) {
8610                 cache = lookup_cache_extent(extent_cache, good->start,
8611                                             good->nr);
8612                 if (!cache)
8613                         break;
8614                 tmp = container_of(cache, struct extent_record, cache);
8615
8616                 /*
8617                  * If we find another overlapping extent and it's found_rec is
8618                  * set then it's a duplicate and we need to try and delete
8619                  * something.
8620                  */
8621                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8622                         if (list_empty(&good->list))
8623                                 list_add_tail(&good->list,
8624                                               &duplicate_extents);
8625                         good->num_duplicates += tmp->num_duplicates + 1;
8626                         list_splice_init(&tmp->dups, &good->dups);
8627                         list_del_init(&tmp->list);
8628                         list_add_tail(&tmp->list, &good->dups);
8629                         remove_cache_extent(extent_cache, &tmp->cache);
8630                         continue;
8631                 }
8632
8633                 /*
8634                  * Ok we have another non extent item backed extent rec, so lets
8635                  * just add it to this extent and carry on like we did above.
8636                  */
8637                 good->refs += tmp->refs;
8638                 list_splice_init(&tmp->backrefs, &good->backrefs);
8639                 remove_cache_extent(extent_cache, &tmp->cache);
8640                 free(tmp);
8641         }
8642         ret = insert_cache_extent(extent_cache, &good->cache);
8643         BUG_ON(ret);
8644         free(rec);
8645         return good->num_duplicates ? 0 : 1;
8646 }
8647
8648 static int delete_duplicate_records(struct btrfs_root *root,
8649                                     struct extent_record *rec)
8650 {
8651         struct btrfs_trans_handle *trans;
8652         LIST_HEAD(delete_list);
8653         struct btrfs_path path;
8654         struct extent_record *tmp, *good, *n;
8655         int nr_del = 0;
8656         int ret = 0, err;
8657         struct btrfs_key key;
8658
8659         btrfs_init_path(&path);
8660
8661         good = rec;
8662         /* Find the record that covers all of the duplicates. */
8663         list_for_each_entry(tmp, &rec->dups, list) {
8664                 if (good->start < tmp->start)
8665                         continue;
8666                 if (good->nr > tmp->nr)
8667                         continue;
8668
8669                 if (tmp->start + tmp->nr < good->start + good->nr) {
8670                         fprintf(stderr, "Ok we have overlapping extents that "
8671                                 "aren't completely covered by each other, this "
8672                                 "is going to require more careful thought.  "
8673                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8674                                 tmp->start, tmp->nr, good->start, good->nr);
8675                         abort();
8676                 }
8677                 good = tmp;
8678         }
8679
8680         if (good != rec)
8681                 list_add_tail(&rec->list, &delete_list);
8682
8683         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8684                 if (tmp == good)
8685                         continue;
8686                 list_move_tail(&tmp->list, &delete_list);
8687         }
8688
8689         root = root->fs_info->extent_root;
8690         trans = btrfs_start_transaction(root, 1);
8691         if (IS_ERR(trans)) {
8692                 ret = PTR_ERR(trans);
8693                 goto out;
8694         }
8695
8696         list_for_each_entry(tmp, &delete_list, list) {
8697                 if (tmp->found_rec == 0)
8698                         continue;
8699                 key.objectid = tmp->start;
8700                 key.type = BTRFS_EXTENT_ITEM_KEY;
8701                 key.offset = tmp->nr;
8702
8703                 /* Shouldn't happen but just in case */
8704                 if (tmp->metadata) {
8705                         fprintf(stderr, "Well this shouldn't happen, extent "
8706                                 "record overlaps but is metadata? "
8707                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8708                         abort();
8709                 }
8710
8711                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8712                 if (ret) {
8713                         if (ret > 0)
8714                                 ret = -EINVAL;
8715                         break;
8716                 }
8717                 ret = btrfs_del_item(trans, root, &path);
8718                 if (ret)
8719                         break;
8720                 btrfs_release_path(&path);
8721                 nr_del++;
8722         }
8723         err = btrfs_commit_transaction(trans, root);
8724         if (err && !ret)
8725                 ret = err;
8726 out:
8727         while (!list_empty(&delete_list)) {
8728                 tmp = to_extent_record(delete_list.next);
8729                 list_del_init(&tmp->list);
8730                 if (tmp == rec)
8731                         continue;
8732                 free(tmp);
8733         }
8734
8735         while (!list_empty(&rec->dups)) {
8736                 tmp = to_extent_record(rec->dups.next);
8737                 list_del_init(&tmp->list);
8738                 free(tmp);
8739         }
8740
8741         btrfs_release_path(&path);
8742
8743         if (!ret && !nr_del)
8744                 rec->num_duplicates = 0;
8745
8746         return ret ? ret : nr_del;
8747 }
8748
8749 static int find_possible_backrefs(struct btrfs_fs_info *info,
8750                                   struct btrfs_path *path,
8751                                   struct cache_tree *extent_cache,
8752                                   struct extent_record *rec)
8753 {
8754         struct btrfs_root *root;
8755         struct extent_backref *back;
8756         struct data_backref *dback;
8757         struct cache_extent *cache;
8758         struct btrfs_file_extent_item *fi;
8759         struct btrfs_key key;
8760         u64 bytenr, bytes;
8761         int ret;
8762
8763         list_for_each_entry(back, &rec->backrefs, list) {
8764                 /* Don't care about full backrefs (poor unloved backrefs) */
8765                 if (back->full_backref || !back->is_data)
8766                         continue;
8767
8768                 dback = to_data_backref(back);
8769
8770                 /* We found this one, we don't need to do a lookup */
8771                 if (dback->found_ref)
8772                         continue;
8773
8774                 key.objectid = dback->root;
8775                 key.type = BTRFS_ROOT_ITEM_KEY;
8776                 key.offset = (u64)-1;
8777
8778                 root = btrfs_read_fs_root(info, &key);
8779
8780                 /* No root, definitely a bad ref, skip */
8781                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8782                         continue;
8783                 /* Other err, exit */
8784                 if (IS_ERR(root))
8785                         return PTR_ERR(root);
8786
8787                 key.objectid = dback->owner;
8788                 key.type = BTRFS_EXTENT_DATA_KEY;
8789                 key.offset = dback->offset;
8790                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8791                 if (ret) {
8792                         btrfs_release_path(path);
8793                         if (ret < 0)
8794                                 return ret;
8795                         /* Didn't find it, we can carry on */
8796                         ret = 0;
8797                         continue;
8798                 }
8799
8800                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8801                                     struct btrfs_file_extent_item);
8802                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8803                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8804                 btrfs_release_path(path);
8805                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8806                 if (cache) {
8807                         struct extent_record *tmp;
8808                         tmp = container_of(cache, struct extent_record, cache);
8809
8810                         /*
8811                          * If we found an extent record for the bytenr for this
8812                          * particular backref then we can't add it to our
8813                          * current extent record.  We only want to add backrefs
8814                          * that don't have a corresponding extent item in the
8815                          * extent tree since they likely belong to this record
8816                          * and we need to fix it if it doesn't match bytenrs.
8817                          */
8818                         if  (tmp->found_rec)
8819                                 continue;
8820                 }
8821
8822                 dback->found_ref += 1;
8823                 dback->disk_bytenr = bytenr;
8824                 dback->bytes = bytes;
8825
8826                 /*
8827                  * Set this so the verify backref code knows not to trust the
8828                  * values in this backref.
8829                  */
8830                 back->broken = 1;
8831         }
8832
8833         return 0;
8834 }
8835
8836 /*
8837  * Record orphan data ref into corresponding root.
8838  *
8839  * Return 0 if the extent item contains data ref and recorded.
8840  * Return 1 if the extent item contains no useful data ref
8841  *   On that case, it may contains only shared_dataref or metadata backref
8842  *   or the file extent exists(this should be handled by the extent bytenr
8843  *   recovery routine)
8844  * Return <0 if something goes wrong.
8845  */
8846 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8847                                       struct extent_record *rec)
8848 {
8849         struct btrfs_key key;
8850         struct btrfs_root *dest_root;
8851         struct extent_backref *back;
8852         struct data_backref *dback;
8853         struct orphan_data_extent *orphan;
8854         struct btrfs_path path;
8855         int recorded_data_ref = 0;
8856         int ret = 0;
8857
8858         if (rec->metadata)
8859                 return 1;
8860         btrfs_init_path(&path);
8861         list_for_each_entry(back, &rec->backrefs, list) {
8862                 if (back->full_backref || !back->is_data ||
8863                     !back->found_extent_tree)
8864                         continue;
8865                 dback = to_data_backref(back);
8866                 if (dback->found_ref)
8867                         continue;
8868                 key.objectid = dback->root;
8869                 key.type = BTRFS_ROOT_ITEM_KEY;
8870                 key.offset = (u64)-1;
8871
8872                 dest_root = btrfs_read_fs_root(fs_info, &key);
8873
8874                 /* For non-exist root we just skip it */
8875                 if (IS_ERR(dest_root) || !dest_root)
8876                         continue;
8877
8878                 key.objectid = dback->owner;
8879                 key.type = BTRFS_EXTENT_DATA_KEY;
8880                 key.offset = dback->offset;
8881
8882                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8883                 btrfs_release_path(&path);
8884                 /*
8885                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8886                  * we need to record it for inode/file extent rebuild.
8887                  * For ret > 0, we record it only for file extent rebuild.
8888                  * For ret == 0, the file extent exists but only bytenr
8889                  * mismatch, let the original bytenr fix routine to handle,
8890                  * don't record it.
8891                  */
8892                 if (ret == 0)
8893                         continue;
8894                 ret = 0;
8895                 orphan = malloc(sizeof(*orphan));
8896                 if (!orphan) {
8897                         ret = -ENOMEM;
8898                         goto out;
8899                 }
8900                 INIT_LIST_HEAD(&orphan->list);
8901                 orphan->root = dback->root;
8902                 orphan->objectid = dback->owner;
8903                 orphan->offset = dback->offset;
8904                 orphan->disk_bytenr = rec->cache.start;
8905                 orphan->disk_len = rec->cache.size;
8906                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8907                 recorded_data_ref = 1;
8908         }
8909 out:
8910         btrfs_release_path(&path);
8911         if (!ret)
8912                 return !recorded_data_ref;
8913         else
8914                 return ret;
8915 }
8916
8917 /*
8918  * when an incorrect extent item is found, this will delete
8919  * all of the existing entries for it and recreate them
8920  * based on what the tree scan found.
8921  */
8922 static int fixup_extent_refs(struct btrfs_fs_info *info,
8923                              struct cache_tree *extent_cache,
8924                              struct extent_record *rec)
8925 {
8926         struct btrfs_trans_handle *trans = NULL;
8927         int ret;
8928         struct btrfs_path path;
8929         struct list_head *cur = rec->backrefs.next;
8930         struct cache_extent *cache;
8931         struct extent_backref *back;
8932         int allocated = 0;
8933         u64 flags = 0;
8934
8935         if (rec->flag_block_full_backref)
8936                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8937
8938         btrfs_init_path(&path);
8939         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8940                 /*
8941                  * Sometimes the backrefs themselves are so broken they don't
8942                  * get attached to any meaningful rec, so first go back and
8943                  * check any of our backrefs that we couldn't find and throw
8944                  * them into the list if we find the backref so that
8945                  * verify_backrefs can figure out what to do.
8946                  */
8947                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8948                 if (ret < 0)
8949                         goto out;
8950         }
8951
8952         /* step one, make sure all of the backrefs agree */
8953         ret = verify_backrefs(info, &path, rec);
8954         if (ret < 0)
8955                 goto out;
8956
8957         trans = btrfs_start_transaction(info->extent_root, 1);
8958         if (IS_ERR(trans)) {
8959                 ret = PTR_ERR(trans);
8960                 goto out;
8961         }
8962
8963         /* step two, delete all the existing records */
8964         ret = delete_extent_records(trans, info->extent_root, &path,
8965                                     rec->start, rec->max_size);
8966
8967         if (ret < 0)
8968                 goto out;
8969
8970         /* was this block corrupt?  If so, don't add references to it */
8971         cache = lookup_cache_extent(info->corrupt_blocks,
8972                                     rec->start, rec->max_size);
8973         if (cache) {
8974                 ret = 0;
8975                 goto out;
8976         }
8977
8978         /* step three, recreate all the refs we did find */
8979         while(cur != &rec->backrefs) {
8980                 back = to_extent_backref(cur);
8981                 cur = cur->next;
8982
8983                 /*
8984                  * if we didn't find any references, don't create a
8985                  * new extent record
8986                  */
8987                 if (!back->found_ref)
8988                         continue;
8989
8990                 rec->bad_full_backref = 0;
8991                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8992                 allocated = 1;
8993
8994                 if (ret)
8995                         goto out;
8996         }
8997 out:
8998         if (trans) {
8999                 int err = btrfs_commit_transaction(trans, info->extent_root);
9000                 if (!ret)
9001                         ret = err;
9002         }
9003
9004         btrfs_release_path(&path);
9005         return ret;
9006 }
9007
9008 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9009                               struct extent_record *rec)
9010 {
9011         struct btrfs_trans_handle *trans;
9012         struct btrfs_root *root = fs_info->extent_root;
9013         struct btrfs_path path;
9014         struct btrfs_extent_item *ei;
9015         struct btrfs_key key;
9016         u64 flags;
9017         int ret = 0;
9018
9019         key.objectid = rec->start;
9020         if (rec->metadata) {
9021                 key.type = BTRFS_METADATA_ITEM_KEY;
9022                 key.offset = rec->info_level;
9023         } else {
9024                 key.type = BTRFS_EXTENT_ITEM_KEY;
9025                 key.offset = rec->max_size;
9026         }
9027
9028         trans = btrfs_start_transaction(root, 0);
9029         if (IS_ERR(trans))
9030                 return PTR_ERR(trans);
9031
9032         btrfs_init_path(&path);
9033         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9034         if (ret < 0) {
9035                 btrfs_release_path(&path);
9036                 btrfs_commit_transaction(trans, root);
9037                 return ret;
9038         } else if (ret) {
9039                 fprintf(stderr, "Didn't find extent for %llu\n",
9040                         (unsigned long long)rec->start);
9041                 btrfs_release_path(&path);
9042                 btrfs_commit_transaction(trans, root);
9043                 return -ENOENT;
9044         }
9045
9046         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9047                             struct btrfs_extent_item);
9048         flags = btrfs_extent_flags(path.nodes[0], ei);
9049         if (rec->flag_block_full_backref) {
9050                 fprintf(stderr, "setting full backref on %llu\n",
9051                         (unsigned long long)key.objectid);
9052                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9053         } else {
9054                 fprintf(stderr, "clearing full backref on %llu\n",
9055                         (unsigned long long)key.objectid);
9056                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9057         }
9058         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9059         btrfs_mark_buffer_dirty(path.nodes[0]);
9060         btrfs_release_path(&path);
9061         return btrfs_commit_transaction(trans, root);
9062 }
9063
9064 /* right now we only prune from the extent allocation tree */
9065 static int prune_one_block(struct btrfs_trans_handle *trans,
9066                            struct btrfs_fs_info *info,
9067                            struct btrfs_corrupt_block *corrupt)
9068 {
9069         int ret;
9070         struct btrfs_path path;
9071         struct extent_buffer *eb;
9072         u64 found;
9073         int slot;
9074         int nritems;
9075         int level = corrupt->level + 1;
9076
9077         btrfs_init_path(&path);
9078 again:
9079         /* we want to stop at the parent to our busted block */
9080         path.lowest_level = level;
9081
9082         ret = btrfs_search_slot(trans, info->extent_root,
9083                                 &corrupt->key, &path, -1, 1);
9084
9085         if (ret < 0)
9086                 goto out;
9087
9088         eb = path.nodes[level];
9089         if (!eb) {
9090                 ret = -ENOENT;
9091                 goto out;
9092         }
9093
9094         /*
9095          * hopefully the search gave us the block we want to prune,
9096          * lets try that first
9097          */
9098         slot = path.slots[level];
9099         found =  btrfs_node_blockptr(eb, slot);
9100         if (found == corrupt->cache.start)
9101                 goto del_ptr;
9102
9103         nritems = btrfs_header_nritems(eb);
9104
9105         /* the search failed, lets scan this node and hope we find it */
9106         for (slot = 0; slot < nritems; slot++) {
9107                 found =  btrfs_node_blockptr(eb, slot);
9108                 if (found == corrupt->cache.start)
9109                         goto del_ptr;
9110         }
9111         /*
9112          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9113          * to this block
9114          */
9115         if (eb == info->extent_root->node) {
9116                 ret = -ENOENT;
9117                 goto out;
9118         } else {
9119                 level++;
9120                 btrfs_release_path(&path);
9121                 goto again;
9122         }
9123
9124 del_ptr:
9125         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9126         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
9127
9128 out:
9129         btrfs_release_path(&path);
9130         return ret;
9131 }
9132
9133 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9134 {
9135         struct btrfs_trans_handle *trans = NULL;
9136         struct cache_extent *cache;
9137         struct btrfs_corrupt_block *corrupt;
9138
9139         while (1) {
9140                 cache = search_cache_extent(info->corrupt_blocks, 0);
9141                 if (!cache)
9142                         break;
9143                 if (!trans) {
9144                         trans = btrfs_start_transaction(info->extent_root, 1);
9145                         if (IS_ERR(trans))
9146                                 return PTR_ERR(trans);
9147                 }
9148                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9149                 prune_one_block(trans, info, corrupt);
9150                 remove_cache_extent(info->corrupt_blocks, cache);
9151         }
9152         if (trans)
9153                 return btrfs_commit_transaction(trans, info->extent_root);
9154         return 0;
9155 }
9156
9157 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9158 {
9159         struct btrfs_block_group_cache *cache;
9160         u64 start, end;
9161         int ret;
9162
9163         while (1) {
9164                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9165                                             &start, &end, EXTENT_DIRTY);
9166                 if (ret)
9167                         break;
9168                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9169                                    GFP_NOFS);
9170         }
9171
9172         start = 0;
9173         while (1) {
9174                 cache = btrfs_lookup_first_block_group(fs_info, start);
9175                 if (!cache)
9176                         break;
9177                 if (cache->cached)
9178                         cache->cached = 0;
9179                 start = cache->key.objectid + cache->key.offset;
9180         }
9181 }
9182
9183 static int check_extent_refs(struct btrfs_root *root,
9184                              struct cache_tree *extent_cache)
9185 {
9186         struct extent_record *rec;
9187         struct cache_extent *cache;
9188         int err = 0;
9189         int ret = 0;
9190         int fixed = 0;
9191         int had_dups = 0;
9192         int recorded = 0;
9193
9194         if (repair) {
9195                 /*
9196                  * if we're doing a repair, we have to make sure
9197                  * we don't allocate from the problem extents.
9198                  * In the worst case, this will be all the
9199                  * extents in the FS
9200                  */
9201                 cache = search_cache_extent(extent_cache, 0);
9202                 while(cache) {
9203                         rec = container_of(cache, struct extent_record, cache);
9204                         set_extent_dirty(root->fs_info->excluded_extents,
9205                                          rec->start,
9206                                          rec->start + rec->max_size - 1,
9207                                          GFP_NOFS);
9208                         cache = next_cache_extent(cache);
9209                 }
9210
9211                 /* pin down all the corrupted blocks too */
9212                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9213                 while(cache) {
9214                         set_extent_dirty(root->fs_info->excluded_extents,
9215                                          cache->start,
9216                                          cache->start + cache->size - 1,
9217                                          GFP_NOFS);
9218                         cache = next_cache_extent(cache);
9219                 }
9220                 prune_corrupt_blocks(root->fs_info);
9221                 reset_cached_block_groups(root->fs_info);
9222         }
9223
9224         reset_cached_block_groups(root->fs_info);
9225
9226         /*
9227          * We need to delete any duplicate entries we find first otherwise we
9228          * could mess up the extent tree when we have backrefs that actually
9229          * belong to a different extent item and not the weird duplicate one.
9230          */
9231         while (repair && !list_empty(&duplicate_extents)) {
9232                 rec = to_extent_record(duplicate_extents.next);
9233                 list_del_init(&rec->list);
9234
9235                 /* Sometimes we can find a backref before we find an actual
9236                  * extent, so we need to process it a little bit to see if there
9237                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9238                  * if this is a backref screwup.  If we need to delete stuff
9239                  * process_duplicates() will return 0, otherwise it will return
9240                  * 1 and we
9241                  */
9242                 if (process_duplicates(root, extent_cache, rec))
9243                         continue;
9244                 ret = delete_duplicate_records(root, rec);
9245                 if (ret < 0)
9246                         return ret;
9247                 /*
9248                  * delete_duplicate_records will return the number of entries
9249                  * deleted, so if it's greater than 0 then we know we actually
9250                  * did something and we need to remove.
9251                  */
9252                 if (ret)
9253                         had_dups = 1;
9254         }
9255
9256         if (had_dups)
9257                 return -EAGAIN;
9258
9259         while(1) {
9260                 int cur_err = 0;
9261
9262                 fixed = 0;
9263                 recorded = 0;
9264                 cache = search_cache_extent(extent_cache, 0);
9265                 if (!cache)
9266                         break;
9267                 rec = container_of(cache, struct extent_record, cache);
9268                 if (rec->num_duplicates) {
9269                         fprintf(stderr, "extent item %llu has multiple extent "
9270                                 "items\n", (unsigned long long)rec->start);
9271                         err = 1;
9272                         cur_err = 1;
9273                 }
9274
9275                 if (rec->refs != rec->extent_item_refs) {
9276                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9277                                 (unsigned long long)rec->start,
9278                                 (unsigned long long)rec->nr);
9279                         fprintf(stderr, "extent item %llu, found %llu\n",
9280                                 (unsigned long long)rec->extent_item_refs,
9281                                 (unsigned long long)rec->refs);
9282                         ret = record_orphan_data_extents(root->fs_info, rec);
9283                         if (ret < 0)
9284                                 goto repair_abort;
9285                         if (ret == 0) {
9286                                 recorded = 1;
9287                         } else {
9288                                 /*
9289                                  * we can't use the extent to repair file
9290                                  * extent, let the fallback method handle it.
9291                                  */
9292                                 if (!fixed && repair) {
9293                                         ret = fixup_extent_refs(
9294                                                         root->fs_info,
9295                                                         extent_cache, rec);
9296                                         if (ret)
9297                                                 goto repair_abort;
9298                                         fixed = 1;
9299                                 }
9300                         }
9301                         err = 1;
9302                         cur_err = 1;
9303                 }
9304                 if (all_backpointers_checked(rec, 1)) {
9305                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9306                                 (unsigned long long)rec->start,
9307                                 (unsigned long long)rec->nr);
9308
9309                         if (!fixed && !recorded && repair) {
9310                                 ret = fixup_extent_refs(root->fs_info,
9311                                                         extent_cache, rec);
9312                                 if (ret)
9313                                         goto repair_abort;
9314                                 fixed = 1;
9315                         }
9316                         cur_err = 1;
9317                         err = 1;
9318                 }
9319                 if (!rec->owner_ref_checked) {
9320                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9321                                 (unsigned long long)rec->start,
9322                                 (unsigned long long)rec->nr);
9323                         if (!fixed && !recorded && repair) {
9324                                 ret = fixup_extent_refs(root->fs_info,
9325                                                         extent_cache, rec);
9326                                 if (ret)
9327                                         goto repair_abort;
9328                                 fixed = 1;
9329                         }
9330                         err = 1;
9331                         cur_err = 1;
9332                 }
9333                 if (rec->bad_full_backref) {
9334                         fprintf(stderr, "bad full backref, on [%llu]\n",
9335                                 (unsigned long long)rec->start);
9336                         if (repair) {
9337                                 ret = fixup_extent_flags(root->fs_info, rec);
9338                                 if (ret)
9339                                         goto repair_abort;
9340                                 fixed = 1;
9341                         }
9342                         err = 1;
9343                         cur_err = 1;
9344                 }
9345                 /*
9346                  * Although it's not a extent ref's problem, we reuse this
9347                  * routine for error reporting.
9348                  * No repair function yet.
9349                  */
9350                 if (rec->crossing_stripes) {
9351                         fprintf(stderr,
9352                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9353                                 rec->start, rec->start + rec->max_size);
9354                         err = 1;
9355                         cur_err = 1;
9356                 }
9357
9358                 if (rec->wrong_chunk_type) {
9359                         fprintf(stderr,
9360                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9361                                 rec->start, rec->start + rec->max_size);
9362                         err = 1;
9363                         cur_err = 1;
9364                 }
9365
9366                 remove_cache_extent(extent_cache, cache);
9367                 free_all_extent_backrefs(rec);
9368                 if (!init_extent_tree && repair && (!cur_err || fixed))
9369                         clear_extent_dirty(root->fs_info->excluded_extents,
9370                                            rec->start,
9371                                            rec->start + rec->max_size - 1,
9372                                            GFP_NOFS);
9373                 free(rec);
9374         }
9375 repair_abort:
9376         if (repair) {
9377                 if (ret && ret != -EAGAIN) {
9378                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9379                         exit(1);
9380                 } else if (!ret) {
9381                         struct btrfs_trans_handle *trans;
9382
9383                         root = root->fs_info->extent_root;
9384                         trans = btrfs_start_transaction(root, 1);
9385                         if (IS_ERR(trans)) {
9386                                 ret = PTR_ERR(trans);
9387                                 goto repair_abort;
9388                         }
9389
9390                         btrfs_fix_block_accounting(trans, root);
9391                         ret = btrfs_commit_transaction(trans, root);
9392                         if (ret)
9393                                 goto repair_abort;
9394                 }
9395                 if (err)
9396                         fprintf(stderr, "repaired damaged extent references\n");
9397                 return ret;
9398         }
9399         return err;
9400 }
9401
9402 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9403 {
9404         u64 stripe_size;
9405
9406         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9407                 stripe_size = length;
9408                 stripe_size /= num_stripes;
9409         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9410                 stripe_size = length * 2;
9411                 stripe_size /= num_stripes;
9412         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9413                 stripe_size = length;
9414                 stripe_size /= (num_stripes - 1);
9415         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9416                 stripe_size = length;
9417                 stripe_size /= (num_stripes - 2);
9418         } else {
9419                 stripe_size = length;
9420         }
9421         return stripe_size;
9422 }
9423
9424 /*
9425  * Check the chunk with its block group/dev list ref:
9426  * Return 0 if all refs seems valid.
9427  * Return 1 if part of refs seems valid, need later check for rebuild ref
9428  * like missing block group and needs to search extent tree to rebuild them.
9429  * Return -1 if essential refs are missing and unable to rebuild.
9430  */
9431 static int check_chunk_refs(struct chunk_record *chunk_rec,
9432                             struct block_group_tree *block_group_cache,
9433                             struct device_extent_tree *dev_extent_cache,
9434                             int silent)
9435 {
9436         struct cache_extent *block_group_item;
9437         struct block_group_record *block_group_rec;
9438         struct cache_extent *dev_extent_item;
9439         struct device_extent_record *dev_extent_rec;
9440         u64 devid;
9441         u64 offset;
9442         u64 length;
9443         int metadump_v2 = 0;
9444         int i;
9445         int ret = 0;
9446
9447         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9448                                                chunk_rec->offset,
9449                                                chunk_rec->length);
9450         if (block_group_item) {
9451                 block_group_rec = container_of(block_group_item,
9452                                                struct block_group_record,
9453                                                cache);
9454                 if (chunk_rec->length != block_group_rec->offset ||
9455                     chunk_rec->offset != block_group_rec->objectid ||
9456                     (!metadump_v2 &&
9457                      chunk_rec->type_flags != block_group_rec->flags)) {
9458                         if (!silent)
9459                                 fprintf(stderr,
9460                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9461                                         chunk_rec->objectid,
9462                                         chunk_rec->type,
9463                                         chunk_rec->offset,
9464                                         chunk_rec->length,
9465                                         chunk_rec->offset,
9466                                         chunk_rec->type_flags,
9467                                         block_group_rec->objectid,
9468                                         block_group_rec->type,
9469                                         block_group_rec->offset,
9470                                         block_group_rec->offset,
9471                                         block_group_rec->objectid,
9472                                         block_group_rec->flags);
9473                         ret = -1;
9474                 } else {
9475                         list_del_init(&block_group_rec->list);
9476                         chunk_rec->bg_rec = block_group_rec;
9477                 }
9478         } else {
9479                 if (!silent)
9480                         fprintf(stderr,
9481                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9482                                 chunk_rec->objectid,
9483                                 chunk_rec->type,
9484                                 chunk_rec->offset,
9485                                 chunk_rec->length,
9486                                 chunk_rec->offset,
9487                                 chunk_rec->type_flags);
9488                 ret = 1;
9489         }
9490
9491         if (metadump_v2)
9492                 return ret;
9493
9494         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9495                                     chunk_rec->num_stripes);
9496         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9497                 devid = chunk_rec->stripes[i].devid;
9498                 offset = chunk_rec->stripes[i].offset;
9499                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9500                                                        devid, offset, length);
9501                 if (dev_extent_item) {
9502                         dev_extent_rec = container_of(dev_extent_item,
9503                                                 struct device_extent_record,
9504                                                 cache);
9505                         if (dev_extent_rec->objectid != devid ||
9506                             dev_extent_rec->offset != offset ||
9507                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9508                             dev_extent_rec->length != length) {
9509                                 if (!silent)
9510                                         fprintf(stderr,
9511                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9512                                                 chunk_rec->objectid,
9513                                                 chunk_rec->type,
9514                                                 chunk_rec->offset,
9515                                                 chunk_rec->stripes[i].devid,
9516                                                 chunk_rec->stripes[i].offset,
9517                                                 dev_extent_rec->objectid,
9518                                                 dev_extent_rec->offset,
9519                                                 dev_extent_rec->length);
9520                                 ret = -1;
9521                         } else {
9522                                 list_move(&dev_extent_rec->chunk_list,
9523                                           &chunk_rec->dextents);
9524                         }
9525                 } else {
9526                         if (!silent)
9527                                 fprintf(stderr,
9528                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9529                                         chunk_rec->objectid,
9530                                         chunk_rec->type,
9531                                         chunk_rec->offset,
9532                                         chunk_rec->stripes[i].devid,
9533                                         chunk_rec->stripes[i].offset);
9534                         ret = -1;
9535                 }
9536         }
9537         return ret;
9538 }
9539
9540 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9541 int check_chunks(struct cache_tree *chunk_cache,
9542                  struct block_group_tree *block_group_cache,
9543                  struct device_extent_tree *dev_extent_cache,
9544                  struct list_head *good, struct list_head *bad,
9545                  struct list_head *rebuild, int silent)
9546 {
9547         struct cache_extent *chunk_item;
9548         struct chunk_record *chunk_rec;
9549         struct block_group_record *bg_rec;
9550         struct device_extent_record *dext_rec;
9551         int err;
9552         int ret = 0;
9553
9554         chunk_item = first_cache_extent(chunk_cache);
9555         while (chunk_item) {
9556                 chunk_rec = container_of(chunk_item, struct chunk_record,
9557                                          cache);
9558                 err = check_chunk_refs(chunk_rec, block_group_cache,
9559                                        dev_extent_cache, silent);
9560                 if (err < 0)
9561                         ret = err;
9562                 if (err == 0 && good)
9563                         list_add_tail(&chunk_rec->list, good);
9564                 if (err > 0 && rebuild)
9565                         list_add_tail(&chunk_rec->list, rebuild);
9566                 if (err < 0 && bad)
9567                         list_add_tail(&chunk_rec->list, bad);
9568                 chunk_item = next_cache_extent(chunk_item);
9569         }
9570
9571         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9572                 if (!silent)
9573                         fprintf(stderr,
9574                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9575                                 bg_rec->objectid,
9576                                 bg_rec->offset,
9577                                 bg_rec->flags);
9578                 if (!ret)
9579                         ret = 1;
9580         }
9581
9582         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9583                             chunk_list) {
9584                 if (!silent)
9585                         fprintf(stderr,
9586                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9587                                 dext_rec->objectid,
9588                                 dext_rec->offset,
9589                                 dext_rec->length);
9590                 if (!ret)
9591                         ret = 1;
9592         }
9593         return ret;
9594 }
9595
9596
9597 static int check_device_used(struct device_record *dev_rec,
9598                              struct device_extent_tree *dext_cache)
9599 {
9600         struct cache_extent *cache;
9601         struct device_extent_record *dev_extent_rec;
9602         u64 total_byte = 0;
9603
9604         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9605         while (cache) {
9606                 dev_extent_rec = container_of(cache,
9607                                               struct device_extent_record,
9608                                               cache);
9609                 if (dev_extent_rec->objectid != dev_rec->devid)
9610                         break;
9611
9612                 list_del_init(&dev_extent_rec->device_list);
9613                 total_byte += dev_extent_rec->length;
9614                 cache = next_cache_extent(cache);
9615         }
9616
9617         if (total_byte != dev_rec->byte_used) {
9618                 fprintf(stderr,
9619                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9620                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9621                         dev_rec->type, dev_rec->offset);
9622                 return -1;
9623         } else {
9624                 return 0;
9625         }
9626 }
9627
9628 /* check btrfs_dev_item -> btrfs_dev_extent */
9629 static int check_devices(struct rb_root *dev_cache,
9630                          struct device_extent_tree *dev_extent_cache)
9631 {
9632         struct rb_node *dev_node;
9633         struct device_record *dev_rec;
9634         struct device_extent_record *dext_rec;
9635         int err;
9636         int ret = 0;
9637
9638         dev_node = rb_first(dev_cache);
9639         while (dev_node) {
9640                 dev_rec = container_of(dev_node, struct device_record, node);
9641                 err = check_device_used(dev_rec, dev_extent_cache);
9642                 if (err)
9643                         ret = err;
9644
9645                 dev_node = rb_next(dev_node);
9646         }
9647         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9648                             device_list) {
9649                 fprintf(stderr,
9650                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9651                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9652                 if (!ret)
9653                         ret = 1;
9654         }
9655         return ret;
9656 }
9657
9658 static int add_root_item_to_list(struct list_head *head,
9659                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9660                                   u8 level, u8 drop_level,
9661                                   int level_size, struct btrfs_key *drop_key)
9662 {
9663
9664         struct root_item_record *ri_rec;
9665         ri_rec = malloc(sizeof(*ri_rec));
9666         if (!ri_rec)
9667                 return -ENOMEM;
9668         ri_rec->bytenr = bytenr;
9669         ri_rec->objectid = objectid;
9670         ri_rec->level = level;
9671         ri_rec->level_size = level_size;
9672         ri_rec->drop_level = drop_level;
9673         ri_rec->last_snapshot = last_snapshot;
9674         if (drop_key)
9675                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9676         list_add_tail(&ri_rec->list, head);
9677
9678         return 0;
9679 }
9680
9681 static void free_root_item_list(struct list_head *list)
9682 {
9683         struct root_item_record *ri_rec;
9684
9685         while (!list_empty(list)) {
9686                 ri_rec = list_first_entry(list, struct root_item_record,
9687                                           list);
9688                 list_del_init(&ri_rec->list);
9689                 free(ri_rec);
9690         }
9691 }
9692
9693 static int deal_root_from_list(struct list_head *list,
9694                                struct btrfs_root *root,
9695                                struct block_info *bits,
9696                                int bits_nr,
9697                                struct cache_tree *pending,
9698                                struct cache_tree *seen,
9699                                struct cache_tree *reada,
9700                                struct cache_tree *nodes,
9701                                struct cache_tree *extent_cache,
9702                                struct cache_tree *chunk_cache,
9703                                struct rb_root *dev_cache,
9704                                struct block_group_tree *block_group_cache,
9705                                struct device_extent_tree *dev_extent_cache)
9706 {
9707         int ret = 0;
9708         u64 last;
9709
9710         while (!list_empty(list)) {
9711                 struct root_item_record *rec;
9712                 struct extent_buffer *buf;
9713                 rec = list_entry(list->next,
9714                                  struct root_item_record, list);
9715                 last = 0;
9716                 buf = read_tree_block(root->fs_info->tree_root,
9717                                       rec->bytenr, rec->level_size, 0);
9718                 if (!extent_buffer_uptodate(buf)) {
9719                         free_extent_buffer(buf);
9720                         ret = -EIO;
9721                         break;
9722                 }
9723                 ret = add_root_to_pending(buf, extent_cache, pending,
9724                                     seen, nodes, rec->objectid);
9725                 if (ret < 0)
9726                         break;
9727                 /*
9728                  * To rebuild extent tree, we need deal with snapshot
9729                  * one by one, otherwise we deal with node firstly which
9730                  * can maximize readahead.
9731                  */
9732                 while (1) {
9733                         ret = run_next_block(root, bits, bits_nr, &last,
9734                                              pending, seen, reada, nodes,
9735                                              extent_cache, chunk_cache,
9736                                              dev_cache, block_group_cache,
9737                                              dev_extent_cache, rec);
9738                         if (ret != 0)
9739                                 break;
9740                 }
9741                 free_extent_buffer(buf);
9742                 list_del(&rec->list);
9743                 free(rec);
9744                 if (ret < 0)
9745                         break;
9746         }
9747         while (ret >= 0) {
9748                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9749                                      reada, nodes, extent_cache, chunk_cache,
9750                                      dev_cache, block_group_cache,
9751                                      dev_extent_cache, NULL);
9752                 if (ret != 0) {
9753                         if (ret > 0)
9754                                 ret = 0;
9755                         break;
9756                 }
9757         }
9758         return ret;
9759 }
9760
9761 static int check_chunks_and_extents(struct btrfs_root *root)
9762 {
9763         struct rb_root dev_cache;
9764         struct cache_tree chunk_cache;
9765         struct block_group_tree block_group_cache;
9766         struct device_extent_tree dev_extent_cache;
9767         struct cache_tree extent_cache;
9768         struct cache_tree seen;
9769         struct cache_tree pending;
9770         struct cache_tree reada;
9771         struct cache_tree nodes;
9772         struct extent_io_tree excluded_extents;
9773         struct cache_tree corrupt_blocks;
9774         struct btrfs_path path;
9775         struct btrfs_key key;
9776         struct btrfs_key found_key;
9777         int ret, err = 0;
9778         struct block_info *bits;
9779         int bits_nr;
9780         struct extent_buffer *leaf;
9781         int slot;
9782         struct btrfs_root_item ri;
9783         struct list_head dropping_trees;
9784         struct list_head normal_trees;
9785         struct btrfs_root *root1;
9786         u64 objectid;
9787         u32 level_size;
9788         u8 level;
9789
9790         dev_cache = RB_ROOT;
9791         cache_tree_init(&chunk_cache);
9792         block_group_tree_init(&block_group_cache);
9793         device_extent_tree_init(&dev_extent_cache);
9794
9795         cache_tree_init(&extent_cache);
9796         cache_tree_init(&seen);
9797         cache_tree_init(&pending);
9798         cache_tree_init(&nodes);
9799         cache_tree_init(&reada);
9800         cache_tree_init(&corrupt_blocks);
9801         extent_io_tree_init(&excluded_extents);
9802         INIT_LIST_HEAD(&dropping_trees);
9803         INIT_LIST_HEAD(&normal_trees);
9804
9805         if (repair) {
9806                 root->fs_info->excluded_extents = &excluded_extents;
9807                 root->fs_info->fsck_extent_cache = &extent_cache;
9808                 root->fs_info->free_extent_hook = free_extent_hook;
9809                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9810         }
9811
9812         bits_nr = 1024;
9813         bits = malloc(bits_nr * sizeof(struct block_info));
9814         if (!bits) {
9815                 perror("malloc");
9816                 exit(1);
9817         }
9818
9819         if (ctx.progress_enabled) {
9820                 ctx.tp = TASK_EXTENTS;
9821                 task_start(ctx.info);
9822         }
9823
9824 again:
9825         root1 = root->fs_info->tree_root;
9826         level = btrfs_header_level(root1->node);
9827         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9828                                     root1->node->start, 0, level, 0,
9829                                     root1->nodesize, NULL);
9830         if (ret < 0)
9831                 goto out;
9832         root1 = root->fs_info->chunk_root;
9833         level = btrfs_header_level(root1->node);
9834         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9835                                     root1->node->start, 0, level, 0,
9836                                     root1->nodesize, NULL);
9837         if (ret < 0)
9838                 goto out;
9839         btrfs_init_path(&path);
9840         key.offset = 0;
9841         key.objectid = 0;
9842         key.type = BTRFS_ROOT_ITEM_KEY;
9843         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9844                                         &key, &path, 0, 0);
9845         if (ret < 0)
9846                 goto out;
9847         while(1) {
9848                 leaf = path.nodes[0];
9849                 slot = path.slots[0];
9850                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9851                         ret = btrfs_next_leaf(root, &path);
9852                         if (ret != 0)
9853                                 break;
9854                         leaf = path.nodes[0];
9855                         slot = path.slots[0];
9856                 }
9857                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9858                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9859                         unsigned long offset;
9860                         u64 last_snapshot;
9861
9862                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9863                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9864                         last_snapshot = btrfs_root_last_snapshot(&ri);
9865                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9866                                 level = btrfs_root_level(&ri);
9867                                 level_size = root->nodesize;
9868                                 ret = add_root_item_to_list(&normal_trees,
9869                                                 found_key.objectid,
9870                                                 btrfs_root_bytenr(&ri),
9871                                                 last_snapshot, level,
9872                                                 0, level_size, NULL);
9873                                 if (ret < 0)
9874                                         goto out;
9875                         } else {
9876                                 level = btrfs_root_level(&ri);
9877                                 level_size = root->nodesize;
9878                                 objectid = found_key.objectid;
9879                                 btrfs_disk_key_to_cpu(&found_key,
9880                                                       &ri.drop_progress);
9881                                 ret = add_root_item_to_list(&dropping_trees,
9882                                                 objectid,
9883                                                 btrfs_root_bytenr(&ri),
9884                                                 last_snapshot, level,
9885                                                 ri.drop_level,
9886                                                 level_size, &found_key);
9887                                 if (ret < 0)
9888                                         goto out;
9889                         }
9890                 }
9891                 path.slots[0]++;
9892         }
9893         btrfs_release_path(&path);
9894
9895         /*
9896          * check_block can return -EAGAIN if it fixes something, please keep
9897          * this in mind when dealing with return values from these functions, if
9898          * we get -EAGAIN we want to fall through and restart the loop.
9899          */
9900         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9901                                   &seen, &reada, &nodes, &extent_cache,
9902                                   &chunk_cache, &dev_cache, &block_group_cache,
9903                                   &dev_extent_cache);
9904         if (ret < 0) {
9905                 if (ret == -EAGAIN)
9906                         goto loop;
9907                 goto out;
9908         }
9909         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9910                                   &pending, &seen, &reada, &nodes,
9911                                   &extent_cache, &chunk_cache, &dev_cache,
9912                                   &block_group_cache, &dev_extent_cache);
9913         if (ret < 0) {
9914                 if (ret == -EAGAIN)
9915                         goto loop;
9916                 goto out;
9917         }
9918
9919         ret = check_chunks(&chunk_cache, &block_group_cache,
9920                            &dev_extent_cache, NULL, NULL, NULL, 0);
9921         if (ret) {
9922                 if (ret == -EAGAIN)
9923                         goto loop;
9924                 err = ret;
9925         }
9926
9927         ret = check_extent_refs(root, &extent_cache);
9928         if (ret < 0) {
9929                 if (ret == -EAGAIN)
9930                         goto loop;
9931                 goto out;
9932         }
9933
9934         ret = check_devices(&dev_cache, &dev_extent_cache);
9935         if (ret && err)
9936                 ret = err;
9937
9938 out:
9939         task_stop(ctx.info);
9940         if (repair) {
9941                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9942                 extent_io_tree_cleanup(&excluded_extents);
9943                 root->fs_info->fsck_extent_cache = NULL;
9944                 root->fs_info->free_extent_hook = NULL;
9945                 root->fs_info->corrupt_blocks = NULL;
9946                 root->fs_info->excluded_extents = NULL;
9947         }
9948         free(bits);
9949         free_chunk_cache_tree(&chunk_cache);
9950         free_device_cache_tree(&dev_cache);
9951         free_block_group_tree(&block_group_cache);
9952         free_device_extent_tree(&dev_extent_cache);
9953         free_extent_cache_tree(&seen);
9954         free_extent_cache_tree(&pending);
9955         free_extent_cache_tree(&reada);
9956         free_extent_cache_tree(&nodes);
9957         return ret;
9958 loop:
9959         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9960         free_extent_cache_tree(&seen);
9961         free_extent_cache_tree(&pending);
9962         free_extent_cache_tree(&reada);
9963         free_extent_cache_tree(&nodes);
9964         free_chunk_cache_tree(&chunk_cache);
9965         free_block_group_tree(&block_group_cache);
9966         free_device_cache_tree(&dev_cache);
9967         free_device_extent_tree(&dev_extent_cache);
9968         free_extent_record_cache(root->fs_info, &extent_cache);
9969         free_root_item_list(&normal_trees);
9970         free_root_item_list(&dropping_trees);
9971         extent_io_tree_cleanup(&excluded_extents);
9972         goto again;
9973 }
9974
9975 /*
9976  * Check backrefs of a tree block given by @bytenr or @eb.
9977  *
9978  * @root:       the root containing the @bytenr or @eb
9979  * @eb:         tree block extent buffer, can be NULL
9980  * @bytenr:     bytenr of the tree block to search
9981  * @level:      tree level of the tree block
9982  * @owner:      owner of the tree block
9983  *
9984  * Return >0 for any error found and output error message
9985  * Return 0 for no error found
9986  */
9987 static int check_tree_block_ref(struct btrfs_root *root,
9988                                 struct extent_buffer *eb, u64 bytenr,
9989                                 int level, u64 owner)
9990 {
9991         struct btrfs_key key;
9992         struct btrfs_root *extent_root = root->fs_info->extent_root;
9993         struct btrfs_path path;
9994         struct btrfs_extent_item *ei;
9995         struct btrfs_extent_inline_ref *iref;
9996         struct extent_buffer *leaf;
9997         unsigned long end;
9998         unsigned long ptr;
9999         int slot;
10000         int skinny_level;
10001         int type;
10002         u32 nodesize = root->nodesize;
10003         u32 item_size;
10004         u64 offset;
10005         int found_ref = 0;
10006         int err = 0;
10007         int ret;
10008
10009         btrfs_init_path(&path);
10010         key.objectid = bytenr;
10011         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10012                 key.type = BTRFS_METADATA_ITEM_KEY;
10013         else
10014                 key.type = BTRFS_EXTENT_ITEM_KEY;
10015         key.offset = (u64)-1;
10016
10017         /* Search for the backref in extent tree */
10018         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10019         if (ret < 0) {
10020                 err |= BACKREF_MISSING;
10021                 goto out;
10022         }
10023         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10024         if (ret) {
10025                 err |= BACKREF_MISSING;
10026                 goto out;
10027         }
10028
10029         leaf = path.nodes[0];
10030         slot = path.slots[0];
10031         btrfs_item_key_to_cpu(leaf, &key, slot);
10032
10033         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10034
10035         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10036                 skinny_level = (int)key.offset;
10037                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10038         } else {
10039                 struct btrfs_tree_block_info *info;
10040
10041                 info = (struct btrfs_tree_block_info *)(ei + 1);
10042                 skinny_level = btrfs_tree_block_level(leaf, info);
10043                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10044         }
10045
10046         if (eb) {
10047                 u64 header_gen;
10048                 u64 extent_gen;
10049
10050                 if (!(btrfs_extent_flags(leaf, ei) &
10051                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10052                         error(
10053                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10054                                 key.objectid, nodesize,
10055                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10056                         err = BACKREF_MISMATCH;
10057                 }
10058                 header_gen = btrfs_header_generation(eb);
10059                 extent_gen = btrfs_extent_generation(leaf, ei);
10060                 if (header_gen != extent_gen) {
10061                         error(
10062         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10063                                 key.objectid, nodesize, header_gen,
10064                                 extent_gen);
10065                         err = BACKREF_MISMATCH;
10066                 }
10067                 if (level != skinny_level) {
10068                         error(
10069                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10070                                 key.objectid, nodesize, level, skinny_level);
10071                         err = BACKREF_MISMATCH;
10072                 }
10073                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10074                         error(
10075                         "extent[%llu %u] is referred by other roots than %llu",
10076                                 key.objectid, nodesize, root->objectid);
10077                         err = BACKREF_MISMATCH;
10078                 }
10079         }
10080
10081         /*
10082          * Iterate the extent/metadata item to find the exact backref
10083          */
10084         item_size = btrfs_item_size_nr(leaf, slot);
10085         ptr = (unsigned long)iref;
10086         end = (unsigned long)ei + item_size;
10087         while (ptr < end) {
10088                 iref = (struct btrfs_extent_inline_ref *)ptr;
10089                 type = btrfs_extent_inline_ref_type(leaf, iref);
10090                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10091
10092                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10093                         (offset == root->objectid || offset == owner)) {
10094                         found_ref = 1;
10095                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10096                         /* Check if the backref points to valid referencer */
10097                         found_ref = !check_tree_block_ref(root, NULL, offset,
10098                                                           level + 1, owner);
10099                 }
10100
10101                 if (found_ref)
10102                         break;
10103                 ptr += btrfs_extent_inline_ref_size(type);
10104         }
10105
10106         /*
10107          * Inlined extent item doesn't have what we need, check
10108          * TREE_BLOCK_REF_KEY
10109          */
10110         if (!found_ref) {
10111                 btrfs_release_path(&path);
10112                 key.objectid = bytenr;
10113                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10114                 key.offset = root->objectid;
10115
10116                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10117                 if (!ret)
10118                         found_ref = 1;
10119         }
10120         if (!found_ref)
10121                 err |= BACKREF_MISSING;
10122 out:
10123         btrfs_release_path(&path);
10124         if (eb && (err & BACKREF_MISSING))
10125                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10126                         bytenr, nodesize, owner, level);
10127         return err;
10128 }
10129
10130 /*
10131  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10132  *
10133  * Return >0 any error found and output error message
10134  * Return 0 for no error found
10135  */
10136 static int check_extent_data_item(struct btrfs_root *root,
10137                                   struct extent_buffer *eb, int slot)
10138 {
10139         struct btrfs_file_extent_item *fi;
10140         struct btrfs_path path;
10141         struct btrfs_root *extent_root = root->fs_info->extent_root;
10142         struct btrfs_key fi_key;
10143         struct btrfs_key dbref_key;
10144         struct extent_buffer *leaf;
10145         struct btrfs_extent_item *ei;
10146         struct btrfs_extent_inline_ref *iref;
10147         struct btrfs_extent_data_ref *dref;
10148         u64 owner;
10149         u64 file_extent_gen;
10150         u64 disk_bytenr;
10151         u64 disk_num_bytes;
10152         u64 extent_num_bytes;
10153         u64 extent_flags;
10154         u64 extent_gen;
10155         u32 item_size;
10156         unsigned long end;
10157         unsigned long ptr;
10158         int type;
10159         u64 ref_root;
10160         int found_dbackref = 0;
10161         int err = 0;
10162         int ret;
10163
10164         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10165         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10166         file_extent_gen = btrfs_file_extent_generation(eb, fi);
10167
10168         /* Nothing to check for hole and inline data extents */
10169         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10170             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10171                 return 0;
10172
10173         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10174         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10175         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10176
10177         /* Check unaligned disk_num_bytes and num_bytes */
10178         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10179                 error(
10180 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10181                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10182                         root->sectorsize);
10183                 err |= BYTES_UNALIGNED;
10184         } else {
10185                 data_bytes_allocated += disk_num_bytes;
10186         }
10187         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10188                 error(
10189 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10190                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10191                         root->sectorsize);
10192                 err |= BYTES_UNALIGNED;
10193         } else {
10194                 data_bytes_referenced += extent_num_bytes;
10195         }
10196         owner = btrfs_header_owner(eb);
10197
10198         /* Check the extent item of the file extent in extent tree */
10199         btrfs_init_path(&path);
10200         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10201         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10202         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10203
10204         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10205         if (ret) {
10206                 err |= BACKREF_MISSING;
10207                 goto error;
10208         }
10209
10210         leaf = path.nodes[0];
10211         slot = path.slots[0];
10212         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10213
10214         extent_flags = btrfs_extent_flags(leaf, ei);
10215         extent_gen = btrfs_extent_generation(leaf, ei);
10216
10217         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10218                 error(
10219                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10220                     disk_bytenr, disk_num_bytes,
10221                     BTRFS_EXTENT_FLAG_DATA);
10222                 err |= BACKREF_MISMATCH;
10223         }
10224
10225         if (file_extent_gen < extent_gen) {
10226                 error(
10227 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
10228                         disk_bytenr, disk_num_bytes, file_extent_gen,
10229                         extent_gen);
10230                 err |= BACKREF_MISMATCH;
10231         }
10232
10233         /* Check data backref inside that extent item */
10234         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10235         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10236         ptr = (unsigned long)iref;
10237         end = (unsigned long)ei + item_size;
10238         while (ptr < end) {
10239                 iref = (struct btrfs_extent_inline_ref *)ptr;
10240                 type = btrfs_extent_inline_ref_type(leaf, iref);
10241                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10242
10243                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10244                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10245                         if (ref_root == owner || ref_root == root->objectid)
10246                                 found_dbackref = 1;
10247                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10248                         found_dbackref = !check_tree_block_ref(root, NULL,
10249                                 btrfs_extent_inline_ref_offset(leaf, iref),
10250                                 0, owner);
10251                 }
10252
10253                 if (found_dbackref)
10254                         break;
10255                 ptr += btrfs_extent_inline_ref_size(type);
10256         }
10257
10258         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10259         if (!found_dbackref) {
10260                 btrfs_release_path(&path);
10261
10262                 btrfs_init_path(&path);
10263                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10264                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10265                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10266                                 fi_key.objectid, fi_key.offset);
10267
10268                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10269                                         &dbref_key, &path, 0, 0);
10270                 if (!ret)
10271                         found_dbackref = 1;
10272         }
10273
10274         if (!found_dbackref)
10275                 err |= BACKREF_MISSING;
10276 error:
10277         btrfs_release_path(&path);
10278         if (err & BACKREF_MISSING) {
10279                 error("data extent[%llu %llu] backref lost",
10280                       disk_bytenr, disk_num_bytes);
10281         }
10282         return err;
10283 }
10284
10285 /*
10286  * Get real tree block level for the case like shared block
10287  * Return >= 0 as tree level
10288  * Return <0 for error
10289  */
10290 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10291 {
10292         struct extent_buffer *eb;
10293         struct btrfs_path path;
10294         struct btrfs_key key;
10295         struct btrfs_extent_item *ei;
10296         u64 flags;
10297         u64 transid;
10298         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10299         u8 backref_level;
10300         u8 header_level;
10301         int ret;
10302
10303         /* Search extent tree for extent generation and level */
10304         key.objectid = bytenr;
10305         key.type = BTRFS_METADATA_ITEM_KEY;
10306         key.offset = (u64)-1;
10307
10308         btrfs_init_path(&path);
10309         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10310         if (ret < 0)
10311                 goto release_out;
10312         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10313         if (ret < 0)
10314                 goto release_out;
10315         if (ret > 0) {
10316                 ret = -ENOENT;
10317                 goto release_out;
10318         }
10319
10320         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10321         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10322                             struct btrfs_extent_item);
10323         flags = btrfs_extent_flags(path.nodes[0], ei);
10324         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10325                 ret = -ENOENT;
10326                 goto release_out;
10327         }
10328
10329         /* Get transid for later read_tree_block() check */
10330         transid = btrfs_extent_generation(path.nodes[0], ei);
10331
10332         /* Get backref level as one source */
10333         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10334                 backref_level = key.offset;
10335         } else {
10336                 struct btrfs_tree_block_info *info;
10337
10338                 info = (struct btrfs_tree_block_info *)(ei + 1);
10339                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10340         }
10341         btrfs_release_path(&path);
10342
10343         /* Get level from tree block as an alternative source */
10344         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10345         if (!extent_buffer_uptodate(eb)) {
10346                 free_extent_buffer(eb);
10347                 return -EIO;
10348         }
10349         header_level = btrfs_header_level(eb);
10350         free_extent_buffer(eb);
10351
10352         if (header_level != backref_level)
10353                 return -EIO;
10354         return header_level;
10355
10356 release_out:
10357         btrfs_release_path(&path);
10358         return ret;
10359 }
10360
10361 /*
10362  * Check if a tree block backref is valid (points to a valid tree block)
10363  * if level == -1, level will be resolved
10364  * Return >0 for any error found and print error message
10365  */
10366 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10367                                     u64 bytenr, int level)
10368 {
10369         struct btrfs_root *root;
10370         struct btrfs_key key;
10371         struct btrfs_path path;
10372         struct extent_buffer *eb;
10373         struct extent_buffer *node;
10374         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10375         int err = 0;
10376         int ret;
10377
10378         /* Query level for level == -1 special case */
10379         if (level == -1)
10380                 level = query_tree_block_level(fs_info, bytenr);
10381         if (level < 0) {
10382                 err |= REFERENCER_MISSING;
10383                 goto out;
10384         }
10385
10386         key.objectid = root_id;
10387         key.type = BTRFS_ROOT_ITEM_KEY;
10388         key.offset = (u64)-1;
10389
10390         root = btrfs_read_fs_root(fs_info, &key);
10391         if (IS_ERR(root)) {
10392                 err |= REFERENCER_MISSING;
10393                 goto out;
10394         }
10395
10396         /* Read out the tree block to get item/node key */
10397         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10398         if (!extent_buffer_uptodate(eb)) {
10399                 err |= REFERENCER_MISSING;
10400                 free_extent_buffer(eb);
10401                 goto out;
10402         }
10403
10404         /* Empty tree, no need to check key */
10405         if (!btrfs_header_nritems(eb) && !level) {
10406                 free_extent_buffer(eb);
10407                 goto out;
10408         }
10409
10410         if (level)
10411                 btrfs_node_key_to_cpu(eb, &key, 0);
10412         else
10413                 btrfs_item_key_to_cpu(eb, &key, 0);
10414
10415         free_extent_buffer(eb);
10416
10417         btrfs_init_path(&path);
10418         path.lowest_level = level;
10419         /* Search with the first key, to ensure we can reach it */
10420         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10421         if (ret < 0) {
10422                 err |= REFERENCER_MISSING;
10423                 goto release_out;
10424         }
10425
10426         node = path.nodes[level];
10427         if (btrfs_header_bytenr(node) != bytenr) {
10428                 error(
10429         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10430                         bytenr, nodesize, bytenr,
10431                         btrfs_header_bytenr(node));
10432                 err |= REFERENCER_MISMATCH;
10433         }
10434         if (btrfs_header_level(node) != level) {
10435                 error(
10436         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10437                         bytenr, nodesize, level,
10438                         btrfs_header_level(node));
10439                 err |= REFERENCER_MISMATCH;
10440         }
10441
10442 release_out:
10443         btrfs_release_path(&path);
10444 out:
10445         if (err & REFERENCER_MISSING) {
10446                 if (level < 0)
10447                         error("extent [%llu %d] lost referencer (owner: %llu)",
10448                                 bytenr, nodesize, root_id);
10449                 else
10450                         error(
10451                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10452                                 bytenr, nodesize, root_id, level);
10453         }
10454
10455         return err;
10456 }
10457
10458 /*
10459  * Check referencer for shared block backref
10460  * If level == -1, this function will resolve the level.
10461  */
10462 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10463                                      u64 parent, u64 bytenr, int level)
10464 {
10465         struct extent_buffer *eb;
10466         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10467         u32 nr;
10468         int found_parent = 0;
10469         int i;
10470
10471         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10472         if (!extent_buffer_uptodate(eb))
10473                 goto out;
10474
10475         if (level == -1)
10476                 level = query_tree_block_level(fs_info, bytenr);
10477         if (level < 0)
10478                 goto out;
10479
10480         if (level + 1 != btrfs_header_level(eb))
10481                 goto out;
10482
10483         nr = btrfs_header_nritems(eb);
10484         for (i = 0; i < nr; i++) {
10485                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10486                         found_parent = 1;
10487                         break;
10488                 }
10489         }
10490 out:
10491         free_extent_buffer(eb);
10492         if (!found_parent) {
10493                 error(
10494         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10495                         bytenr, nodesize, parent, level);
10496                 return REFERENCER_MISSING;
10497         }
10498         return 0;
10499 }
10500
10501 /*
10502  * Check referencer for normal (inlined) data ref
10503  * If len == 0, it will be resolved by searching in extent tree
10504  */
10505 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10506                                      u64 root_id, u64 objectid, u64 offset,
10507                                      u64 bytenr, u64 len, u32 count)
10508 {
10509         struct btrfs_root *root;
10510         struct btrfs_root *extent_root = fs_info->extent_root;
10511         struct btrfs_key key;
10512         struct btrfs_path path;
10513         struct extent_buffer *leaf;
10514         struct btrfs_file_extent_item *fi;
10515         u32 found_count = 0;
10516         int slot;
10517         int ret = 0;
10518
10519         if (!len) {
10520                 key.objectid = bytenr;
10521                 key.type = BTRFS_EXTENT_ITEM_KEY;
10522                 key.offset = (u64)-1;
10523
10524                 btrfs_init_path(&path);
10525                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10526                 if (ret < 0)
10527                         goto out;
10528                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10529                 if (ret)
10530                         goto out;
10531                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10532                 if (key.objectid != bytenr ||
10533                     key.type != BTRFS_EXTENT_ITEM_KEY)
10534                         goto out;
10535                 len = key.offset;
10536                 btrfs_release_path(&path);
10537         }
10538         key.objectid = root_id;
10539         key.type = BTRFS_ROOT_ITEM_KEY;
10540         key.offset = (u64)-1;
10541         btrfs_init_path(&path);
10542
10543         root = btrfs_read_fs_root(fs_info, &key);
10544         if (IS_ERR(root))
10545                 goto out;
10546
10547         key.objectid = objectid;
10548         key.type = BTRFS_EXTENT_DATA_KEY;
10549         /*
10550          * It can be nasty as data backref offset is
10551          * file offset - file extent offset, which is smaller or
10552          * equal to original backref offset.  The only special case is
10553          * overflow.  So we need to special check and do further search.
10554          */
10555         key.offset = offset & (1ULL << 63) ? 0 : offset;
10556
10557         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10558         if (ret < 0)
10559                 goto out;
10560
10561         /*
10562          * Search afterwards to get correct one
10563          * NOTE: As we must do a comprehensive check on the data backref to
10564          * make sure the dref count also matches, we must iterate all file
10565          * extents for that inode.
10566          */
10567         while (1) {
10568                 leaf = path.nodes[0];
10569                 slot = path.slots[0];
10570
10571                 btrfs_item_key_to_cpu(leaf, &key, slot);
10572                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10573                         break;
10574                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10575                 /*
10576                  * Except normal disk bytenr and disk num bytes, we still
10577                  * need to do extra check on dbackref offset as
10578                  * dbackref offset = file_offset - file_extent_offset
10579                  */
10580                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10581                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10582                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10583                     offset)
10584                         found_count++;
10585
10586                 ret = btrfs_next_item(root, &path);
10587                 if (ret)
10588                         break;
10589         }
10590 out:
10591         btrfs_release_path(&path);
10592         if (found_count != count) {
10593                 error(
10594 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10595                         bytenr, len, root_id, objectid, offset, count, found_count);
10596                 return REFERENCER_MISSING;
10597         }
10598         return 0;
10599 }
10600
10601 /*
10602  * Check if the referencer of a shared data backref exists
10603  */
10604 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10605                                      u64 parent, u64 bytenr)
10606 {
10607         struct extent_buffer *eb;
10608         struct btrfs_key key;
10609         struct btrfs_file_extent_item *fi;
10610         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10611         u32 nr;
10612         int found_parent = 0;
10613         int i;
10614
10615         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10616         if (!extent_buffer_uptodate(eb))
10617                 goto out;
10618
10619         nr = btrfs_header_nritems(eb);
10620         for (i = 0; i < nr; i++) {
10621                 btrfs_item_key_to_cpu(eb, &key, i);
10622                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10623                         continue;
10624
10625                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10626                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10627                         continue;
10628
10629                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10630                         found_parent = 1;
10631                         break;
10632                 }
10633         }
10634
10635 out:
10636         free_extent_buffer(eb);
10637         if (!found_parent) {
10638                 error("shared extent %llu referencer lost (parent: %llu)",
10639                         bytenr, parent);
10640                 return REFERENCER_MISSING;
10641         }
10642         return 0;
10643 }
10644
10645 /*
10646  * This function will check a given extent item, including its backref and
10647  * itself (like crossing stripe boundary and type)
10648  *
10649  * Since we don't use extent_record anymore, introduce new error bit
10650  */
10651 static int check_extent_item(struct btrfs_fs_info *fs_info,
10652                              struct extent_buffer *eb, int slot)
10653 {
10654         struct btrfs_extent_item *ei;
10655         struct btrfs_extent_inline_ref *iref;
10656         struct btrfs_extent_data_ref *dref;
10657         unsigned long end;
10658         unsigned long ptr;
10659         int type;
10660         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10661         u32 item_size = btrfs_item_size_nr(eb, slot);
10662         u64 flags;
10663         u64 offset;
10664         int metadata = 0;
10665         int level;
10666         struct btrfs_key key;
10667         int ret;
10668         int err = 0;
10669
10670         btrfs_item_key_to_cpu(eb, &key, slot);
10671         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10672                 bytes_used += key.offset;
10673         else
10674                 bytes_used += nodesize;
10675
10676         if (item_size < sizeof(*ei)) {
10677                 /*
10678                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10679                  * old thing when on disk format is still un-determined.
10680                  * No need to care about it anymore
10681                  */
10682                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10683                 return -ENOTTY;
10684         }
10685
10686         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10687         flags = btrfs_extent_flags(eb, ei);
10688
10689         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10690                 metadata = 1;
10691         if (metadata && check_crossing_stripes(global_info, key.objectid,
10692                                                eb->len)) {
10693                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10694                       key.objectid, key.objectid + nodesize);
10695                 err |= CROSSING_STRIPE_BOUNDARY;
10696         }
10697
10698         ptr = (unsigned long)(ei + 1);
10699
10700         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10701                 /* Old EXTENT_ITEM metadata */
10702                 struct btrfs_tree_block_info *info;
10703
10704                 info = (struct btrfs_tree_block_info *)ptr;
10705                 level = btrfs_tree_block_level(eb, info);
10706                 ptr += sizeof(struct btrfs_tree_block_info);
10707         } else {
10708                 /* New METADATA_ITEM */
10709                 level = key.offset;
10710         }
10711         end = (unsigned long)ei + item_size;
10712
10713         if (ptr >= end) {
10714                 err |= ITEM_SIZE_MISMATCH;
10715                 goto out;
10716         }
10717
10718         /* Now check every backref in this extent item */
10719 next:
10720         iref = (struct btrfs_extent_inline_ref *)ptr;
10721         type = btrfs_extent_inline_ref_type(eb, iref);
10722         offset = btrfs_extent_inline_ref_offset(eb, iref);
10723         switch (type) {
10724         case BTRFS_TREE_BLOCK_REF_KEY:
10725                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10726                                                level);
10727                 err |= ret;
10728                 break;
10729         case BTRFS_SHARED_BLOCK_REF_KEY:
10730                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10731                                                  level);
10732                 err |= ret;
10733                 break;
10734         case BTRFS_EXTENT_DATA_REF_KEY:
10735                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10736                 ret = check_extent_data_backref(fs_info,
10737                                 btrfs_extent_data_ref_root(eb, dref),
10738                                 btrfs_extent_data_ref_objectid(eb, dref),
10739                                 btrfs_extent_data_ref_offset(eb, dref),
10740                                 key.objectid, key.offset,
10741                                 btrfs_extent_data_ref_count(eb, dref));
10742                 err |= ret;
10743                 break;
10744         case BTRFS_SHARED_DATA_REF_KEY:
10745                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10746                 err |= ret;
10747                 break;
10748         default:
10749                 error("extent[%llu %d %llu] has unknown ref type: %d",
10750                         key.objectid, key.type, key.offset, type);
10751                 err |= UNKNOWN_TYPE;
10752                 goto out;
10753         }
10754
10755         ptr += btrfs_extent_inline_ref_size(type);
10756         if (ptr < end)
10757                 goto next;
10758
10759 out:
10760         return err;
10761 }
10762
10763 /*
10764  * Check if a dev extent item is referred correctly by its chunk
10765  */
10766 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10767                                  struct extent_buffer *eb, int slot)
10768 {
10769         struct btrfs_root *chunk_root = fs_info->chunk_root;
10770         struct btrfs_dev_extent *ptr;
10771         struct btrfs_path path;
10772         struct btrfs_key chunk_key;
10773         struct btrfs_key devext_key;
10774         struct btrfs_chunk *chunk;
10775         struct extent_buffer *l;
10776         int num_stripes;
10777         u64 length;
10778         int i;
10779         int found_chunk = 0;
10780         int ret;
10781
10782         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10783         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10784         length = btrfs_dev_extent_length(eb, ptr);
10785
10786         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10787         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10788         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10789
10790         btrfs_init_path(&path);
10791         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10792         if (ret)
10793                 goto out;
10794
10795         l = path.nodes[0];
10796         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10797         if (btrfs_chunk_length(l, chunk) != length)
10798                 goto out;
10799
10800         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10801         for (i = 0; i < num_stripes; i++) {
10802                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10803                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10804
10805                 if (devid == devext_key.objectid &&
10806                     offset == devext_key.offset) {
10807                         found_chunk = 1;
10808                         break;
10809                 }
10810         }
10811 out:
10812         btrfs_release_path(&path);
10813         if (!found_chunk) {
10814                 error(
10815                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10816                         devext_key.objectid, devext_key.offset, length);
10817                 return REFERENCER_MISSING;
10818         }
10819         return 0;
10820 }
10821
10822 /*
10823  * Check if the used space is correct with the dev item
10824  */
10825 static int check_dev_item(struct btrfs_fs_info *fs_info,
10826                           struct extent_buffer *eb, int slot)
10827 {
10828         struct btrfs_root *dev_root = fs_info->dev_root;
10829         struct btrfs_dev_item *dev_item;
10830         struct btrfs_path path;
10831         struct btrfs_key key;
10832         struct btrfs_dev_extent *ptr;
10833         u64 dev_id;
10834         u64 used;
10835         u64 total = 0;
10836         int ret;
10837
10838         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10839         dev_id = btrfs_device_id(eb, dev_item);
10840         used = btrfs_device_bytes_used(eb, dev_item);
10841
10842         key.objectid = dev_id;
10843         key.type = BTRFS_DEV_EXTENT_KEY;
10844         key.offset = 0;
10845
10846         btrfs_init_path(&path);
10847         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10848         if (ret < 0) {
10849                 btrfs_item_key_to_cpu(eb, &key, slot);
10850                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10851                         key.objectid, key.type, key.offset);
10852                 btrfs_release_path(&path);
10853                 return REFERENCER_MISSING;
10854         }
10855
10856         /* Iterate dev_extents to calculate the used space of a device */
10857         while (1) {
10858                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10859
10860                 if (key.objectid > dev_id)
10861                         break;
10862                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10863                         goto next;
10864
10865                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10866                                      struct btrfs_dev_extent);
10867                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10868 next:
10869                 ret = btrfs_next_item(dev_root, &path);
10870                 if (ret)
10871                         break;
10872         }
10873         btrfs_release_path(&path);
10874
10875         if (used != total) {
10876                 btrfs_item_key_to_cpu(eb, &key, slot);
10877                 error(
10878 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10879                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10880                         BTRFS_DEV_EXTENT_KEY, dev_id);
10881                 return ACCOUNTING_MISMATCH;
10882         }
10883         return 0;
10884 }
10885
10886 /*
10887  * Check a block group item with its referener (chunk) and its used space
10888  * with extent/metadata item
10889  */
10890 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10891                                   struct extent_buffer *eb, int slot)
10892 {
10893         struct btrfs_root *extent_root = fs_info->extent_root;
10894         struct btrfs_root *chunk_root = fs_info->chunk_root;
10895         struct btrfs_block_group_item *bi;
10896         struct btrfs_block_group_item bg_item;
10897         struct btrfs_path path;
10898         struct btrfs_key bg_key;
10899         struct btrfs_key chunk_key;
10900         struct btrfs_key extent_key;
10901         struct btrfs_chunk *chunk;
10902         struct extent_buffer *leaf;
10903         struct btrfs_extent_item *ei;
10904         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10905         u64 flags;
10906         u64 bg_flags;
10907         u64 used;
10908         u64 total = 0;
10909         int ret;
10910         int err = 0;
10911
10912         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10913         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10914         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10915         used = btrfs_block_group_used(&bg_item);
10916         bg_flags = btrfs_block_group_flags(&bg_item);
10917
10918         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10919         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10920         chunk_key.offset = bg_key.objectid;
10921
10922         btrfs_init_path(&path);
10923         /* Search for the referencer chunk */
10924         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10925         if (ret) {
10926                 error(
10927                 "block group[%llu %llu] did not find the related chunk item",
10928                         bg_key.objectid, bg_key.offset);
10929                 err |= REFERENCER_MISSING;
10930         } else {
10931                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10932                                         struct btrfs_chunk);
10933                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10934                                                 bg_key.offset) {
10935                         error(
10936         "block group[%llu %llu] related chunk item length does not match",
10937                                 bg_key.objectid, bg_key.offset);
10938                         err |= REFERENCER_MISMATCH;
10939                 }
10940         }
10941         btrfs_release_path(&path);
10942
10943         /* Search from the block group bytenr */
10944         extent_key.objectid = bg_key.objectid;
10945         extent_key.type = 0;
10946         extent_key.offset = 0;
10947
10948         btrfs_init_path(&path);
10949         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10950         if (ret < 0)
10951                 goto out;
10952
10953         /* Iterate extent tree to account used space */
10954         while (1) {
10955                 leaf = path.nodes[0];
10956                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10957                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10958                         break;
10959
10960                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10961                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10962                         goto next;
10963                 if (extent_key.objectid < bg_key.objectid)
10964                         goto next;
10965
10966                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10967                         total += nodesize;
10968                 else
10969                         total += extent_key.offset;
10970
10971                 ei = btrfs_item_ptr(leaf, path.slots[0],
10972                                     struct btrfs_extent_item);
10973                 flags = btrfs_extent_flags(leaf, ei);
10974                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10975                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10976                                 error(
10977                         "bad extent[%llu, %llu) type mismatch with chunk",
10978                                         extent_key.objectid,
10979                                         extent_key.objectid + extent_key.offset);
10980                                 err |= CHUNK_TYPE_MISMATCH;
10981                         }
10982                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10983                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10984                                     BTRFS_BLOCK_GROUP_METADATA))) {
10985                                 error(
10986                         "bad extent[%llu, %llu) type mismatch with chunk",
10987                                         extent_key.objectid,
10988                                         extent_key.objectid + nodesize);
10989                                 err |= CHUNK_TYPE_MISMATCH;
10990                         }
10991                 }
10992 next:
10993                 ret = btrfs_next_item(extent_root, &path);
10994                 if (ret)
10995                         break;
10996         }
10997
10998 out:
10999         btrfs_release_path(&path);
11000
11001         if (total != used) {
11002                 error(
11003                 "block group[%llu %llu] used %llu but extent items used %llu",
11004                         bg_key.objectid, bg_key.offset, used, total);
11005                 err |= ACCOUNTING_MISMATCH;
11006         }
11007         return err;
11008 }
11009
11010 /*
11011  * Check a chunk item.
11012  * Including checking all referred dev_extents and block group
11013  */
11014 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11015                             struct extent_buffer *eb, int slot)
11016 {
11017         struct btrfs_root *extent_root = fs_info->extent_root;
11018         struct btrfs_root *dev_root = fs_info->dev_root;
11019         struct btrfs_path path;
11020         struct btrfs_key chunk_key;
11021         struct btrfs_key bg_key;
11022         struct btrfs_key devext_key;
11023         struct btrfs_chunk *chunk;
11024         struct extent_buffer *leaf;
11025         struct btrfs_block_group_item *bi;
11026         struct btrfs_block_group_item bg_item;
11027         struct btrfs_dev_extent *ptr;
11028         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11029         u64 length;
11030         u64 chunk_end;
11031         u64 type;
11032         u64 profile;
11033         int num_stripes;
11034         u64 offset;
11035         u64 objectid;
11036         int i;
11037         int ret;
11038         int err = 0;
11039
11040         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11041         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11042         length = btrfs_chunk_length(eb, chunk);
11043         chunk_end = chunk_key.offset + length;
11044         if (!IS_ALIGNED(length, sectorsize)) {
11045                 error("chunk[%llu %llu) not aligned to %u",
11046                         chunk_key.offset, chunk_end, sectorsize);
11047                 err |= BYTES_UNALIGNED;
11048                 goto out;
11049         }
11050
11051         type = btrfs_chunk_type(eb, chunk);
11052         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11053         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11054                 error("chunk[%llu %llu) has no chunk type",
11055                         chunk_key.offset, chunk_end);
11056                 err |= UNKNOWN_TYPE;
11057         }
11058         if (profile && (profile & (profile - 1))) {
11059                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11060                         chunk_key.offset, chunk_end, profile);
11061                 err |= UNKNOWN_TYPE;
11062         }
11063
11064         bg_key.objectid = chunk_key.offset;
11065         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11066         bg_key.offset = length;
11067
11068         btrfs_init_path(&path);
11069         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11070         if (ret) {
11071                 error(
11072                 "chunk[%llu %llu) did not find the related block group item",
11073                         chunk_key.offset, chunk_end);
11074                 err |= REFERENCER_MISSING;
11075         } else{
11076                 leaf = path.nodes[0];
11077                 bi = btrfs_item_ptr(leaf, path.slots[0],
11078                                     struct btrfs_block_group_item);
11079                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11080                                    sizeof(bg_item));
11081                 if (btrfs_block_group_flags(&bg_item) != type) {
11082                         error(
11083 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11084                                 chunk_key.offset, chunk_end, type,
11085                                 btrfs_block_group_flags(&bg_item));
11086                         err |= REFERENCER_MISSING;
11087                 }
11088         }
11089
11090         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11091         for (i = 0; i < num_stripes; i++) {
11092                 btrfs_release_path(&path);
11093                 btrfs_init_path(&path);
11094                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11095                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11096                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11097
11098                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11099                                         0, 0);
11100                 if (ret)
11101                         goto not_match_dev;
11102
11103                 leaf = path.nodes[0];
11104                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11105                                      struct btrfs_dev_extent);
11106                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11107                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11108                 if (objectid != chunk_key.objectid ||
11109                     offset != chunk_key.offset ||
11110                     btrfs_dev_extent_length(leaf, ptr) != length)
11111                         goto not_match_dev;
11112                 continue;
11113 not_match_dev:
11114                 err |= BACKREF_MISSING;
11115                 error(
11116                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11117                         chunk_key.objectid, chunk_end, i);
11118                 continue;
11119         }
11120         btrfs_release_path(&path);
11121 out:
11122         return err;
11123 }
11124
11125 /*
11126  * Main entry function to check known items and update related accounting info
11127  */
11128 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11129 {
11130         struct btrfs_fs_info *fs_info = root->fs_info;
11131         struct btrfs_key key;
11132         int slot = 0;
11133         int type;
11134         struct btrfs_extent_data_ref *dref;
11135         int ret;
11136         int err = 0;
11137
11138 next:
11139         btrfs_item_key_to_cpu(eb, &key, slot);
11140         type = key.type;
11141
11142         switch (type) {
11143         case BTRFS_EXTENT_DATA_KEY:
11144                 ret = check_extent_data_item(root, eb, slot);
11145                 err |= ret;
11146                 break;
11147         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11148                 ret = check_block_group_item(fs_info, eb, slot);
11149                 err |= ret;
11150                 break;
11151         case BTRFS_DEV_ITEM_KEY:
11152                 ret = check_dev_item(fs_info, eb, slot);
11153                 err |= ret;
11154                 break;
11155         case BTRFS_CHUNK_ITEM_KEY:
11156                 ret = check_chunk_item(fs_info, eb, slot);
11157                 err |= ret;
11158                 break;
11159         case BTRFS_DEV_EXTENT_KEY:
11160                 ret = check_dev_extent_item(fs_info, eb, slot);
11161                 err |= ret;
11162                 break;
11163         case BTRFS_EXTENT_ITEM_KEY:
11164         case BTRFS_METADATA_ITEM_KEY:
11165                 ret = check_extent_item(fs_info, eb, slot);
11166                 err |= ret;
11167                 break;
11168         case BTRFS_EXTENT_CSUM_KEY:
11169                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11170                 break;
11171         case BTRFS_TREE_BLOCK_REF_KEY:
11172                 ret = check_tree_block_backref(fs_info, key.offset,
11173                                                key.objectid, -1);
11174                 err |= ret;
11175                 break;
11176         case BTRFS_EXTENT_DATA_REF_KEY:
11177                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11178                 ret = check_extent_data_backref(fs_info,
11179                                 btrfs_extent_data_ref_root(eb, dref),
11180                                 btrfs_extent_data_ref_objectid(eb, dref),
11181                                 btrfs_extent_data_ref_offset(eb, dref),
11182                                 key.objectid, 0,
11183                                 btrfs_extent_data_ref_count(eb, dref));
11184                 err |= ret;
11185                 break;
11186         case BTRFS_SHARED_BLOCK_REF_KEY:
11187                 ret = check_shared_block_backref(fs_info, key.offset,
11188                                                  key.objectid, -1);
11189                 err |= ret;
11190                 break;
11191         case BTRFS_SHARED_DATA_REF_KEY:
11192                 ret = check_shared_data_backref(fs_info, key.offset,
11193                                                 key.objectid);
11194                 err |= ret;
11195                 break;
11196         default:
11197                 break;
11198         }
11199
11200         if (++slot < btrfs_header_nritems(eb))
11201                 goto next;
11202
11203         return err;
11204 }
11205
11206 /*
11207  * Helper function for later fs/subvol tree check.  To determine if a tree
11208  * block should be checked.
11209  * This function will ensure only the direct referencer with lowest rootid to
11210  * check a fs/subvolume tree block.
11211  *
11212  * Backref check at extent tree would detect errors like missing subvolume
11213  * tree, so we can do aggressive check to reduce duplicated checks.
11214  */
11215 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11216 {
11217         struct btrfs_root *extent_root = root->fs_info->extent_root;
11218         struct btrfs_key key;
11219         struct btrfs_path path;
11220         struct extent_buffer *leaf;
11221         int slot;
11222         struct btrfs_extent_item *ei;
11223         unsigned long ptr;
11224         unsigned long end;
11225         int type;
11226         u32 item_size;
11227         u64 offset;
11228         struct btrfs_extent_inline_ref *iref;
11229         int ret;
11230
11231         btrfs_init_path(&path);
11232         key.objectid = btrfs_header_bytenr(eb);
11233         key.type = BTRFS_METADATA_ITEM_KEY;
11234         key.offset = (u64)-1;
11235
11236         /*
11237          * Any failure in backref resolving means we can't determine
11238          * whom the tree block belongs to.
11239          * So in that case, we need to check that tree block
11240          */
11241         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11242         if (ret < 0)
11243                 goto need_check;
11244
11245         ret = btrfs_previous_extent_item(extent_root, &path,
11246                                          btrfs_header_bytenr(eb));
11247         if (ret)
11248                 goto need_check;
11249
11250         leaf = path.nodes[0];
11251         slot = path.slots[0];
11252         btrfs_item_key_to_cpu(leaf, &key, slot);
11253         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11254
11255         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11256                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11257         } else {
11258                 struct btrfs_tree_block_info *info;
11259
11260                 info = (struct btrfs_tree_block_info *)(ei + 1);
11261                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11262         }
11263
11264         item_size = btrfs_item_size_nr(leaf, slot);
11265         ptr = (unsigned long)iref;
11266         end = (unsigned long)ei + item_size;
11267         while (ptr < end) {
11268                 iref = (struct btrfs_extent_inline_ref *)ptr;
11269                 type = btrfs_extent_inline_ref_type(leaf, iref);
11270                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11271
11272                 /*
11273                  * We only check the tree block if current root is
11274                  * the lowest referencer of it.
11275                  */
11276                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11277                     offset < root->objectid) {
11278                         btrfs_release_path(&path);
11279                         return 0;
11280                 }
11281
11282                 ptr += btrfs_extent_inline_ref_size(type);
11283         }
11284         /*
11285          * Normally we should also check keyed tree block ref, but that may be
11286          * very time consuming.  Inlined ref should already make us skip a lot
11287          * of refs now.  So skip search keyed tree block ref.
11288          */
11289
11290 need_check:
11291         btrfs_release_path(&path);
11292         return 1;
11293 }
11294
11295 /*
11296  * Traversal function for tree block. We will do:
11297  * 1) Skip shared fs/subvolume tree blocks
11298  * 2) Update related bytes accounting
11299  * 3) Pre-order traversal
11300  */
11301 static int traverse_tree_block(struct btrfs_root *root,
11302                                 struct extent_buffer *node)
11303 {
11304         struct extent_buffer *eb;
11305         struct btrfs_key key;
11306         struct btrfs_key drop_key;
11307         int level;
11308         u64 nr;
11309         int i;
11310         int err = 0;
11311         int ret;
11312
11313         /*
11314          * Skip shared fs/subvolume tree block, in that case they will
11315          * be checked by referencer with lowest rootid
11316          */
11317         if (is_fstree(root->objectid) && !should_check(root, node))
11318                 return 0;
11319
11320         /* Update bytes accounting */
11321         total_btree_bytes += node->len;
11322         if (fs_root_objectid(btrfs_header_owner(node)))
11323                 total_fs_tree_bytes += node->len;
11324         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11325                 total_extent_tree_bytes += node->len;
11326         if (!found_old_backref &&
11327             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11328             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11329             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11330                 found_old_backref = 1;
11331
11332         /* pre-order tranversal, check itself first */
11333         level = btrfs_header_level(node);
11334         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11335                                    btrfs_header_level(node),
11336                                    btrfs_header_owner(node));
11337         err |= ret;
11338         if (err)
11339                 error(
11340         "check %s failed root %llu bytenr %llu level %d, force continue check",
11341                         level ? "node":"leaf", root->objectid,
11342                         btrfs_header_bytenr(node), btrfs_header_level(node));
11343
11344         if (!level) {
11345                 btree_space_waste += btrfs_leaf_free_space(root, node);
11346                 ret = check_leaf_items(root, node);
11347                 err |= ret;
11348                 return err;
11349         }
11350
11351         nr = btrfs_header_nritems(node);
11352         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11353         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11354                 sizeof(struct btrfs_key_ptr);
11355
11356         /* Then check all its children */
11357         for (i = 0; i < nr; i++) {
11358                 u64 blocknr = btrfs_node_blockptr(node, i);
11359
11360                 btrfs_node_key_to_cpu(node, &key, i);
11361                 if (level == root->root_item.drop_level &&
11362                     is_dropped_key(&key, &drop_key))
11363                         continue;
11364
11365                 /*
11366                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11367                  * to call the function itself.
11368                  */
11369                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11370                 if (extent_buffer_uptodate(eb)) {
11371                         ret = traverse_tree_block(root, eb);
11372                         err |= ret;
11373                 }
11374                 free_extent_buffer(eb);
11375         }
11376
11377         return err;
11378 }
11379
11380 /*
11381  * Low memory usage version check_chunks_and_extents.
11382  */
11383 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11384 {
11385         struct btrfs_path path;
11386         struct btrfs_key key;
11387         struct btrfs_root *root1;
11388         struct btrfs_root *cur_root;
11389         int err = 0;
11390         int ret;
11391
11392         root1 = root->fs_info->chunk_root;
11393         ret = traverse_tree_block(root1, root1->node);
11394         err |= ret;
11395
11396         root1 = root->fs_info->tree_root;
11397         ret = traverse_tree_block(root1, root1->node);
11398         err |= ret;
11399
11400         btrfs_init_path(&path);
11401         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11402         key.offset = 0;
11403         key.type = BTRFS_ROOT_ITEM_KEY;
11404
11405         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11406         if (ret) {
11407                 error("cannot find extent treet in tree_root");
11408                 goto out;
11409         }
11410
11411         while (1) {
11412                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11413                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11414                         goto next;
11415                 key.offset = (u64)-1;
11416
11417                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11418                 if (IS_ERR(cur_root) || !cur_root) {
11419                         error("failed to read tree: %lld", key.objectid);
11420                         goto next;
11421                 }
11422
11423                 ret = traverse_tree_block(cur_root, cur_root->node);
11424                 err |= ret;
11425
11426 next:
11427                 ret = btrfs_next_item(root1, &path);
11428                 if (ret)
11429                         goto out;
11430         }
11431
11432 out:
11433         btrfs_release_path(&path);
11434         return err;
11435 }
11436
11437 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11438                            struct btrfs_root *root, int overwrite)
11439 {
11440         struct extent_buffer *c;
11441         struct extent_buffer *old = root->node;
11442         int level;
11443         int ret;
11444         struct btrfs_disk_key disk_key = {0,0,0};
11445
11446         level = 0;
11447
11448         if (overwrite) {
11449                 c = old;
11450                 extent_buffer_get(c);
11451                 goto init;
11452         }
11453         c = btrfs_alloc_free_block(trans, root,
11454                                    root->nodesize,
11455                                    root->root_key.objectid,
11456                                    &disk_key, level, 0, 0);
11457         if (IS_ERR(c)) {
11458                 c = old;
11459                 extent_buffer_get(c);
11460                 overwrite = 1;
11461         }
11462 init:
11463         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11464         btrfs_set_header_level(c, level);
11465         btrfs_set_header_bytenr(c, c->start);
11466         btrfs_set_header_generation(c, trans->transid);
11467         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11468         btrfs_set_header_owner(c, root->root_key.objectid);
11469
11470         write_extent_buffer(c, root->fs_info->fsid,
11471                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11472
11473         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11474                             btrfs_header_chunk_tree_uuid(c),
11475                             BTRFS_UUID_SIZE);
11476
11477         btrfs_mark_buffer_dirty(c);
11478         /*
11479          * this case can happen in the following case:
11480          *
11481          * 1.overwrite previous root.
11482          *
11483          * 2.reinit reloc data root, this is because we skip pin
11484          * down reloc data tree before which means we can allocate
11485          * same block bytenr here.
11486          */
11487         if (old->start == c->start) {
11488                 btrfs_set_root_generation(&root->root_item,
11489                                           trans->transid);
11490                 root->root_item.level = btrfs_header_level(root->node);
11491                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11492                                         &root->root_key, &root->root_item);
11493                 if (ret) {
11494                         free_extent_buffer(c);
11495                         return ret;
11496                 }
11497         }
11498         free_extent_buffer(old);
11499         root->node = c;
11500         add_root_to_dirty_list(root);
11501         return 0;
11502 }
11503
11504 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11505                                 struct extent_buffer *eb, int tree_root)
11506 {
11507         struct extent_buffer *tmp;
11508         struct btrfs_root_item *ri;
11509         struct btrfs_key key;
11510         u64 bytenr;
11511         u32 nodesize;
11512         int level = btrfs_header_level(eb);
11513         int nritems;
11514         int ret;
11515         int i;
11516
11517         /*
11518          * If we have pinned this block before, don't pin it again.
11519          * This can not only avoid forever loop with broken filesystem
11520          * but also give us some speedups.
11521          */
11522         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11523                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11524                 return 0;
11525
11526         btrfs_pin_extent(fs_info, eb->start, eb->len);
11527
11528         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11529         nritems = btrfs_header_nritems(eb);
11530         for (i = 0; i < nritems; i++) {
11531                 if (level == 0) {
11532                         btrfs_item_key_to_cpu(eb, &key, i);
11533                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11534                                 continue;
11535                         /* Skip the extent root and reloc roots */
11536                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11537                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11538                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11539                                 continue;
11540                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11541                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11542
11543                         /*
11544                          * If at any point we start needing the real root we
11545                          * will have to build a stump root for the root we are
11546                          * in, but for now this doesn't actually use the root so
11547                          * just pass in extent_root.
11548                          */
11549                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11550                                               nodesize, 0);
11551                         if (!extent_buffer_uptodate(tmp)) {
11552                                 fprintf(stderr, "Error reading root block\n");
11553                                 return -EIO;
11554                         }
11555                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11556                         free_extent_buffer(tmp);
11557                         if (ret)
11558                                 return ret;
11559                 } else {
11560                         bytenr = btrfs_node_blockptr(eb, i);
11561
11562                         /* If we aren't the tree root don't read the block */
11563                         if (level == 1 && !tree_root) {
11564                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11565                                 continue;
11566                         }
11567
11568                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11569                                               nodesize, 0);
11570                         if (!extent_buffer_uptodate(tmp)) {
11571                                 fprintf(stderr, "Error reading tree block\n");
11572                                 return -EIO;
11573                         }
11574                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11575                         free_extent_buffer(tmp);
11576                         if (ret)
11577                                 return ret;
11578                 }
11579         }
11580
11581         return 0;
11582 }
11583
11584 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11585 {
11586         int ret;
11587
11588         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11589         if (ret)
11590                 return ret;
11591
11592         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11593 }
11594
11595 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11596 {
11597         struct btrfs_block_group_cache *cache;
11598         struct btrfs_path path;
11599         struct extent_buffer *leaf;
11600         struct btrfs_chunk *chunk;
11601         struct btrfs_key key;
11602         int ret;
11603         u64 start;
11604
11605         btrfs_init_path(&path);
11606         key.objectid = 0;
11607         key.type = BTRFS_CHUNK_ITEM_KEY;
11608         key.offset = 0;
11609         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11610         if (ret < 0) {
11611                 btrfs_release_path(&path);
11612                 return ret;
11613         }
11614
11615         /*
11616          * We do this in case the block groups were screwed up and had alloc
11617          * bits that aren't actually set on the chunks.  This happens with
11618          * restored images every time and could happen in real life I guess.
11619          */
11620         fs_info->avail_data_alloc_bits = 0;
11621         fs_info->avail_metadata_alloc_bits = 0;
11622         fs_info->avail_system_alloc_bits = 0;
11623
11624         /* First we need to create the in-memory block groups */
11625         while (1) {
11626                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11627                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11628                         if (ret < 0) {
11629                                 btrfs_release_path(&path);
11630                                 return ret;
11631                         }
11632                         if (ret) {
11633                                 ret = 0;
11634                                 break;
11635                         }
11636                 }
11637                 leaf = path.nodes[0];
11638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11639                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11640                         path.slots[0]++;
11641                         continue;
11642                 }
11643
11644                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11645                 btrfs_add_block_group(fs_info, 0,
11646                                       btrfs_chunk_type(leaf, chunk),
11647                                       key.objectid, key.offset,
11648                                       btrfs_chunk_length(leaf, chunk));
11649                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11650                                  key.offset + btrfs_chunk_length(leaf, chunk),
11651                                  GFP_NOFS);
11652                 path.slots[0]++;
11653         }
11654         start = 0;
11655         while (1) {
11656                 cache = btrfs_lookup_first_block_group(fs_info, start);
11657                 if (!cache)
11658                         break;
11659                 cache->cached = 1;
11660                 start = cache->key.objectid + cache->key.offset;
11661         }
11662
11663         btrfs_release_path(&path);
11664         return 0;
11665 }
11666
11667 static int reset_balance(struct btrfs_trans_handle *trans,
11668                          struct btrfs_fs_info *fs_info)
11669 {
11670         struct btrfs_root *root = fs_info->tree_root;
11671         struct btrfs_path path;
11672         struct extent_buffer *leaf;
11673         struct btrfs_key key;
11674         int del_slot, del_nr = 0;
11675         int ret;
11676         int found = 0;
11677
11678         btrfs_init_path(&path);
11679         key.objectid = BTRFS_BALANCE_OBJECTID;
11680         key.type = BTRFS_BALANCE_ITEM_KEY;
11681         key.offset = 0;
11682         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11683         if (ret) {
11684                 if (ret > 0)
11685                         ret = 0;
11686                 if (!ret)
11687                         goto reinit_data_reloc;
11688                 else
11689                         goto out;
11690         }
11691
11692         ret = btrfs_del_item(trans, root, &path);
11693         if (ret)
11694                 goto out;
11695         btrfs_release_path(&path);
11696
11697         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11698         key.type = BTRFS_ROOT_ITEM_KEY;
11699         key.offset = 0;
11700         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11701         if (ret < 0)
11702                 goto out;
11703         while (1) {
11704                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11705                         if (!found)
11706                                 break;
11707
11708                         if (del_nr) {
11709                                 ret = btrfs_del_items(trans, root, &path,
11710                                                       del_slot, del_nr);
11711                                 del_nr = 0;
11712                                 if (ret)
11713                                         goto out;
11714                         }
11715                         key.offset++;
11716                         btrfs_release_path(&path);
11717
11718                         found = 0;
11719                         ret = btrfs_search_slot(trans, root, &key, &path,
11720                                                 -1, 1);
11721                         if (ret < 0)
11722                                 goto out;
11723                         continue;
11724                 }
11725                 found = 1;
11726                 leaf = path.nodes[0];
11727                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11728                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11729                         break;
11730                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11731                         path.slots[0]++;
11732                         continue;
11733                 }
11734                 if (!del_nr) {
11735                         del_slot = path.slots[0];
11736                         del_nr = 1;
11737                 } else {
11738                         del_nr++;
11739                 }
11740                 path.slots[0]++;
11741         }
11742
11743         if (del_nr) {
11744                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11745                 if (ret)
11746                         goto out;
11747         }
11748         btrfs_release_path(&path);
11749
11750 reinit_data_reloc:
11751         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11752         key.type = BTRFS_ROOT_ITEM_KEY;
11753         key.offset = (u64)-1;
11754         root = btrfs_read_fs_root(fs_info, &key);
11755         if (IS_ERR(root)) {
11756                 fprintf(stderr, "Error reading data reloc tree\n");
11757                 ret = PTR_ERR(root);
11758                 goto out;
11759         }
11760         record_root_in_trans(trans, root);
11761         ret = btrfs_fsck_reinit_root(trans, root, 0);
11762         if (ret)
11763                 goto out;
11764         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11765 out:
11766         btrfs_release_path(&path);
11767         return ret;
11768 }
11769
11770 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11771                               struct btrfs_fs_info *fs_info)
11772 {
11773         u64 start = 0;
11774         int ret;
11775
11776         /*
11777          * The only reason we don't do this is because right now we're just
11778          * walking the trees we find and pinning down their bytes, we don't look
11779          * at any of the leaves.  In order to do mixed groups we'd have to check
11780          * the leaves of any fs roots and pin down the bytes for any file
11781          * extents we find.  Not hard but why do it if we don't have to?
11782          */
11783         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11784                 fprintf(stderr, "We don't support re-initing the extent tree "
11785                         "for mixed block groups yet, please notify a btrfs "
11786                         "developer you want to do this so they can add this "
11787                         "functionality.\n");
11788                 return -EINVAL;
11789         }
11790
11791         /*
11792          * first we need to walk all of the trees except the extent tree and pin
11793          * down the bytes that are in use so we don't overwrite any existing
11794          * metadata.
11795          */
11796         ret = pin_metadata_blocks(fs_info);
11797         if (ret) {
11798                 fprintf(stderr, "error pinning down used bytes\n");
11799                 return ret;
11800         }
11801
11802         /*
11803          * Need to drop all the block groups since we're going to recreate all
11804          * of them again.
11805          */
11806         btrfs_free_block_groups(fs_info);
11807         ret = reset_block_groups(fs_info);
11808         if (ret) {
11809                 fprintf(stderr, "error resetting the block groups\n");
11810                 return ret;
11811         }
11812
11813         /* Ok we can allocate now, reinit the extent root */
11814         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11815         if (ret) {
11816                 fprintf(stderr, "extent root initialization failed\n");
11817                 /*
11818                  * When the transaction code is updated we should end the
11819                  * transaction, but for now progs only knows about commit so
11820                  * just return an error.
11821                  */
11822                 return ret;
11823         }
11824
11825         /*
11826          * Now we have all the in-memory block groups setup so we can make
11827          * allocations properly, and the metadata we care about is safe since we
11828          * pinned all of it above.
11829          */
11830         while (1) {
11831                 struct btrfs_block_group_cache *cache;
11832
11833                 cache = btrfs_lookup_first_block_group(fs_info, start);
11834                 if (!cache)
11835                         break;
11836                 start = cache->key.objectid + cache->key.offset;
11837                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11838                                         &cache->key, &cache->item,
11839                                         sizeof(cache->item));
11840                 if (ret) {
11841                         fprintf(stderr, "Error adding block group\n");
11842                         return ret;
11843                 }
11844                 btrfs_extent_post_op(trans, fs_info->extent_root);
11845         }
11846
11847         ret = reset_balance(trans, fs_info);
11848         if (ret)
11849                 fprintf(stderr, "error resetting the pending balance\n");
11850
11851         return ret;
11852 }
11853
11854 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11855 {
11856         struct btrfs_path path;
11857         struct btrfs_trans_handle *trans;
11858         struct btrfs_key key;
11859         int ret;
11860
11861         printf("Recowing metadata block %llu\n", eb->start);
11862         key.objectid = btrfs_header_owner(eb);
11863         key.type = BTRFS_ROOT_ITEM_KEY;
11864         key.offset = (u64)-1;
11865
11866         root = btrfs_read_fs_root(root->fs_info, &key);
11867         if (IS_ERR(root)) {
11868                 fprintf(stderr, "Couldn't find owner root %llu\n",
11869                         key.objectid);
11870                 return PTR_ERR(root);
11871         }
11872
11873         trans = btrfs_start_transaction(root, 1);
11874         if (IS_ERR(trans))
11875                 return PTR_ERR(trans);
11876
11877         btrfs_init_path(&path);
11878         path.lowest_level = btrfs_header_level(eb);
11879         if (path.lowest_level)
11880                 btrfs_node_key_to_cpu(eb, &key, 0);
11881         else
11882                 btrfs_item_key_to_cpu(eb, &key, 0);
11883
11884         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11885         btrfs_commit_transaction(trans, root);
11886         btrfs_release_path(&path);
11887         return ret;
11888 }
11889
11890 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11891 {
11892         struct btrfs_path path;
11893         struct btrfs_trans_handle *trans;
11894         struct btrfs_key key;
11895         int ret;
11896
11897         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11898                bad->key.type, bad->key.offset);
11899         key.objectid = bad->root_id;
11900         key.type = BTRFS_ROOT_ITEM_KEY;
11901         key.offset = (u64)-1;
11902
11903         root = btrfs_read_fs_root(root->fs_info, &key);
11904         if (IS_ERR(root)) {
11905                 fprintf(stderr, "Couldn't find owner root %llu\n",
11906                         key.objectid);
11907                 return PTR_ERR(root);
11908         }
11909
11910         trans = btrfs_start_transaction(root, 1);
11911         if (IS_ERR(trans))
11912                 return PTR_ERR(trans);
11913
11914         btrfs_init_path(&path);
11915         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11916         if (ret) {
11917                 if (ret > 0)
11918                         ret = 0;
11919                 goto out;
11920         }
11921         ret = btrfs_del_item(trans, root, &path);
11922 out:
11923         btrfs_commit_transaction(trans, root);
11924         btrfs_release_path(&path);
11925         return ret;
11926 }
11927
11928 static int zero_log_tree(struct btrfs_root *root)
11929 {
11930         struct btrfs_trans_handle *trans;
11931         int ret;
11932
11933         trans = btrfs_start_transaction(root, 1);
11934         if (IS_ERR(trans)) {
11935                 ret = PTR_ERR(trans);
11936                 return ret;
11937         }
11938         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11939         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11940         ret = btrfs_commit_transaction(trans, root);
11941         return ret;
11942 }
11943
11944 static int populate_csum(struct btrfs_trans_handle *trans,
11945                          struct btrfs_root *csum_root, char *buf, u64 start,
11946                          u64 len)
11947 {
11948         u64 offset = 0;
11949         u64 sectorsize;
11950         int ret = 0;
11951
11952         while (offset < len) {
11953                 sectorsize = csum_root->sectorsize;
11954                 ret = read_extent_data(csum_root, buf, start + offset,
11955                                        &sectorsize, 0);
11956                 if (ret)
11957                         break;
11958                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11959                                             start + offset, buf, sectorsize);
11960                 if (ret)
11961                         break;
11962                 offset += sectorsize;
11963         }
11964         return ret;
11965 }
11966
11967 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11968                                       struct btrfs_root *csum_root,
11969                                       struct btrfs_root *cur_root)
11970 {
11971         struct btrfs_path path;
11972         struct btrfs_key key;
11973         struct extent_buffer *node;
11974         struct btrfs_file_extent_item *fi;
11975         char *buf = NULL;
11976         u64 start = 0;
11977         u64 len = 0;
11978         int slot = 0;
11979         int ret = 0;
11980
11981         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11982         if (!buf)
11983                 return -ENOMEM;
11984
11985         btrfs_init_path(&path);
11986         key.objectid = 0;
11987         key.offset = 0;
11988         key.type = 0;
11989         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11990         if (ret < 0)
11991                 goto out;
11992         /* Iterate all regular file extents and fill its csum */
11993         while (1) {
11994                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11995
11996                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11997                         goto next;
11998                 node = path.nodes[0];
11999                 slot = path.slots[0];
12000                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12001                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12002                         goto next;
12003                 start = btrfs_file_extent_disk_bytenr(node, fi);
12004                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12005
12006                 ret = populate_csum(trans, csum_root, buf, start, len);
12007                 if (ret == -EEXIST)
12008                         ret = 0;
12009                 if (ret < 0)
12010                         goto out;
12011 next:
12012                 /*
12013                  * TODO: if next leaf is corrupted, jump to nearest next valid
12014                  * leaf.
12015                  */
12016                 ret = btrfs_next_item(cur_root, &path);
12017                 if (ret < 0)
12018                         goto out;
12019                 if (ret > 0) {
12020                         ret = 0;
12021                         goto out;
12022                 }
12023         }
12024
12025 out:
12026         btrfs_release_path(&path);
12027         free(buf);
12028         return ret;
12029 }
12030
12031 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12032                                   struct btrfs_root *csum_root)
12033 {
12034         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12035         struct btrfs_path path;
12036         struct btrfs_root *tree_root = fs_info->tree_root;
12037         struct btrfs_root *cur_root;
12038         struct extent_buffer *node;
12039         struct btrfs_key key;
12040         int slot = 0;
12041         int ret = 0;
12042
12043         btrfs_init_path(&path);
12044         key.objectid = BTRFS_FS_TREE_OBJECTID;
12045         key.offset = 0;
12046         key.type = BTRFS_ROOT_ITEM_KEY;
12047         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12048         if (ret < 0)
12049                 goto out;
12050         if (ret > 0) {
12051                 ret = -ENOENT;
12052                 goto out;
12053         }
12054
12055         while (1) {
12056                 node = path.nodes[0];
12057                 slot = path.slots[0];
12058                 btrfs_item_key_to_cpu(node, &key, slot);
12059                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12060                         goto out;
12061                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12062                         goto next;
12063                 if (!is_fstree(key.objectid))
12064                         goto next;
12065                 key.offset = (u64)-1;
12066
12067                 cur_root = btrfs_read_fs_root(fs_info, &key);
12068                 if (IS_ERR(cur_root) || !cur_root) {
12069                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12070                                 key.objectid);
12071                         goto out;
12072                 }
12073                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12074                                 cur_root);
12075                 if (ret < 0)
12076                         goto out;
12077 next:
12078                 ret = btrfs_next_item(tree_root, &path);
12079                 if (ret > 0) {
12080                         ret = 0;
12081                         goto out;
12082                 }
12083                 if (ret < 0)
12084                         goto out;
12085         }
12086
12087 out:
12088         btrfs_release_path(&path);
12089         return ret;
12090 }
12091
12092 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12093                                       struct btrfs_root *csum_root)
12094 {
12095         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12096         struct btrfs_path path;
12097         struct btrfs_extent_item *ei;
12098         struct extent_buffer *leaf;
12099         char *buf;
12100         struct btrfs_key key;
12101         int ret;
12102
12103         btrfs_init_path(&path);
12104         key.objectid = 0;
12105         key.type = BTRFS_EXTENT_ITEM_KEY;
12106         key.offset = 0;
12107         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12108         if (ret < 0) {
12109                 btrfs_release_path(&path);
12110                 return ret;
12111         }
12112
12113         buf = malloc(csum_root->sectorsize);
12114         if (!buf) {
12115                 btrfs_release_path(&path);
12116                 return -ENOMEM;
12117         }
12118
12119         while (1) {
12120                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12121                         ret = btrfs_next_leaf(extent_root, &path);
12122                         if (ret < 0)
12123                                 break;
12124                         if (ret) {
12125                                 ret = 0;
12126                                 break;
12127                         }
12128                 }
12129                 leaf = path.nodes[0];
12130
12131                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12132                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12133                         path.slots[0]++;
12134                         continue;
12135                 }
12136
12137                 ei = btrfs_item_ptr(leaf, path.slots[0],
12138                                     struct btrfs_extent_item);
12139                 if (!(btrfs_extent_flags(leaf, ei) &
12140                       BTRFS_EXTENT_FLAG_DATA)) {
12141                         path.slots[0]++;
12142                         continue;
12143                 }
12144
12145                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12146                                     key.offset);
12147                 if (ret)
12148                         break;
12149                 path.slots[0]++;
12150         }
12151
12152         btrfs_release_path(&path);
12153         free(buf);
12154         return ret;
12155 }
12156
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent information, so in that case
 * the extent tree cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12173
12174 static void free_roots_info_cache(void)
12175 {
12176         if (!roots_info_cache)
12177                 return;
12178
12179         while (!cache_tree_empty(roots_info_cache)) {
12180                 struct cache_extent *entry;
12181                 struct root_item_info *rii;
12182
12183                 entry = first_cache_extent(roots_info_cache);
12184                 if (!entry)
12185                         break;
12186                 remove_cache_extent(roots_info_cache, entry);
12187                 rii = container_of(entry, struct root_item_info, cache_extent);
12188                 free(rii);
12189         }
12190
12191         free(roots_info_cache);
12192         roots_info_cache = NULL;
12193 }
12194
12195 static int build_roots_info_cache(struct btrfs_fs_info *info)
12196 {
12197         int ret = 0;
12198         struct btrfs_key key;
12199         struct extent_buffer *leaf;
12200         struct btrfs_path path;
12201
12202         if (!roots_info_cache) {
12203                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12204                 if (!roots_info_cache)
12205                         return -ENOMEM;
12206                 cache_tree_init(roots_info_cache);
12207         }
12208
12209         btrfs_init_path(&path);
12210         key.objectid = 0;
12211         key.type = BTRFS_EXTENT_ITEM_KEY;
12212         key.offset = 0;
12213         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12214         if (ret < 0)
12215                 goto out;
12216         leaf = path.nodes[0];
12217
12218         while (1) {
12219                 struct btrfs_key found_key;
12220                 struct btrfs_extent_item *ei;
12221                 struct btrfs_extent_inline_ref *iref;
12222                 int slot = path.slots[0];
12223                 int type;
12224                 u64 flags;
12225                 u64 root_id;
12226                 u8 level;
12227                 struct cache_extent *entry;
12228                 struct root_item_info *rii;
12229
12230                 if (slot >= btrfs_header_nritems(leaf)) {
12231                         ret = btrfs_next_leaf(info->extent_root, &path);
12232                         if (ret < 0) {
12233                                 break;
12234                         } else if (ret) {
12235                                 ret = 0;
12236                                 break;
12237                         }
12238                         leaf = path.nodes[0];
12239                         slot = path.slots[0];
12240                 }
12241
12242                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12243
12244                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12245                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12246                         goto next;
12247
12248                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12249                 flags = btrfs_extent_flags(leaf, ei);
12250
12251                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12252                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12253                         goto next;
12254
12255                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12256                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12257                         level = found_key.offset;
12258                 } else {
12259                         struct btrfs_tree_block_info *binfo;
12260
12261                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12262                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12263                         level = btrfs_tree_block_level(leaf, binfo);
12264                 }
12265
12266                 /*
12267                  * For a root extent, it must be of the following type and the
12268                  * first (and only one) iref in the item.
12269                  */
12270                 type = btrfs_extent_inline_ref_type(leaf, iref);
12271                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12272                         goto next;
12273
12274                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12275                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12276                 if (!entry) {
12277                         rii = malloc(sizeof(struct root_item_info));
12278                         if (!rii) {
12279                                 ret = -ENOMEM;
12280                                 goto out;
12281                         }
12282                         rii->cache_extent.start = root_id;
12283                         rii->cache_extent.size = 1;
12284                         rii->level = (u8)-1;
12285                         entry = &rii->cache_extent;
12286                         ret = insert_cache_extent(roots_info_cache, entry);
12287                         ASSERT(ret == 0);
12288                 } else {
12289                         rii = container_of(entry, struct root_item_info,
12290                                            cache_extent);
12291                 }
12292
12293                 ASSERT(rii->cache_extent.start == root_id);
12294                 ASSERT(rii->cache_extent.size == 1);
12295
12296                 if (level > rii->level || rii->level == (u8)-1) {
12297                         rii->level = level;
12298                         rii->bytenr = found_key.objectid;
12299                         rii->gen = btrfs_extent_generation(leaf, ei);
12300                         rii->node_count = 1;
12301                 } else if (level == rii->level) {
12302                         rii->node_count++;
12303                 }
12304 next:
12305                 path.slots[0]++;
12306         }
12307
12308 out:
12309         btrfs_release_path(&path);
12310
12311         return ret;
12312 }
12313
12314 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12315                                   struct btrfs_path *path,
12316                                   const struct btrfs_key *root_key,
12317                                   const int read_only_mode)
12318 {
12319         const u64 root_id = root_key->objectid;
12320         struct cache_extent *entry;
12321         struct root_item_info *rii;
12322         struct btrfs_root_item ri;
12323         unsigned long offset;
12324
12325         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12326         if (!entry) {
12327                 fprintf(stderr,
12328                         "Error: could not find extent items for root %llu\n",
12329                         root_key->objectid);
12330                 return -ENOENT;
12331         }
12332
12333         rii = container_of(entry, struct root_item_info, cache_extent);
12334         ASSERT(rii->cache_extent.start == root_id);
12335         ASSERT(rii->cache_extent.size == 1);
12336
12337         if (rii->node_count != 1) {
12338                 fprintf(stderr,
12339                         "Error: could not find btree root extent for root %llu\n",
12340                         root_id);
12341                 return -ENOENT;
12342         }
12343
12344         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12345         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12346
12347         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12348             btrfs_root_level(&ri) != rii->level ||
12349             btrfs_root_generation(&ri) != rii->gen) {
12350
12351                 /*
12352                  * If we're in repair mode but our caller told us to not update
12353                  * the root item, i.e. just check if it needs to be updated, don't
12354                  * print this message, since the caller will call us again shortly
12355                  * for the same root item without read only mode (the caller will
12356                  * open a transaction first).
12357                  */
12358                 if (!(read_only_mode && repair))
12359                         fprintf(stderr,
12360                                 "%sroot item for root %llu,"
12361                                 " current bytenr %llu, current gen %llu, current level %u,"
12362                                 " new bytenr %llu, new gen %llu, new level %u\n",
12363                                 (read_only_mode ? "" : "fixing "),
12364                                 root_id,
12365                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12366                                 btrfs_root_level(&ri),
12367                                 rii->bytenr, rii->gen, rii->level);
12368
12369                 if (btrfs_root_generation(&ri) > rii->gen) {
12370                         fprintf(stderr,
12371                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12372                                 root_id, btrfs_root_generation(&ri), rii->gen);
12373                         return -EINVAL;
12374                 }
12375
12376                 if (!read_only_mode) {
12377                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12378                         btrfs_set_root_level(&ri, rii->level);
12379                         btrfs_set_root_generation(&ri, rii->gen);
12380                         write_extent_buffer(path->nodes[0], &ri,
12381                                             offset, sizeof(ri));
12382                 }
12383
12384                 return 1;
12385         }
12386
12387         return 0;
12388 }
12389
12390 /*
12391  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12392  * caused read-only snapshots to be corrupted if they were created at a moment
12393  * when the source subvolume/snapshot had orphan items. The issue was that the
12394  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12395  * node instead of the post orphan cleanup root node.
12396  * So this function, and its callees, just detects and fixes those cases. Even
12397  * though the regression was for read-only snapshots, this function applies to
12398  * any snapshot/subvolume root.
12399  * This must be run before any other repair code - not doing it so, makes other
12400  * repair code delete or modify backrefs in the extent tree for example, which
12401  * will result in an inconsistent fs after repairing the root items.
12402  */
12403 static int repair_root_items(struct btrfs_fs_info *info)
12404 {
12405         struct btrfs_path path;
12406         struct btrfs_key key;
12407         struct extent_buffer *leaf;
12408         struct btrfs_trans_handle *trans = NULL;
12409         int ret = 0;
12410         int bad_roots = 0;
12411         int need_trans = 0;
12412
12413         btrfs_init_path(&path);
12414
12415         ret = build_roots_info_cache(info);
12416         if (ret)
12417                 goto out;
12418
12419         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12420         key.type = BTRFS_ROOT_ITEM_KEY;
12421         key.offset = 0;
12422
12423 again:
12424         /*
12425          * Avoid opening and committing transactions if a leaf doesn't have
12426          * any root items that need to be fixed, so that we avoid rotating
12427          * backup roots unnecessarily.
12428          */
12429         if (need_trans) {
12430                 trans = btrfs_start_transaction(info->tree_root, 1);
12431                 if (IS_ERR(trans)) {
12432                         ret = PTR_ERR(trans);
12433                         goto out;
12434                 }
12435         }
12436
12437         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12438                                 0, trans ? 1 : 0);
12439         if (ret < 0)
12440                 goto out;
12441         leaf = path.nodes[0];
12442
12443         while (1) {
12444                 struct btrfs_key found_key;
12445
12446                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12447                         int no_more_keys = find_next_key(&path, &key);
12448
12449                         btrfs_release_path(&path);
12450                         if (trans) {
12451                                 ret = btrfs_commit_transaction(trans,
12452                                                                info->tree_root);
12453                                 trans = NULL;
12454                                 if (ret < 0)
12455                                         goto out;
12456                         }
12457                         need_trans = 0;
12458                         if (no_more_keys)
12459                                 break;
12460                         goto again;
12461                 }
12462
12463                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12464
12465                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12466                         goto next;
12467                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12468                         goto next;
12469
12470                 ret = maybe_repair_root_item(info, &path, &found_key,
12471                                              trans ? 0 : 1);
12472                 if (ret < 0)
12473                         goto out;
12474                 if (ret) {
12475                         if (!trans && repair) {
12476                                 need_trans = 1;
12477                                 key = found_key;
12478                                 btrfs_release_path(&path);
12479                                 goto again;
12480                         }
12481                         bad_roots++;
12482                 }
12483 next:
12484                 path.slots[0]++;
12485         }
12486         ret = 0;
12487 out:
12488         free_roots_info_cache();
12489         btrfs_release_path(&path);
12490         if (trans)
12491                 btrfs_commit_transaction(trans, info->tree_root);
12492         if (ret < 0)
12493                 return ret;
12494
12495         return bad_roots;
12496 }
12497
12498 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12499 {
12500         struct btrfs_trans_handle *trans;
12501         struct btrfs_block_group_cache *bg_cache;
12502         u64 current = 0;
12503         int ret = 0;
12504
12505         /* Clear all free space cache inodes and its extent data */
12506         while (1) {
12507                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12508                 if (!bg_cache)
12509                         break;
12510                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12511                 if (ret < 0)
12512                         return ret;
12513                 current = bg_cache->key.objectid + bg_cache->key.offset;
12514         }
12515
12516         /* Don't forget to set cache_generation to -1 */
12517         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12518         if (IS_ERR(trans)) {
12519                 error("failed to update super block cache generation");
12520                 return PTR_ERR(trans);
12521         }
12522         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12523         btrfs_commit_transaction(trans, fs_info->tree_root);
12524
12525         return ret;
12526 }
12527
/*
 * Help text for "btrfs check": a NULL-terminated array of lines that
 * cmd_check() hands to usage().  Each option parsed by cmd_check() has an
 * entry here; keep the two in sync when adding or changing options.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12558
12559 int cmd_check(int argc, char **argv)
12560 {
12561         struct cache_tree root_cache;
12562         struct btrfs_root *root;
12563         struct btrfs_fs_info *info;
12564         u64 bytenr = 0;
12565         u64 subvolid = 0;
12566         u64 tree_root_bytenr = 0;
12567         u64 chunk_root_bytenr = 0;
12568         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12569         int ret;
12570         int err = 0;
12571         u64 num;
12572         int init_csum_tree = 0;
12573         int readonly = 0;
12574         int clear_space_cache = 0;
12575         int qgroup_report = 0;
12576         int qgroups_repaired = 0;
12577         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12578
12579         while(1) {
12580                 int c;
12581                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12582                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12583                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12584                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12585                 static const struct option long_options[] = {
12586                         { "super", required_argument, NULL, 's' },
12587                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12588                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12589                         { "init-csum-tree", no_argument, NULL,
12590                                 GETOPT_VAL_INIT_CSUM },
12591                         { "init-extent-tree", no_argument, NULL,
12592                                 GETOPT_VAL_INIT_EXTENT },
12593                         { "check-data-csum", no_argument, NULL,
12594                                 GETOPT_VAL_CHECK_CSUM },
12595                         { "backup", no_argument, NULL, 'b' },
12596                         { "subvol-extents", required_argument, NULL, 'E' },
12597                         { "qgroup-report", no_argument, NULL, 'Q' },
12598                         { "tree-root", required_argument, NULL, 'r' },
12599                         { "chunk-root", required_argument, NULL,
12600                                 GETOPT_VAL_CHUNK_TREE },
12601                         { "progress", no_argument, NULL, 'p' },
12602                         { "mode", required_argument, NULL,
12603                                 GETOPT_VAL_MODE },
12604                         { "clear-space-cache", required_argument, NULL,
12605                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12606                         { NULL, 0, NULL, 0}
12607                 };
12608
12609                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12610                 if (c < 0)
12611                         break;
12612                 switch(c) {
12613                         case 'a': /* ignored */ break;
12614                         case 'b':
12615                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12616                                 break;
12617                         case 's':
12618                                 num = arg_strtou64(optarg);
12619                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12620                                         error(
12621                                         "super mirror should be less than %d",
12622                                                 BTRFS_SUPER_MIRROR_MAX);
12623                                         exit(1);
12624                                 }
12625                                 bytenr = btrfs_sb_offset(((int)num));
12626                                 printf("using SB copy %llu, bytenr %llu\n", num,
12627                                        (unsigned long long)bytenr);
12628                                 break;
12629                         case 'Q':
12630                                 qgroup_report = 1;
12631                                 break;
12632                         case 'E':
12633                                 subvolid = arg_strtou64(optarg);
12634                                 break;
12635                         case 'r':
12636                                 tree_root_bytenr = arg_strtou64(optarg);
12637                                 break;
12638                         case GETOPT_VAL_CHUNK_TREE:
12639                                 chunk_root_bytenr = arg_strtou64(optarg);
12640                                 break;
12641                         case 'p':
12642                                 ctx.progress_enabled = true;
12643                                 break;
12644                         case '?':
12645                         case 'h':
12646                                 usage(cmd_check_usage);
12647                         case GETOPT_VAL_REPAIR:
12648                                 printf("enabling repair mode\n");
12649                                 repair = 1;
12650                                 ctree_flags |= OPEN_CTREE_WRITES;
12651                                 break;
12652                         case GETOPT_VAL_READONLY:
12653                                 readonly = 1;
12654                                 break;
12655                         case GETOPT_VAL_INIT_CSUM:
12656                                 printf("Creating a new CRC tree\n");
12657                                 init_csum_tree = 1;
12658                                 repair = 1;
12659                                 ctree_flags |= OPEN_CTREE_WRITES;
12660                                 break;
12661                         case GETOPT_VAL_INIT_EXTENT:
12662                                 init_extent_tree = 1;
12663                                 ctree_flags |= (OPEN_CTREE_WRITES |
12664                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12665                                 repair = 1;
12666                                 break;
12667                         case GETOPT_VAL_CHECK_CSUM:
12668                                 check_data_csum = 1;
12669                                 break;
12670                         case GETOPT_VAL_MODE:
12671                                 check_mode = parse_check_mode(optarg);
12672                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12673                                         error("unknown mode: %s", optarg);
12674                                         exit(1);
12675                                 }
12676                                 break;
12677                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12678                                 if (strcmp(optarg, "v1") == 0) {
12679                                         clear_space_cache = 1;
12680                                 } else if (strcmp(optarg, "v2") == 0) {
12681                                         clear_space_cache = 2;
12682                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12683                                 } else {
12684                                         error(
12685                 "invalid argument to --clear-space-cache, must be v1 or v2");
12686                                         exit(1);
12687                                 }
12688                                 ctree_flags |= OPEN_CTREE_WRITES;
12689                                 break;
12690                 }
12691         }
12692
12693         if (check_argc_exact(argc - optind, 1))
12694                 usage(cmd_check_usage);
12695
12696         if (ctx.progress_enabled) {
12697                 ctx.tp = TASK_NOTHING;
12698                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12699         }
12700
12701         /* This check is the only reason for --readonly to exist */
12702         if (readonly && repair) {
12703                 error("repair options are not compatible with --readonly");
12704                 exit(1);
12705         }
12706
12707         /*
12708          * Not supported yet
12709          */
12710         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12711                 error("low memory mode doesn't support repair yet");
12712                 exit(1);
12713         }
12714
12715         radix_tree_init();
12716         cache_tree_init(&root_cache);
12717
12718         if((ret = check_mounted(argv[optind])) < 0) {
12719                 error("could not check mount status: %s", strerror(-ret));
12720                 err |= !!ret;
12721                 goto err_out;
12722         } else if(ret) {
12723                 error("%s is currently mounted, aborting", argv[optind]);
12724                 ret = -EBUSY;
12725                 err |= !!ret;
12726                 goto err_out;
12727         }
12728
12729         /* only allow partial opening under repair mode */
12730         if (repair)
12731                 ctree_flags |= OPEN_CTREE_PARTIAL;
12732
12733         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12734                                   chunk_root_bytenr, ctree_flags);
12735         if (!info) {
12736                 error("cannot open file system");
12737                 ret = -EIO;
12738                 err |= !!ret;
12739                 goto err_out;
12740         }
12741
12742         global_info = info;
12743         root = info->fs_root;
12744         if (clear_space_cache == 1) {
12745                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12746                         error(
12747                 "free space cache v2 detected, use --clear-space-cache v2");
12748                         ret = 1;
12749                         goto close_out;
12750                 }
12751                 printf("Clearing free space cache\n");
12752                 ret = clear_free_space_cache(info);
12753                 if (ret) {
12754                         error("failed to clear free space cache");
12755                         ret = 1;
12756                 } else {
12757                         printf("Free space cache cleared\n");
12758                 }
12759                 goto close_out;
12760         } else if (clear_space_cache == 2) {
12761                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12762                         printf("no free space cache v2 to clear\n");
12763                         ret = 0;
12764                         goto close_out;
12765                 }
12766                 printf("Clear free space cache v2\n");
12767                 ret = btrfs_clear_free_space_tree(info);
12768                 if (ret) {
12769                         error("failed to clear free space cache v2: %d", ret);
12770                         ret = 1;
12771                 } else {
12772                         printf("free space cache v2 cleared\n");
12773                 }
12774                 goto close_out;
12775         }
12776
12777         /*
12778          * repair mode will force us to commit transaction which
12779          * will make us fail to load log tree when mounting.
12780          */
12781         if (repair && btrfs_super_log_root(info->super_copy)) {
12782                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12783                 if (!ret) {
12784                         ret = 1;
12785                         err |= !!ret;
12786                         goto close_out;
12787                 }
12788                 ret = zero_log_tree(root);
12789                 err |= !!ret;
12790                 if (ret) {
12791                         error("failed to zero log tree: %d", ret);
12792                         goto close_out;
12793                 }
12794         }
12795
12796         uuid_unparse(info->super_copy->fsid, uuidbuf);
12797         if (qgroup_report) {
12798                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12799                        uuidbuf);
12800                 ret = qgroup_verify_all(info);
12801                 err |= !!ret;
12802                 if (ret == 0)
12803                         report_qgroups(1);
12804                 goto close_out;
12805         }
12806         if (subvolid) {
12807                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12808                        subvolid, argv[optind], uuidbuf);
12809                 ret = print_extent_state(info, subvolid);
12810                 err |= !!ret;
12811                 goto close_out;
12812         }
12813         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12814
12815         if (!extent_buffer_uptodate(info->tree_root->node) ||
12816             !extent_buffer_uptodate(info->dev_root->node) ||
12817             !extent_buffer_uptodate(info->chunk_root->node)) {
12818                 error("critical roots corrupted, unable to check the filesystem");
12819                 err |= !!ret;
12820                 ret = -EIO;
12821                 goto close_out;
12822         }
12823
12824         if (init_extent_tree || init_csum_tree) {
12825                 struct btrfs_trans_handle *trans;
12826
12827                 trans = btrfs_start_transaction(info->extent_root, 0);
12828                 if (IS_ERR(trans)) {
12829                         error("error starting transaction");
12830                         ret = PTR_ERR(trans);
12831                         err |= !!ret;
12832                         goto close_out;
12833                 }
12834
12835                 if (init_extent_tree) {
12836                         printf("Creating a new extent tree\n");
12837                         ret = reinit_extent_tree(trans, info);
12838                         err |= !!ret;
12839                         if (ret)
12840                                 goto close_out;
12841                 }
12842
12843                 if (init_csum_tree) {
12844                         printf("Reinitialize checksum tree\n");
12845                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12846                         if (ret) {
12847                                 error("checksum tree initialization failed: %d",
12848                                                 ret);
12849                                 ret = -EIO;
12850                                 err |= !!ret;
12851                                 goto close_out;
12852                         }
12853
12854                         ret = fill_csum_tree(trans, info->csum_root,
12855                                              init_extent_tree);
12856                         err |= !!ret;
12857                         if (ret) {
12858                                 error("checksum tree refilling failed: %d", ret);
12859                                 return -EIO;
12860                         }
12861                 }
12862                 /*
12863                  * Ok now we commit and run the normal fsck, which will add
12864                  * extent entries for all of the items it finds.
12865                  */
12866                 ret = btrfs_commit_transaction(trans, info->extent_root);
12867                 err |= !!ret;
12868                 if (ret)
12869                         goto close_out;
12870         }
12871         if (!extent_buffer_uptodate(info->extent_root->node)) {
12872                 error("critical: extent_root, unable to check the filesystem");
12873                 ret = -EIO;
12874                 err |= !!ret;
12875                 goto close_out;
12876         }
12877         if (!extent_buffer_uptodate(info->csum_root->node)) {
12878                 error("critical: csum_root, unable to check the filesystem");
12879                 ret = -EIO;
12880                 err |= !!ret;
12881                 goto close_out;
12882         }
12883
12884         if (!ctx.progress_enabled)
12885                 fprintf(stderr, "checking extents\n");
12886         if (check_mode == CHECK_MODE_LOWMEM)
12887                 ret = check_chunks_and_extents_v2(root);
12888         else
12889                 ret = check_chunks_and_extents(root);
12890         err |= !!ret;
12891         if (ret)
12892                 error(
12893                 "errors found in extent allocation tree or chunk allocation");
12894
12895         ret = repair_root_items(info);
12896         err |= !!ret;
12897         if (ret < 0)
12898                 goto close_out;
12899         if (repair) {
12900                 fprintf(stderr, "Fixed %d roots.\n", ret);
12901                 ret = 0;
12902         } else if (ret > 0) {
12903                 fprintf(stderr,
12904                        "Found %d roots with an outdated root item.\n",
12905                        ret);
12906                 fprintf(stderr,
12907                         "Please run a filesystem check with the option --repair to fix them.\n");
12908                 ret = 1;
12909                 err |= !!ret;
12910                 goto close_out;
12911         }
12912
12913         if (!ctx.progress_enabled) {
12914                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12915                         fprintf(stderr, "checking free space tree\n");
12916                 else
12917                         fprintf(stderr, "checking free space cache\n");
12918         }
12919         ret = check_space_cache(root);
12920         err |= !!ret;
12921         if (ret)
12922                 goto out;
12923
12924         /*
12925          * We used to have to have these hole extents in between our real
12926          * extents so if we don't have this flag set we need to make sure there
12927          * are no gaps in the file extents for inodes, otherwise we can just
12928          * ignore it when this happens.
12929          */
12930         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12931         if (!ctx.progress_enabled)
12932                 fprintf(stderr, "checking fs roots\n");
12933         if (check_mode == CHECK_MODE_LOWMEM)
12934                 ret = check_fs_roots_v2(root->fs_info);
12935         else
12936                 ret = check_fs_roots(root, &root_cache);
12937         err |= !!ret;
12938         if (ret)
12939                 goto out;
12940
12941         fprintf(stderr, "checking csums\n");
12942         ret = check_csums(root);
12943         err |= !!ret;
12944         if (ret)
12945                 goto out;
12946
12947         fprintf(stderr, "checking root refs\n");
12948         /* For low memory mode, check_fs_roots_v2 handles root refs */
12949         if (check_mode != CHECK_MODE_LOWMEM) {
12950                 ret = check_root_refs(root, &root_cache);
12951                 err |= !!ret;
12952                 if (ret)
12953                         goto out;
12954         }
12955
12956         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12957                 struct extent_buffer *eb;
12958
12959                 eb = list_first_entry(&root->fs_info->recow_ebs,
12960                                       struct extent_buffer, recow);
12961                 list_del_init(&eb->recow);
12962                 ret = recow_extent_buffer(root, eb);
12963                 err |= !!ret;
12964                 if (ret)
12965                         break;
12966         }
12967
12968         while (!list_empty(&delete_items)) {
12969                 struct bad_item *bad;
12970
12971                 bad = list_first_entry(&delete_items, struct bad_item, list);
12972                 list_del_init(&bad->list);
12973                 if (repair) {
12974                         ret = delete_bad_item(root, bad);
12975                         err |= !!ret;
12976                 }
12977                 free(bad);
12978         }
12979
12980         if (info->quota_enabled) {
12981                 fprintf(stderr, "checking quota groups\n");
12982                 ret = qgroup_verify_all(info);
12983                 err |= !!ret;
12984                 if (ret)
12985                         goto out;
12986                 report_qgroups(0);
12987                 ret = repair_qgroups(info, &qgroups_repaired);
12988                 err |= !!ret;
12989                 if (err)
12990                         goto out;
12991                 ret = 0;
12992         }
12993
12994         if (!list_empty(&root->fs_info->recow_ebs)) {
12995                 error("transid errors in file system");
12996                 ret = 1;
12997                 err |= !!ret;
12998         }
12999 out:
13000         if (found_old_backref) { /*
13001                  * there was a disk format change when mixed
13002                  * backref was in testing tree. The old format
13003                  * existed about one week.
13004                  */
13005                 printf("\n * Found old mixed backref format. "
13006                        "The old format is not supported! *"
13007                        "\n * Please mount the FS in readonly mode, "
13008                        "backup data and re-format the FS. *\n\n");
13009                 err |= 1;
13010         }
13011         printf("found %llu bytes used err is %d\n",
13012                (unsigned long long)bytes_used, ret);
13013         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13014         printf("total tree bytes: %llu\n",
13015                (unsigned long long)total_btree_bytes);
13016         printf("total fs tree bytes: %llu\n",
13017                (unsigned long long)total_fs_tree_bytes);
13018         printf("total extent tree bytes: %llu\n",
13019                (unsigned long long)total_extent_tree_bytes);
13020         printf("btree space waste bytes: %llu\n",
13021                (unsigned long long)btree_space_waste);
13022         printf("file data blocks allocated: %llu\n referenced %llu\n",
13023                 (unsigned long long)data_bytes_allocated,
13024                 (unsigned long long)data_bytes_referenced);
13025
13026         free_qgroup_counts();
13027         free_root_recs_tree(&root_cache);
13028 close_out:
13029         close_ctree(root);
13030 err_out:
13031         if (ctx.progress_enabled)
13032                 task_deinit(ctx.info);
13033
13034         return err;
13035 }