btrfs-progs: check: Fix lowmem false alert on tree reloc tree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phase of the check currently being reported by the progress task.
 * The values index the status strings in print_status_check().
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	TASK_NOTHING = 3,	/* must stay the last element */
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Checker implementation selected by the user (see parse_check_mode()). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	/* Result of parse_check_mode() for a string it does not recognise */
	CHECK_MODE_UNKNOWN = 2,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Error bits reported by the fs tree checks (bit 0 is not used). */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 /*
140  * Much like data_backref, just removed the undetermined members
141  * and change it to use list_head.
142  * During extent scan, it is stored in root->orphan_data_extent.
143  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
144  */
145 struct orphan_data_extent {
146         struct list_head list;
147         u64 root;
148         u64 objectid;
149         u64 offset;
150         u64 disk_bytenr;
151         u64 disk_len;
152 };
153
154 struct tree_backref {
155         struct extent_backref node;
156         union {
157                 u64 parent;
158                 u64 root;
159         };
160 };
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
/*
 * Value marking extent_record::flag_block_full_backref as "not decided yet".
 * That member is two bits wide, so it can hold this in addition to 0 and 1.
 */
enum {
	FLAG_UNSET = 2
};
169
170 struct extent_record {
171         struct list_head backrefs;
172         struct list_head dups;
173         struct list_head list;
174         struct cache_extent cache;
175         struct btrfs_disk_key parent_key;
176         u64 start;
177         u64 max_size;
178         u64 nr;
179         u64 refs;
180         u64 extent_item_refs;
181         u64 generation;
182         u64 parent_generation;
183         u64 info_objectid;
184         u32 num_duplicates;
185         u8 info_level;
186         unsigned int flag_block_full_backref:2;
187         unsigned int found_rec:1;
188         unsigned int content_checked:1;
189         unsigned int owner_ref_checked:1;
190         unsigned int is_root:1;
191         unsigned int metadata:1;
192         unsigned int bad_full_backref:1;
193         unsigned int crossing_stripes:1;
194         unsigned int wrong_chunk_type:1;
195 };
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
202 struct inode_backref {
203         struct list_head list;
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
209         int errors;
210         u64 dir;
211         u64 index;
212         u16 namelen;
213         char name[0];
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
221 struct root_item_record {
222         struct list_head list;
223         u64 objectid;
224         u64 bytenr;
225         u64 last_snapshot;
226         u8 level;
227         u8 drop_level;
228         int level_size;
229         struct btrfs_key drop_key;
230 };
231
/* Error bits for a single back reference; decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
245
246 struct file_extent_hole {
247         struct rb_node node;
248         u64 start;
249         u64 len;
250 };
251
252 struct inode_record {
253         struct list_head backrefs;
254         unsigned int checked:1;
255         unsigned int merging:1;
256         unsigned int found_inode_item:1;
257         unsigned int found_dir_item:1;
258         unsigned int found_file_extent:1;
259         unsigned int found_csum_item:1;
260         unsigned int some_csum_missing:1;
261         unsigned int nodatasum:1;
262         int errors;
263
264         u64 ino;
265         u32 nlink;
266         u32 imode;
267         u64 isize;
268         u64 nbytes;
269
270         u32 found_link;
271         u64 found_size;
272         u64 extent_start;
273         u64 extent_end;
274         struct rb_root holes;
275         struct list_head orphan_extents;
276
277         u32 refs;
278 };
279
/* Error bits kept in inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
295
296 struct root_backref {
297         struct list_head list;
298         unsigned int found_dir_item:1;
299         unsigned int found_dir_index:1;
300         unsigned int found_back_ref:1;
301         unsigned int found_forward_ref:1;
302         unsigned int reachable:1;
303         int errors;
304         u64 ref_root;
305         u64 dir;
306         u64 index;
307         u16 namelen;
308         char name[0];
309 };
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
316 struct root_record {
317         struct list_head backrefs;
318         struct cache_extent cache;
319         unsigned int found_root_item:1;
320         u64 objectid;
321         u32 found_ref;
322 };
323
324 struct ptr_node {
325         struct cache_extent cache;
326         void *data;
327 };
328
329 struct shared_node {
330         struct cache_extent cache;
331         struct cache_tree root_cache;
332         struct cache_tree inode_cache;
333         struct inode_record *current;
334         u32 refs;
335 };
336
337 struct block_info {
338         u64 start;
339         u32 size;
340 };
341
342 struct walk_control {
343         struct cache_tree shared;
344         struct shared_node *nodes[BTRFS_MAX_LEVEL];
345         int active_node;
346         int root_level;
347 };
348
349 struct bad_item {
350         struct btrfs_key key;
351         u64 root_id;
352         struct list_head list;
353 };
354
355 struct extent_entry {
356         u64 bytenr;
357         u64 bytes;
358         int count;
359         int broken;
360         struct list_head list;
361 };
362
363 struct root_item_info {
364         /* level of the root */
365         u8 level;
366         /* number of nodes at this level, must be 1 for a root */
367         int node_count;
368         u64 bytenr;
369         u64 gen;
370         struct cache_extent cache_extent;
371 };
372
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are only used
 * internally for error classification.  Every flag must own a distinct bit:
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable, so it now uses the next
 * free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Finish the progress line by emitting a newline and flushing stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize, root->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct btrfs_root *root,
1481                             struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(root, eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct btrfs_root *root,
1987                             struct extent_buffer *parent, int slot,
1988                             struct extent_buffer *child)
1989 {
1990         struct btrfs_key parent_key;
1991         struct btrfs_key child_key;
1992         int ret = 0;
1993
1994         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1995         if (btrfs_header_level(child) == 0)
1996                 btrfs_item_key_to_cpu(child, &child_key, 0);
1997         else
1998                 btrfs_node_key_to_cpu(child, &child_key, 0);
1999
2000         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2001                 ret = -EINVAL;
2002                 fprintf(stderr,
2003                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2004                         parent_key.objectid, parent_key.type, parent_key.offset,
2005                         child_key.objectid, child_key.type, child_key.offset);
2006         }
2007         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008                 ret = -EINVAL;
2009                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2010                         btrfs_node_blockptr(parent, slot),
2011                         btrfs_header_bytenr(child));
2012         }
2013         if (btrfs_node_ptr_generation(parent, slot) !=
2014             btrfs_header_generation(child)) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2017                         btrfs_header_generation(child),
2018                         btrfs_node_ptr_generation(parent, slot));
2019         }
2020         return ret;
2021 }
2022
2023 /*
2024  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2025  * in every fs or file tree check. Here we find its all root ids, and only check
2026  * it in the fs or file tree which has the smallest root id.
2027  */
2028 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 {
2030         struct rb_node *node;
2031         struct ulist_node *u;
2032
2033         if (roots->nnodes == 1)
2034                 return 1;
2035
2036         node = rb_first(&roots->root);
2037         u = rb_entry(node, struct ulist_node, rb_node);
2038         /*
2039          * current root id is not smallest, we skip it and let it be checked
2040          * in the fs or file tree who hash the smallest root id.
2041          */
2042         if (root->objectid != u->val)
2043                 return 0;
2044
2045         return 1;
2046 }
2047
2048 /*
2049  * for a tree node or leaf, we record its reference count, so later if we still
2050  * process this node or leaf, don't need to compute its reference count again.
2051  */
2052 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2053                              struct node_refs *nrefs, u64 level)
2054 {
2055         int check, ret;
2056         u64 refs;
2057         struct ulist *roots;
2058
2059         if (nrefs->bytenr[level] != bytenr) {
2060                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2061                                        level, 1, &refs, NULL);
2062                 if (ret < 0)
2063                         return ret;
2064
2065                 nrefs->bytenr[level] = bytenr;
2066                 nrefs->refs[level] = refs;
2067                 if (refs > 1) {
2068                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2069                                                    0, &roots);
2070                         if (ret)
2071                                 return -EIO;
2072
2073                         check = need_check(root, roots);
2074                         ulist_free(roots);
2075                         nrefs->need_check[level] = check;
2076                 } else {
2077                         nrefs->need_check[level] = 1;
2078                 }
2079         }
2080
2081         return 0;
2082 }
2083
2084 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2085                           struct walk_control *wc, int *level,
2086                           struct node_refs *nrefs)
2087 {
2088         enum btrfs_tree_block_status status;
2089         u64 bytenr;
2090         u64 ptr_gen;
2091         struct extent_buffer *next;
2092         struct extent_buffer *cur;
2093         u32 blocksize;
2094         int ret, err = 0;
2095         u64 refs;
2096
2097         WARN_ON(*level < 0);
2098         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099
2100         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2101                 refs = nrefs->refs[*level];
2102                 ret = 0;
2103         } else {
2104                 ret = btrfs_lookup_extent_info(NULL, root,
2105                                        path->nodes[*level]->start,
2106                                        *level, 1, &refs, NULL);
2107                 if (ret < 0) {
2108                         err = ret;
2109                         goto out;
2110                 }
2111                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2112                 nrefs->refs[*level] = refs;
2113         }
2114
2115         if (refs > 1) {
2116                 ret = enter_shared_node(root, path->nodes[*level]->start,
2117                                         refs, wc, *level);
2118                 if (ret > 0) {
2119                         err = ret;
2120                         goto out;
2121                 }
2122         }
2123
2124         while (*level >= 0) {
2125                 WARN_ON(*level < 0);
2126                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2127                 cur = path->nodes[*level];
2128
2129                 if (btrfs_header_level(cur) != *level)
2130                         WARN_ON(1);
2131
2132                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2133                         break;
2134                 if (*level == 0) {
2135                         ret = process_one_leaf(root, cur, wc);
2136                         if (ret < 0)
2137                                 err = ret;
2138                         break;
2139                 }
2140                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2141                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2142                 blocksize = root->nodesize;
2143
2144                 if (bytenr == nrefs->bytenr[*level - 1]) {
2145                         refs = nrefs->refs[*level - 1];
2146                 } else {
2147                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2148                                         *level - 1, 1, &refs, NULL);
2149                         if (ret < 0) {
2150                                 refs = 0;
2151                         } else {
2152                                 nrefs->bytenr[*level - 1] = bytenr;
2153                                 nrefs->refs[*level - 1] = refs;
2154                         }
2155                 }
2156
2157                 if (refs > 1) {
2158                         ret = enter_shared_node(root, bytenr, refs,
2159                                                 wc, *level - 1);
2160                         if (ret > 0) {
2161                                 path->slots[*level]++;
2162                                 continue;
2163                         }
2164                 }
2165
2166                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2167                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2168                         free_extent_buffer(next);
2169                         reada_walk_down(root, cur, path->slots[*level]);
2170                         next = read_tree_block(root, bytenr, blocksize,
2171                                                ptr_gen);
2172                         if (!extent_buffer_uptodate(next)) {
2173                                 struct btrfs_key node_key;
2174
2175                                 btrfs_node_key_to_cpu(path->nodes[*level],
2176                                                       &node_key,
2177                                                       path->slots[*level]);
2178                                 btrfs_add_corrupt_extent_record(root->fs_info,
2179                                                 &node_key,
2180                                                 path->nodes[*level]->start,
2181                                                 root->nodesize, *level);
2182                                 err = -EIO;
2183                                 goto out;
2184                         }
2185                 }
2186
2187                 ret = check_child_node(root, cur, path->slots[*level], next);
2188                 if (ret) {
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
/*
 * Forward declaration of check_inode_item(); it repeats the identical
 * declaration placed in front of process_one_leaf_v2().
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);
2215
2216 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2217                              int *level, struct node_refs *nrefs, int ext_ref)
2218 {
2219         enum btrfs_tree_block_status status;
2220         u64 bytenr;
2221         u64 ptr_gen;
2222         struct extent_buffer *next;
2223         struct extent_buffer *cur;
2224         u32 blocksize;
2225         int ret;
2226
2227         WARN_ON(*level < 0);
2228         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229
2230         ret = update_nodes_refs(root, path->nodes[*level]->start,
2231                                 nrefs, *level);
2232         if (ret < 0)
2233                 return ret;
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 /* Don't forgot to check leaf/node validation */
2246                 if (*level == 0) {
2247                         ret = btrfs_check_leaf(root, NULL, cur);
2248                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2249                                 ret = -EIO;
2250                                 break;
2251                         }
2252                         ret = process_one_leaf_v2(root, path, nrefs,
2253                                                   level, ext_ref);
2254                         break;
2255                 } else {
2256                         ret = btrfs_check_node(root, NULL, cur);
2257                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2258                                 ret = -EIO;
2259                                 break;
2260                         }
2261                 }
2262                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2263                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2264                 blocksize = root->nodesize;
2265
2266                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2267                 if (ret)
2268                         break;
2269                 if (!nrefs->need_check[*level - 1]) {
2270                         path->slots[*level]++;
2271                         continue;
2272                 }
2273
2274                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2275                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2276                         free_extent_buffer(next);
2277                         reada_walk_down(root, cur, path->slots[*level]);
2278                         next = read_tree_block(root, bytenr, blocksize,
2279                                                ptr_gen);
2280                         if (!extent_buffer_uptodate(next)) {
2281                                 struct btrfs_key node_key;
2282
2283                                 btrfs_node_key_to_cpu(path->nodes[*level],
2284                                                       &node_key,
2285                                                       path->slots[*level]);
2286                                 btrfs_add_corrupt_extent_record(root->fs_info,
2287                                                 &node_key,
2288                                                 path->nodes[*level]->start,
2289                                                 root->nodesize, *level);
2290                                 ret = -EIO;
2291                                 break;
2292                         }
2293                 }
2294
2295                 ret = check_child_node(root, cur, path->slots[*level], next);
2296                 if (ret < 0) 
2297                         break;
2298
2299                 if (btrfs_is_leaf(next))
2300                         status = btrfs_check_leaf(root, NULL, next);
2301                 else
2302                         status = btrfs_check_node(root, NULL, next);
2303                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2304                         free_extent_buffer(next);
2305                         ret = -EIO;
2306                         break;
2307                 }
2308
2309                 *level = *level - 1;
2310                 free_extent_buffer(path->nodes[*level]);
2311                 path->nodes[*level] = next;
2312                 path->slots[*level] = 0;
2313         }
2314         return ret;
2315 }
2316
2317 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2318                         struct walk_control *wc, int *level)
2319 {
2320         int i;
2321         struct extent_buffer *leaf;
2322
2323         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2324                 leaf = path->nodes[i];
2325                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2326                         path->slots[i]++;
2327                         *level = i;
2328                         return 0;
2329                 } else {
2330                         free_extent_buffer(path->nodes[*level]);
2331                         path->nodes[*level] = NULL;
2332                         BUG_ON(*level > wc->active_node);
2333                         if (*level == wc->active_node)
2334                                 leave_shared_node(root, wc, *level);
2335                         *level = i + 1;
2336                 }
2337         }
2338         return 1;
2339 }
2340
2341 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2342                            int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         *level = i + 1;
2357                 }
2358         }
2359         return 1;
2360 }
2361
2362 static int check_root_dir(struct inode_record *rec)
2363 {
2364         struct inode_backref *backref;
2365         int ret = -1;
2366
2367         if (!rec->found_inode_item || rec->errors)
2368                 goto out;
2369         if (rec->nlink != 1 || rec->found_link != 0)
2370                 goto out;
2371         if (list_empty(&rec->backrefs))
2372                 goto out;
2373         backref = to_inode_backref(rec->backrefs.next);
2374         if (!backref->found_inode_ref)
2375                 goto out;
2376         if (backref->index != 0 || backref->namelen != 2 ||
2377             memcmp(backref->name, "..", 2))
2378                 goto out;
2379         if (backref->found_dir_index || backref->found_dir_item)
2380                 goto out;
2381         ret = 0;
2382 out:
2383         return ret;
2384 }
2385
2386 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2387                               struct btrfs_root *root, struct btrfs_path *path,
2388                               struct inode_record *rec)
2389 {
2390         struct btrfs_inode_item *ei;
2391         struct btrfs_key key;
2392         int ret;
2393
2394         key.objectid = rec->ino;
2395         key.type = BTRFS_INODE_ITEM_KEY;
2396         key.offset = (u64)-1;
2397
2398         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2399         if (ret < 0)
2400                 goto out;
2401         if (ret) {
2402                 if (!path->slots[0]) {
2403                         ret = -ENOENT;
2404                         goto out;
2405                 }
2406                 path->slots[0]--;
2407                 ret = 0;
2408         }
2409         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2410         if (key.objectid != rec->ino) {
2411                 ret = -ENOENT;
2412                 goto out;
2413         }
2414
2415         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2416                             struct btrfs_inode_item);
2417         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2418         btrfs_mark_buffer_dirty(path->nodes[0]);
2419         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2420         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2421                root->root_key.objectid);
2422 out:
2423         btrfs_release_path(path);
2424         return ret;
2425 }
2426
2427 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2428                                     struct btrfs_root *root,
2429                                     struct btrfs_path *path,
2430                                     struct inode_record *rec)
2431 {
2432         int ret;
2433
2434         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2435         btrfs_release_path(path);
2436         if (!ret)
2437                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2438         return ret;
2439 }
2440
2441 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2442                                struct btrfs_root *root,
2443                                struct btrfs_path *path,
2444                                struct inode_record *rec)
2445 {
2446         struct btrfs_inode_item *ei;
2447         struct btrfs_key key;
2448         int ret = 0;
2449
2450         key.objectid = rec->ino;
2451         key.type = BTRFS_INODE_ITEM_KEY;
2452         key.offset = 0;
2453
2454         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2455         if (ret) {
2456                 if (ret > 0)
2457                         ret = -ENOENT;
2458                 goto out;
2459         }
2460
2461         /* Since ret == 0, no need to check anything */
2462         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463                             struct btrfs_inode_item);
2464         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2465         btrfs_mark_buffer_dirty(path->nodes[0]);
2466         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2467         printf("reset nbytes for ino %llu root %llu\n",
2468                rec->ino, root->root_key.objectid);
2469 out:
2470         btrfs_release_path(path);
2471         return ret;
2472 }
2473
2474 static int add_missing_dir_index(struct btrfs_root *root,
2475                                  struct cache_tree *inode_cache,
2476                                  struct inode_record *rec,
2477                                  struct inode_backref *backref)
2478 {
2479         struct btrfs_path path;
2480         struct btrfs_trans_handle *trans;
2481         struct btrfs_dir_item *dir_item;
2482         struct extent_buffer *leaf;
2483         struct btrfs_key key;
2484         struct btrfs_disk_key disk_key;
2485         struct inode_record *dir_rec;
2486         unsigned long name_ptr;
2487         u32 data_size = sizeof(*dir_item) + backref->namelen;
2488         int ret;
2489
2490         trans = btrfs_start_transaction(root, 1);
2491         if (IS_ERR(trans))
2492                 return PTR_ERR(trans);
2493
2494         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2495                 (unsigned long long)rec->ino);
2496
2497         btrfs_init_path(&path);
2498         key.objectid = backref->dir;
2499         key.type = BTRFS_DIR_INDEX_KEY;
2500         key.offset = backref->index;
2501         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2502         BUG_ON(ret);
2503
2504         leaf = path.nodes[0];
2505         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506
2507         disk_key.objectid = cpu_to_le64(rec->ino);
2508         disk_key.type = BTRFS_INODE_ITEM_KEY;
2509         disk_key.offset = 0;
2510
2511         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2512         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2513         btrfs_set_dir_data_len(leaf, dir_item, 0);
2514         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2515         name_ptr = (unsigned long)(dir_item + 1);
2516         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2517         btrfs_mark_buffer_dirty(leaf);
2518         btrfs_release_path(&path);
2519         btrfs_commit_transaction(trans, root);
2520
2521         backref->found_dir_index = 1;
2522         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2523         BUG_ON(IS_ERR(dir_rec));
2524         if (!dir_rec)
2525                 return 0;
2526         dir_rec->found_size += backref->namelen;
2527         if (dir_rec->found_size == dir_rec->isize &&
2528             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2529                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2530         if (dir_rec->found_size != dir_rec->isize)
2531                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532
2533         return 0;
2534 }
2535
2536 static int delete_dir_index(struct btrfs_root *root,
2537                             struct cache_tree *inode_cache,
2538                             struct inode_record *rec,
2539                             struct inode_backref *backref)
2540 {
2541         struct btrfs_trans_handle *trans;
2542         struct btrfs_dir_item *di;
2543         struct btrfs_path path;
2544         int ret = 0;
2545
2546         trans = btrfs_start_transaction(root, 1);
2547         if (IS_ERR(trans))
2548                 return PTR_ERR(trans);
2549
2550         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2551                 (unsigned long long)backref->dir,
2552                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2553                 (unsigned long long)root->objectid);
2554
2555         btrfs_init_path(&path);
2556         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2557                                     backref->name, backref->namelen,
2558                                     backref->index, -1);
2559         if (IS_ERR(di)) {
2560                 ret = PTR_ERR(di);
2561                 btrfs_release_path(&path);
2562                 btrfs_commit_transaction(trans, root);
2563                 if (ret == -ENOENT)
2564                         return 0;
2565                 return ret;
2566         }
2567
2568         if (!di)
2569                 ret = btrfs_del_item(trans, root, &path);
2570         else
2571                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2572         BUG_ON(ret);
2573         btrfs_release_path(&path);
2574         btrfs_commit_transaction(trans, root);
2575         return ret;
2576 }
2577
2578 static int create_inode_item(struct btrfs_root *root,
2579                              struct inode_record *rec,
2580                              struct inode_backref *backref, int root_dir)
2581 {
2582         struct btrfs_trans_handle *trans;
2583         struct btrfs_inode_item inode_item;
2584         time_t now = time(NULL);
2585         int ret;
2586
2587         trans = btrfs_start_transaction(root, 1);
2588         if (IS_ERR(trans)) {
2589                 ret = PTR_ERR(trans);
2590                 return ret;
2591         }
2592
2593         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2594                 "be incomplete, please check permissions and content after "
2595                 "the fsck completes.\n", (unsigned long long)root->objectid,
2596                 (unsigned long long)rec->ino);
2597
2598         memset(&inode_item, 0, sizeof(inode_item));
2599         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2600         if (root_dir)
2601                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2602         else
2603                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2604         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2605         if (rec->found_dir_item) {
2606                 if (rec->found_file_extent)
2607                         fprintf(stderr, "root %llu inode %llu has both a dir "
2608                                 "item and extents, unsure if it is a dir or a "
2609                                 "regular file so setting it as a directory\n",
2610                                 (unsigned long long)root->objectid,
2611                                 (unsigned long long)rec->ino);
2612                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2613                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2614         } else if (!rec->found_dir_item) {
2615                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2616                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2617         }
2618         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2619         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2620         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2621         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2622         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2623         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2624         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2625         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2626
2627         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2628         BUG_ON(ret);
2629         btrfs_commit_transaction(trans, root);
2630         return 0;
2631 }
2632
2633 static int repair_inode_backrefs(struct btrfs_root *root,
2634                                  struct inode_record *rec,
2635                                  struct cache_tree *inode_cache,
2636                                  int delete)
2637 {
2638         struct inode_backref *tmp, *backref;
2639         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2640         int ret = 0;
2641         int repaired = 0;
2642
2643         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2644                 if (!delete && rec->ino == root_dirid) {
2645                         if (!rec->found_inode_item) {
2646                                 ret = create_inode_item(root, rec, backref, 1);
2647                                 if (ret)
2648                                         break;
2649                                 repaired++;
2650                         }
2651                 }
2652
2653                 /* Index 0 for root dir's are special, don't mess with it */
2654                 if (rec->ino == root_dirid && backref->index == 0)
2655                         continue;
2656
2657                 if (delete &&
2658                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2659                      (backref->found_dir_index && backref->found_inode_ref &&
2660                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2661                         ret = delete_dir_index(root, inode_cache, rec, backref);
2662                         if (ret)
2663                                 break;
2664                         repaired++;
2665                         list_del(&backref->list);
2666                         free(backref);
2667                 }
2668
2669                 if (!delete && !backref->found_dir_index &&
2670                     backref->found_dir_item && backref->found_inode_ref) {
2671                         ret = add_missing_dir_index(root, inode_cache, rec,
2672                                                     backref);
2673                         if (ret)
2674                                 break;
2675                         repaired++;
2676                         if (backref->found_dir_item &&
2677                             backref->found_dir_index &&
2678                             backref->found_dir_index) {
2679                                 if (!backref->errors &&
2680                                     backref->found_inode_ref) {
2681                                         list_del(&backref->list);
2682                                         free(backref);
2683                                 }
2684                         }
2685                 }
2686
2687                 if (!delete && (!backref->found_dir_index &&
2688                                 !backref->found_dir_item &&
2689                                 backref->found_inode_ref)) {
2690                         struct btrfs_trans_handle *trans;
2691                         struct btrfs_key location;
2692
2693                         ret = check_dir_conflict(root, backref->name,
2694                                                  backref->namelen,
2695                                                  backref->dir,
2696                                                  backref->index);
2697                         if (ret) {
2698                                 /*
2699                                  * let nlink fixing routine to handle it,
2700                                  * which can do it better.
2701                                  */
2702                                 ret = 0;
2703                                 break;
2704                         }
2705                         location.objectid = rec->ino;
2706                         location.type = BTRFS_INODE_ITEM_KEY;
2707                         location.offset = 0;
2708
2709                         trans = btrfs_start_transaction(root, 1);
2710                         if (IS_ERR(trans)) {
2711                                 ret = PTR_ERR(trans);
2712                                 break;
2713                         }
2714                         fprintf(stderr, "adding missing dir index/item pair "
2715                                 "for inode %llu\n",
2716                                 (unsigned long long)rec->ino);
2717                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2718                                                     backref->namelen,
2719                                                     backref->dir, &location,
2720                                                     imode_to_type(rec->imode),
2721                                                     backref->index);
2722                         BUG_ON(ret);
2723                         btrfs_commit_transaction(trans, root);
2724                         repaired++;
2725                 }
2726
2727                 if (!delete && (backref->found_inode_ref &&
2728                                 backref->found_dir_index &&
2729                                 backref->found_dir_item &&
2730                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2731                                 !rec->found_inode_item)) {
2732                         ret = create_inode_item(root, rec, backref, 0);
2733                         if (ret)
2734                                 break;
2735                         repaired++;
2736                 }
2737
2738         }
2739         return ret ? ret : repaired;
2740 }
2741
2742 /*
2743  * To determine the file type for nlink/inode_item repair
2744  *
2745  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2746  * Return -ENOENT if file type is not found.
2747  */
2748 static int find_file_type(struct inode_record *rec, u8 *type)
2749 {
2750         struct inode_backref *backref;
2751
2752         /* For inode item recovered case */
2753         if (rec->found_inode_item) {
2754                 *type = imode_to_type(rec->imode);
2755                 return 0;
2756         }
2757
2758         list_for_each_entry(backref, &rec->backrefs, list) {
2759                 if (backref->found_dir_index || backref->found_dir_item) {
2760                         *type = backref->filetype;
2761                         return 0;
2762                 }
2763         }
2764         return -ENOENT;
2765 }
2766
2767 /*
2768  * To determine the file name for nlink repair
2769  *
2770  * Return 0 if file name is found, set name and namelen.
2771  * Return -ENOENT if file name is not found.
2772  */
2773 static int find_file_name(struct inode_record *rec,
2774                           char *name, int *namelen)
2775 {
2776         struct inode_backref *backref;
2777
2778         list_for_each_entry(backref, &rec->backrefs, list) {
2779                 if (backref->found_dir_index || backref->found_dir_item ||
2780                     backref->found_inode_ref) {
2781                         memcpy(name, backref->name, backref->namelen);
2782                         *namelen = backref->namelen;
2783                         return 0;
2784                 }
2785         }
2786         return -ENOENT;
2787 }
2788
2789 /* Reset the nlink of the inode to the correct one */
2790 static int reset_nlink(struct btrfs_trans_handle *trans,
2791                        struct btrfs_root *root,
2792                        struct btrfs_path *path,
2793                        struct inode_record *rec)
2794 {
2795         struct inode_backref *backref;
2796         struct inode_backref *tmp;
2797         struct btrfs_key key;
2798         struct btrfs_inode_item *inode_item;
2799         int ret = 0;
2800
2801         /* We don't believe this either, reset it and iterate backref */
2802         rec->found_link = 0;
2803
2804         /* Remove all backref including the valid ones */
2805         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2806                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2807                                    backref->index, backref->name,
2808                                    backref->namelen, 0);
2809                 if (ret < 0)
2810                         goto out;
2811
2812                 /* remove invalid backref, so it won't be added back */
2813                 if (!(backref->found_dir_index &&
2814                       backref->found_dir_item &&
2815                       backref->found_inode_ref)) {
2816                         list_del(&backref->list);
2817                         free(backref);
2818                 } else {
2819                         rec->found_link++;
2820                 }
2821         }
2822
2823         /* Set nlink to 0 */
2824         key.objectid = rec->ino;
2825         key.type = BTRFS_INODE_ITEM_KEY;
2826         key.offset = 0;
2827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2828         if (ret < 0)
2829                 goto out;
2830         if (ret > 0) {
2831                 ret = -ENOENT;
2832                 goto out;
2833         }
2834         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2835                                     struct btrfs_inode_item);
2836         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2837         btrfs_mark_buffer_dirty(path->nodes[0]);
2838         btrfs_release_path(path);
2839
2840         /*
2841          * Add back valid inode_ref/dir_item/dir_index,
2842          * add_link() will handle the nlink inc, so new nlink must be correct
2843          */
2844         list_for_each_entry(backref, &rec->backrefs, list) {
2845                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2846                                      backref->name, backref->namelen,
2847                                      backref->filetype, &backref->index, 1);
2848                 if (ret < 0)
2849                         goto out;
2850         }
2851 out:
2852         btrfs_release_path(path);
2853         return ret;
2854 }
2855
2856 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2857                                struct btrfs_root *root,
2858                                struct btrfs_path *path,
2859                                struct inode_record *rec)
2860 {
2861         char *dir_name = "lost+found";
2862         char namebuf[BTRFS_NAME_LEN] = {0};
2863         u64 lost_found_ino;
2864         u32 mode = 0700;
2865         u8 type = 0;
2866         int namelen = 0;
2867         int name_recovered = 0;
2868         int type_recovered = 0;
2869         int ret = 0;
2870
2871         /*
2872          * Get file name and type first before these invalid inode ref
2873          * are deleted by remove_all_invalid_backref()
2874          */
2875         name_recovered = !find_file_name(rec, namebuf, &namelen);
2876         type_recovered = !find_file_type(rec, &type);
2877
2878         if (!name_recovered) {
2879                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2880                        rec->ino, rec->ino);
2881                 namelen = count_digits(rec->ino);
2882                 sprintf(namebuf, "%llu", rec->ino);
2883                 name_recovered = 1;
2884         }
2885         if (!type_recovered) {
2886                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2887                        rec->ino);
2888                 type = BTRFS_FT_REG_FILE;
2889                 type_recovered = 1;
2890         }
2891
2892         ret = reset_nlink(trans, root, path, rec);
2893         if (ret < 0) {
2894                 fprintf(stderr,
2895                         "Failed to reset nlink for inode %llu: %s\n",
2896                         rec->ino, strerror(-ret));
2897                 goto out;
2898         }
2899
2900         if (rec->found_link == 0) {
2901                 lost_found_ino = root->highest_inode;
2902                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2903                         ret = -EOVERFLOW;
2904                         goto out;
2905                 }
2906                 lost_found_ino++;
2907                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2908                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2909                                   mode);
2910                 if (ret < 0) {
2911                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2912                                 dir_name, strerror(-ret));
2913                         goto out;
2914                 }
2915                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2916                                      namebuf, namelen, type, NULL, 1);
2917                 /*
2918                  * Add ".INO" suffix several times to handle case where
2919                  * "FILENAME.INO" is already taken by another file.
2920                  */
2921                 while (ret == -EEXIST) {
2922                         /*
2923                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2924                          */
2925                         if (namelen + count_digits(rec->ino) + 1 >
2926                             BTRFS_NAME_LEN) {
2927                                 ret = -EFBIG;
2928                                 goto out;
2929                         }
2930                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2931                                  ".%llu", rec->ino);
2932                         namelen += count_digits(rec->ino) + 1;
2933                         ret = btrfs_add_link(trans, root, rec->ino,
2934                                              lost_found_ino, namebuf,
2935                                              namelen, type, NULL, 1);
2936                 }
2937                 if (ret < 0) {
2938                         fprintf(stderr,
2939                                 "Failed to link the inode %llu to %s dir: %s\n",
2940                                 rec->ino, dir_name, strerror(-ret));
2941                         goto out;
2942                 }
2943                 /*
2944                  * Just increase the found_link, don't actually add the
2945                  * backref. This will make things easier and this inode
2946                  * record will be freed after the repair is done.
2947                  * So fsck will not report problem about this inode.
2948                  */
2949                 rec->found_link++;
2950                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2951                        namelen, namebuf, dir_name);
2952         }
2953         printf("Fixed the nlink of inode %llu\n", rec->ino);
2954 out:
2955         /*
2956          * Clear the flag anyway, or we will loop forever for the same inode
2957          * as it will not be removed from the bad inode list and the dead loop
2958          * happens.
2959          */
2960         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2961         btrfs_release_path(path);
2962         return ret;
2963 }
2964
2965 /*
2966  * Check if there is any normal(reg or prealloc) file extent for given
2967  * ino.
2968  * This is used to determine the file type when neither its dir_index/item or
2969  * inode_item exists.
2970  *
2971  * This will *NOT* report error, if any error happens, just consider it does
2972  * not have any normal file extent.
2973  */
2974 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2975 {
2976         struct btrfs_path path;
2977         struct btrfs_key key;
2978         struct btrfs_key found_key;
2979         struct btrfs_file_extent_item *fi;
2980         u8 type;
2981         int ret = 0;
2982
2983         btrfs_init_path(&path);
2984         key.objectid = ino;
2985         key.type = BTRFS_EXTENT_DATA_KEY;
2986         key.offset = 0;
2987
2988         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2989         if (ret < 0) {
2990                 ret = 0;
2991                 goto out;
2992         }
2993         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2994                 ret = btrfs_next_leaf(root, &path);
2995                 if (ret) {
2996                         ret = 0;
2997                         goto out;
2998                 }
2999         }
3000         while (1) {
3001                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3002                                       path.slots[0]);
3003                 if (found_key.objectid != ino ||
3004                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3005                         break;
3006                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3007                                     struct btrfs_file_extent_item);
3008                 type = btrfs_file_extent_type(path.nodes[0], fi);
3009                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3010                         ret = 1;
3011                         goto out;
3012                 }
3013         }
3014 out:
3015         btrfs_release_path(&path);
3016         return ret;
3017 }
3018
3019 static u32 btrfs_type_to_imode(u8 type)
3020 {
3021         static u32 imode_by_btrfs_type[] = {
3022                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3023                 [BTRFS_FT_DIR]          = S_IFDIR,
3024                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3025                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3026                 [BTRFS_FT_FIFO]         = S_IFIFO,
3027                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3028                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3029         };
3030
3031         return imode_by_btrfs_type[(type)];
3032 }
3033
3034 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3035                                 struct btrfs_root *root,
3036                                 struct btrfs_path *path,
3037                                 struct inode_record *rec)
3038 {
3039         u8 filetype;
3040         u32 mode = 0700;
3041         int type_recovered = 0;
3042         int ret = 0;
3043
3044         printf("Trying to rebuild inode:%llu\n", rec->ino);
3045
3046         type_recovered = !find_file_type(rec, &filetype);
3047
3048         /*
3049          * Try to determine inode type if type not found.
3050          *
3051          * For found regular file extent, it must be FILE.
3052          * For found dir_item/index, it must be DIR.
3053          *
3054          * For undetermined one, use FILE as fallback.
3055          *
3056          * TODO:
3057          * 1. If found backref(inode_index/item is already handled) to it,
3058          *    it must be DIR.
3059          *    Need new inode-inode ref structure to allow search for that.
3060          */
3061         if (!type_recovered) {
3062                 if (rec->found_file_extent &&
3063                     find_normal_file_extent(root, rec->ino)) {
3064                         type_recovered = 1;
3065                         filetype = BTRFS_FT_REG_FILE;
3066                 } else if (rec->found_dir_item) {
3067                         type_recovered = 1;
3068                         filetype = BTRFS_FT_DIR;
3069                 } else if (!list_empty(&rec->orphan_extents)) {
3070                         type_recovered = 1;
3071                         filetype = BTRFS_FT_REG_FILE;
3072                 } else{
3073                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3074                                rec->ino);
3075                         type_recovered = 1;
3076                         filetype = BTRFS_FT_REG_FILE;
3077                 }
3078         }
3079
3080         ret = btrfs_new_inode(trans, root, rec->ino,
3081                               mode | btrfs_type_to_imode(filetype));
3082         if (ret < 0)
3083                 goto out;
3084
3085         /*
3086          * Here inode rebuild is done, we only rebuild the inode item,
3087          * don't repair the nlink(like move to lost+found).
3088          * That is the job of nlink repair.
3089          *
3090          * We just fill the record and return
3091          */
3092         rec->found_dir_item = 1;
3093         rec->imode = mode | btrfs_type_to_imode(filetype);
3094         rec->nlink = 0;
3095         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3096         /* Ensure the inode_nlinks repair function will be called */
3097         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3098 out:
3099         return ret;
3100 }
3101
3102 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3103                                       struct btrfs_root *root,
3104                                       struct btrfs_path *path,
3105                                       struct inode_record *rec)
3106 {
3107         struct orphan_data_extent *orphan;
3108         struct orphan_data_extent *tmp;
3109         int ret = 0;
3110
3111         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3112                 /*
3113                  * Check for conflicting file extents
3114                  *
3115                  * Here we don't know whether the extents is compressed or not,
3116                  * so we can only assume it not compressed nor data offset,
3117                  * and use its disk_len as extent length.
3118                  */
3119                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3120                                        orphan->offset, orphan->disk_len, 0);
3121                 btrfs_release_path(path);
3122                 if (ret < 0)
3123                         goto out;
3124                 if (!ret) {
3125                         fprintf(stderr,
3126                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3127                                 orphan->disk_bytenr, orphan->disk_len);
3128                         ret = btrfs_free_extent(trans,
3129                                         root->fs_info->extent_root,
3130                                         orphan->disk_bytenr, orphan->disk_len,
3131                                         0, root->objectid, orphan->objectid,
3132                                         orphan->offset);
3133                         if (ret < 0)
3134                                 goto out;
3135                 }
3136                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3137                                 orphan->offset, orphan->disk_bytenr,
3138                                 orphan->disk_len, orphan->disk_len);
3139                 if (ret < 0)
3140                         goto out;
3141
3142                 /* Update file size info */
3143                 rec->found_size += orphan->disk_len;
3144                 if (rec->found_size == rec->nbytes)
3145                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3146
3147                 /* Update the file extent hole info too */
3148                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3149                                            orphan->disk_len);
3150                 if (ret < 0)
3151                         goto out;
3152                 if (RB_EMPTY_ROOT(&rec->holes))
3153                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3154
3155                 list_del(&orphan->list);
3156                 free(orphan);
3157         }
3158         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3159 out:
3160         return ret;
3161 }
3162
3163 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3164                                         struct btrfs_root *root,
3165                                         struct btrfs_path *path,
3166                                         struct inode_record *rec)
3167 {
3168         struct rb_node *node;
3169         struct file_extent_hole *hole;
3170         int found = 0;
3171         int ret = 0;
3172
3173         node = rb_first(&rec->holes);
3174
3175         while (node) {
3176                 found = 1;
3177                 hole = rb_entry(node, struct file_extent_hole, node);
3178                 ret = btrfs_punch_hole(trans, root, rec->ino,
3179                                        hole->start, hole->len);
3180                 if (ret < 0)
3181                         goto out;
3182                 ret = del_file_extent_hole(&rec->holes, hole->start,
3183                                            hole->len);
3184                 if (ret < 0)
3185                         goto out;
3186                 if (RB_EMPTY_ROOT(&rec->holes))
3187                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3188                 node = rb_first(&rec->holes);
3189         }
3190         /* special case for a file losing all its file extent */
3191         if (!found) {
3192                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3193                                        round_up(rec->isize, root->sectorsize));
3194                 if (ret < 0)
3195                         goto out;
3196         }
3197         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3198                rec->ino, root->objectid);
3199 out:
3200         return ret;
3201 }
3202
3203 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3204 {
3205         struct btrfs_trans_handle *trans;
3206         struct btrfs_path path;
3207         int ret = 0;
3208
3209         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3210                              I_ERR_NO_ORPHAN_ITEM |
3211                              I_ERR_LINK_COUNT_WRONG |
3212                              I_ERR_NO_INODE_ITEM |
3213                              I_ERR_FILE_EXTENT_ORPHAN |
3214                              I_ERR_FILE_EXTENT_DISCOUNT|
3215                              I_ERR_FILE_NBYTES_WRONG)))
3216                 return rec->errors;
3217
3218         /*
3219          * For nlink repair, it may create a dir and add link, so
3220          * 2 for parent(256)'s dir_index and dir_item
3221          * 2 for lost+found dir's inode_item and inode_ref
3222          * 1 for the new inode_ref of the file
3223          * 2 for lost+found dir's dir_index and dir_item for the file
3224          */
3225         trans = btrfs_start_transaction(root, 7);
3226         if (IS_ERR(trans))
3227                 return PTR_ERR(trans);
3228
3229         btrfs_init_path(&path);
3230         if (rec->errors & I_ERR_NO_INODE_ITEM)
3231                 ret = repair_inode_no_item(trans, root, &path, rec);
3232         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3233                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3234         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3235                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3236         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3237                 ret = repair_inode_isize(trans, root, &path, rec);
3238         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3239                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3240         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3241                 ret = repair_inode_nlinks(trans, root, &path, rec);
3242         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3243                 ret = repair_inode_nbytes(trans, root, &path, rec);
3244         btrfs_commit_transaction(trans, root);
3245         btrfs_release_path(&path);
3246         return ret;
3247 }
3248
3249 static int check_inode_recs(struct btrfs_root *root,
3250                             struct cache_tree *inode_cache)
3251 {
3252         struct cache_extent *cache;
3253         struct ptr_node *node;
3254         struct inode_record *rec;
3255         struct inode_backref *backref;
3256         int stage = 0;
3257         int ret = 0;
3258         int err = 0;
3259         u64 error = 0;
3260         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3261
3262         if (btrfs_root_refs(&root->root_item) == 0) {
3263                 if (!cache_tree_empty(inode_cache))
3264                         fprintf(stderr, "warning line %d\n", __LINE__);
3265                 return 0;
3266         }
3267
3268         /*
3269          * We need to record the highest inode number for later 'lost+found'
3270          * dir creation.
3271          * We must select an ino not used/referred by any existing inode, or
3272          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3273          * this may cause 'lost+found' dir has wrong nlinks.
3274          */
3275         cache = last_cache_extent(inode_cache);
3276         if (cache) {
3277                 node = container_of(cache, struct ptr_node, cache);
3278                 rec = node->data;
3279                 if (rec->ino > root->highest_inode)
3280                         root->highest_inode = rec->ino;
3281         }
3282
3283         /*
3284          * We need to repair backrefs first because we could change some of the
3285          * errors in the inode recs.
3286          *
3287          * We also need to go through and delete invalid backrefs first and then
3288          * add the correct ones second.  We do this because we may get EEXIST
3289          * when adding back the correct index because we hadn't yet deleted the
3290          * invalid index.
3291          *
3292          * For example, if we were missing a dir index then the directories
3293          * isize would be wrong, so if we fixed the isize to what we thought it
3294          * would be and then fixed the backref we'd still have a invalid fs, so
3295          * we need to add back the dir index and then check to see if the isize
3296          * is still wrong.
3297          */
3298         while (stage < 3) {
3299                 stage++;
3300                 if (stage == 3 && !err)
3301                         break;
3302
3303                 cache = search_cache_extent(inode_cache, 0);
3304                 while (repair && cache) {
3305                         node = container_of(cache, struct ptr_node, cache);
3306                         rec = node->data;
3307                         cache = next_cache_extent(cache);
3308
3309                         /* Need to free everything up and rescan */
3310                         if (stage == 3) {
3311                                 remove_cache_extent(inode_cache, &node->cache);
3312                                 free(node);
3313                                 free_inode_rec(rec);
3314                                 continue;
3315                         }
3316
3317                         if (list_empty(&rec->backrefs))
3318                                 continue;
3319
3320                         ret = repair_inode_backrefs(root, rec, inode_cache,
3321                                                     stage == 1);
3322                         if (ret < 0) {
3323                                 err = ret;
3324                                 stage = 2;
3325                                 break;
3326                         } if (ret > 0) {
3327                                 err = -EAGAIN;
3328                         }
3329                 }
3330         }
3331         if (err)
3332                 return err;
3333
3334         rec = get_inode_rec(inode_cache, root_dirid, 0);
3335         BUG_ON(IS_ERR(rec));
3336         if (rec) {
3337                 ret = check_root_dir(rec);
3338                 if (ret) {
3339                         fprintf(stderr, "root %llu root dir %llu error\n",
3340                                 (unsigned long long)root->root_key.objectid,
3341                                 (unsigned long long)root_dirid);
3342                         print_inode_error(root, rec);
3343                         error++;
3344                 }
3345         } else {
3346                 if (repair) {
3347                         struct btrfs_trans_handle *trans;
3348
3349                         trans = btrfs_start_transaction(root, 1);
3350                         if (IS_ERR(trans)) {
3351                                 err = PTR_ERR(trans);
3352                                 return err;
3353                         }
3354
3355                         fprintf(stderr,
3356                                 "root %llu missing its root dir, recreating\n",
3357                                 (unsigned long long)root->objectid);
3358
3359                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3360                         BUG_ON(ret);
3361
3362                         btrfs_commit_transaction(trans, root);
3363                         return -EAGAIN;
3364                 }
3365
3366                 fprintf(stderr, "root %llu root dir %llu not found\n",
3367                         (unsigned long long)root->root_key.objectid,
3368                         (unsigned long long)root_dirid);
3369         }
3370
3371         while (1) {
3372                 cache = search_cache_extent(inode_cache, 0);
3373                 if (!cache)
3374                         break;
3375                 node = container_of(cache, struct ptr_node, cache);
3376                 rec = node->data;
3377                 remove_cache_extent(inode_cache, &node->cache);
3378                 free(node);
3379                 if (rec->ino == root_dirid ||
3380                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3381                         free_inode_rec(rec);
3382                         continue;
3383                 }
3384
3385                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3386                         ret = check_orphan_item(root, rec->ino);
3387                         if (ret == 0)
3388                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3389                         if (can_free_inode_rec(rec)) {
3390                                 free_inode_rec(rec);
3391                                 continue;
3392                         }
3393                 }
3394
3395                 if (!rec->found_inode_item)
3396                         rec->errors |= I_ERR_NO_INODE_ITEM;
3397                 if (rec->found_link != rec->nlink)
3398                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3399                 if (repair) {
3400                         ret = try_repair_inode(root, rec);
3401                         if (ret == 0 && can_free_inode_rec(rec)) {
3402                                 free_inode_rec(rec);
3403                                 continue;
3404                         }
3405                         ret = 0;
3406                 }
3407
3408                 if (!(repair && ret == 0))
3409                         error++;
3410                 print_inode_error(root, rec);
3411                 list_for_each_entry(backref, &rec->backrefs, list) {
3412                         if (!backref->found_dir_item)
3413                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3414                         if (!backref->found_dir_index)
3415                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3416                         if (!backref->found_inode_ref)
3417                                 backref->errors |= REF_ERR_NO_INODE_REF;
3418                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3419                                 " namelen %u name %s filetype %d errors %x",
3420                                 (unsigned long long)backref->dir,
3421                                 (unsigned long long)backref->index,
3422                                 backref->namelen, backref->name,
3423                                 backref->filetype, backref->errors);
3424                         print_ref_error(backref->errors);
3425                 }
3426                 free_inode_rec(rec);
3427         }
3428         return (error > 0) ? -1 : 0;
3429 }
3430
3431 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3432                                         u64 objectid)
3433 {
3434         struct cache_extent *cache;
3435         struct root_record *rec = NULL;
3436         int ret;
3437
3438         cache = lookup_cache_extent(root_cache, objectid, 1);
3439         if (cache) {
3440                 rec = container_of(cache, struct root_record, cache);
3441         } else {
3442                 rec = calloc(1, sizeof(*rec));
3443                 if (!rec)
3444                         return ERR_PTR(-ENOMEM);
3445                 rec->objectid = objectid;
3446                 INIT_LIST_HEAD(&rec->backrefs);
3447                 rec->cache.start = objectid;
3448                 rec->cache.size = 1;
3449
3450                 ret = insert_cache_extent(root_cache, &rec->cache);
3451                 if (ret)
3452                         return ERR_PTR(-EEXIST);
3453         }
3454         return rec;
3455 }
3456
3457 static struct root_backref *get_root_backref(struct root_record *rec,
3458                                              u64 ref_root, u64 dir, u64 index,
3459                                              const char *name, int namelen)
3460 {
3461         struct root_backref *backref;
3462
3463         list_for_each_entry(backref, &rec->backrefs, list) {
3464                 if (backref->ref_root != ref_root || backref->dir != dir ||
3465                     backref->namelen != namelen)
3466                         continue;
3467                 if (memcmp(name, backref->name, namelen))
3468                         continue;
3469                 return backref;
3470         }
3471
3472         backref = calloc(1, sizeof(*backref) + namelen + 1);
3473         if (!backref)
3474                 return NULL;
3475         backref->ref_root = ref_root;
3476         backref->dir = dir;
3477         backref->index = index;
3478         backref->namelen = namelen;
3479         memcpy(backref->name, name, namelen);
3480         backref->name[namelen] = '\0';
3481         list_add_tail(&backref->list, &rec->backrefs);
3482         return backref;
3483 }
3484
3485 static void free_root_record(struct cache_extent *cache)
3486 {
3487         struct root_record *rec;
3488         struct root_backref *backref;
3489
3490         rec = container_of(cache, struct root_record, cache);
3491         while (!list_empty(&rec->backrefs)) {
3492                 backref = to_root_backref(rec->backrefs.next);
3493                 list_del(&backref->list);
3494                 free(backref);
3495         }
3496
3497         free(rec);
3498 }
3499
3500 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3501
3502 static int add_root_backref(struct cache_tree *root_cache,
3503                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3504                             const char *name, int namelen,
3505                             int item_type, int errors)
3506 {
3507         struct root_record *rec;
3508         struct root_backref *backref;
3509
3510         rec = get_root_rec(root_cache, root_id);
3511         BUG_ON(IS_ERR(rec));
3512         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3513         BUG_ON(!backref);
3514
3515         backref->errors |= errors;
3516
3517         if (item_type != BTRFS_DIR_ITEM_KEY) {
3518                 if (backref->found_dir_index || backref->found_back_ref ||
3519                     backref->found_forward_ref) {
3520                         if (backref->index != index)
3521                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3522                 } else {
3523                         backref->index = index;
3524                 }
3525         }
3526
3527         if (item_type == BTRFS_DIR_ITEM_KEY) {
3528                 if (backref->found_forward_ref)
3529                         rec->found_ref++;
3530                 backref->found_dir_item = 1;
3531         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3532                 backref->found_dir_index = 1;
3533         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3534                 if (backref->found_forward_ref)
3535                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3536                 else if (backref->found_dir_item)
3537                         rec->found_ref++;
3538                 backref->found_forward_ref = 1;
3539         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3540                 if (backref->found_back_ref)
3541                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3542                 backref->found_back_ref = 1;
3543         } else {
3544                 BUG_ON(1);
3545         }
3546
3547         if (backref->found_forward_ref && backref->found_dir_item)
3548                 backref->reachable = 1;
3549         return 0;
3550 }
3551
3552 static int merge_root_recs(struct btrfs_root *root,
3553                            struct cache_tree *src_cache,
3554                            struct cache_tree *dst_cache)
3555 {
3556         struct cache_extent *cache;
3557         struct ptr_node *node;
3558         struct inode_record *rec;
3559         struct inode_backref *backref;
3560         int ret = 0;
3561
3562         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3563                 free_inode_recs_tree(src_cache);
3564                 return 0;
3565         }
3566
3567         while (1) {
3568                 cache = search_cache_extent(src_cache, 0);
3569                 if (!cache)
3570                         break;
3571                 node = container_of(cache, struct ptr_node, cache);
3572                 rec = node->data;
3573                 remove_cache_extent(src_cache, &node->cache);
3574                 free(node);
3575
3576                 ret = is_child_root(root, root->objectid, rec->ino);
3577                 if (ret < 0)
3578                         break;
3579                 else if (ret == 0)
3580                         goto skip;
3581
3582                 list_for_each_entry(backref, &rec->backrefs, list) {
3583                         BUG_ON(backref->found_inode_ref);
3584                         if (backref->found_dir_item)
3585                                 add_root_backref(dst_cache, rec->ino,
3586                                         root->root_key.objectid, backref->dir,
3587                                         backref->index, backref->name,
3588                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3589                                         backref->errors);
3590                         if (backref->found_dir_index)
3591                                 add_root_backref(dst_cache, rec->ino,
3592                                         root->root_key.objectid, backref->dir,
3593                                         backref->index, backref->name,
3594                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3595                                         backref->errors);
3596                 }
3597 skip:
3598                 free_inode_rec(rec);
3599         }
3600         if (ret < 0)
3601                 return ret;
3602         return 0;
3603 }
3604
3605 static int check_root_refs(struct btrfs_root *root,
3606                            struct cache_tree *root_cache)
3607 {
3608         struct root_record *rec;
3609         struct root_record *ref_root;
3610         struct root_backref *backref;
3611         struct cache_extent *cache;
3612         int loop = 1;
3613         int ret;
3614         int error;
3615         int errors = 0;
3616
3617         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3618         BUG_ON(IS_ERR(rec));
3619         rec->found_ref = 1;
3620
3621         /* fixme: this can not detect circular references */
3622         while (loop) {
3623                 loop = 0;
3624                 cache = search_cache_extent(root_cache, 0);
3625                 while (1) {
3626                         if (!cache)
3627                                 break;
3628                         rec = container_of(cache, struct root_record, cache);
3629                         cache = next_cache_extent(cache);
3630
3631                         if (rec->found_ref == 0)
3632                                 continue;
3633
3634                         list_for_each_entry(backref, &rec->backrefs, list) {
3635                                 if (!backref->reachable)
3636                                         continue;
3637
3638                                 ref_root = get_root_rec(root_cache,
3639                                                         backref->ref_root);
3640                                 BUG_ON(IS_ERR(ref_root));
3641                                 if (ref_root->found_ref > 0)
3642                                         continue;
3643
3644                                 backref->reachable = 0;
3645                                 rec->found_ref--;
3646                                 if (rec->found_ref == 0)
3647                                         loop = 1;
3648                         }
3649                 }
3650         }
3651
3652         cache = search_cache_extent(root_cache, 0);
3653         while (1) {
3654                 if (!cache)
3655                         break;
3656                 rec = container_of(cache, struct root_record, cache);
3657                 cache = next_cache_extent(cache);
3658
3659                 if (rec->found_ref == 0 &&
3660                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3661                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3662                         ret = check_orphan_item(root->fs_info->tree_root,
3663                                                 rec->objectid);
3664                         if (ret == 0)
3665                                 continue;
3666
3667                         /*
3668                          * If we don't have a root item then we likely just have
3669                          * a dir item in a snapshot for this root but no actual
3670                          * ref key or anything so it's meaningless.
3671                          */
3672                         if (!rec->found_root_item)
3673                                 continue;
3674                         errors++;
3675                         fprintf(stderr, "fs tree %llu not referenced\n",
3676                                 (unsigned long long)rec->objectid);
3677                 }
3678
3679                 error = 0;
3680                 if (rec->found_ref > 0 && !rec->found_root_item)
3681                         error = 1;
3682                 list_for_each_entry(backref, &rec->backrefs, list) {
3683                         if (!backref->found_dir_item)
3684                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3685                         if (!backref->found_dir_index)
3686                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3687                         if (!backref->found_back_ref)
3688                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3689                         if (!backref->found_forward_ref)
3690                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3691                         if (backref->reachable && backref->errors)
3692                                 error = 1;
3693                 }
3694                 if (!error)
3695                         continue;
3696
3697                 errors++;
3698                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3699                         (unsigned long long)rec->objectid, rec->found_ref,
3700                          rec->found_root_item ? "" : "not found");
3701
3702                 list_for_each_entry(backref, &rec->backrefs, list) {
3703                         if (!backref->reachable)
3704                                 continue;
3705                         if (!backref->errors && rec->found_root_item)
3706                                 continue;
3707                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3708                                 " index %llu namelen %u name %s errors %x\n",
3709                                 (unsigned long long)backref->ref_root,
3710                                 (unsigned long long)backref->dir,
3711                                 (unsigned long long)backref->index,
3712                                 backref->namelen, backref->name,
3713                                 backref->errors);
3714                         print_ref_error(backref->errors);
3715                 }
3716         }
3717         return errors > 0 ? 1 : 0;
3718 }
3719
3720 static int process_root_ref(struct extent_buffer *eb, int slot,
3721                             struct btrfs_key *key,
3722                             struct cache_tree *root_cache)
3723 {
3724         u64 dirid;
3725         u64 index;
3726         u32 len;
3727         u32 name_len;
3728         struct btrfs_root_ref *ref;
3729         char namebuf[BTRFS_NAME_LEN];
3730         int error;
3731
3732         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3733
3734         dirid = btrfs_root_ref_dirid(eb, ref);
3735         index = btrfs_root_ref_sequence(eb, ref);
3736         name_len = btrfs_root_ref_name_len(eb, ref);
3737
3738         if (name_len <= BTRFS_NAME_LEN) {
3739                 len = name_len;
3740                 error = 0;
3741         } else {
3742                 len = BTRFS_NAME_LEN;
3743                 error = REF_ERR_NAME_TOO_LONG;
3744         }
3745         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3746
3747         if (key->type == BTRFS_ROOT_REF_KEY) {
3748                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3749                                  index, namebuf, len, key->type, error);
3750         } else {
3751                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3752                                  index, namebuf, len, key->type, error);
3753         }
3754         return 0;
3755 }
3756
3757 static void free_corrupt_block(struct cache_extent *cache)
3758 {
3759         struct btrfs_corrupt_block *corrupt;
3760
3761         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3762         free(corrupt);
3763 }
3764
3765 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3766
3767 /*
3768  * Repair the btree of the given root.
3769  *
3770  * The fix is to remove the node key in corrupt_blocks cache_tree.
3771  * and rebalance the tree.
3772  * After the fix, the btree should be writeable.
3773  */
3774 static int repair_btree(struct btrfs_root *root,
3775                         struct cache_tree *corrupt_blocks)
3776 {
3777         struct btrfs_trans_handle *trans;
3778         struct btrfs_path path;
3779         struct btrfs_corrupt_block *corrupt;
3780         struct cache_extent *cache;
3781         struct btrfs_key key;
3782         u64 offset;
3783         int level;
3784         int ret = 0;
3785
3786         if (cache_tree_empty(corrupt_blocks))
3787                 return 0;
3788
3789         trans = btrfs_start_transaction(root, 1);
3790         if (IS_ERR(trans)) {
3791                 ret = PTR_ERR(trans);
3792                 fprintf(stderr, "Error starting transaction: %s\n",
3793                         strerror(-ret));
3794                 return ret;
3795         }
3796         btrfs_init_path(&path);
3797         cache = first_cache_extent(corrupt_blocks);
3798         while (cache) {
3799                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3800                                        cache);
3801                 level = corrupt->level;
3802                 path.lowest_level = level;
3803                 key.objectid = corrupt->key.objectid;
3804                 key.type = corrupt->key.type;
3805                 key.offset = corrupt->key.offset;
3806
3807                 /*
3808                  * Here we don't want to do any tree balance, since it may
3809                  * cause a balance with corrupted brother leaf/node,
3810                  * so ins_len set to 0 here.
3811                  * Balance will be done after all corrupt node/leaf is deleted.
3812                  */
3813                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3814                 if (ret < 0)
3815                         goto out;
3816                 offset = btrfs_node_blockptr(path.nodes[level],
3817                                              path.slots[level]);
3818
3819                 /* Remove the ptr */
3820                 ret = btrfs_del_ptr(trans, root, &path, level,
3821                                     path.slots[level]);
3822                 if (ret < 0)
3823                         goto out;
3824                 /*
3825                  * Remove the corresponding extent
3826                  * return value is not concerned.
3827                  */
3828                 btrfs_release_path(&path);
3829                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3830                                         0, root->root_key.objectid,
3831                                         level - 1, 0);
3832                 cache = next_cache_extent(cache);
3833         }
3834
3835         /* Balance the btree using btrfs_search_slot() */
3836         cache = first_cache_extent(corrupt_blocks);
3837         while (cache) {
3838                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3839                                        cache);
3840                 memcpy(&key, &corrupt->key, sizeof(key));
3841                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3842                 if (ret < 0)
3843                         goto out;
3844                 /* return will always >0 since it won't find the item */
3845                 ret = 0;
3846                 btrfs_release_path(&path);
3847                 cache = next_cache_extent(cache);
3848         }
3849 out:
3850         btrfs_commit_transaction(trans, root);
3851         btrfs_release_path(&path);
3852         return ret;
3853 }
3854
3855 static int check_fs_root(struct btrfs_root *root,
3856                          struct cache_tree *root_cache,
3857                          struct walk_control *wc)
3858 {
3859         int ret = 0;
3860         int err = 0;
3861         int wret;
3862         int level;
3863         struct btrfs_path path;
3864         struct shared_node root_node;
3865         struct root_record *rec;
3866         struct btrfs_root_item *root_item = &root->root_item;
3867         struct cache_tree corrupt_blocks;
3868         struct orphan_data_extent *orphan;
3869         struct orphan_data_extent *tmp;
3870         enum btrfs_tree_block_status status;
3871         struct node_refs nrefs;
3872
3873         /*
3874          * Reuse the corrupt_block cache tree to record corrupted tree block
3875          *
3876          * Unlike the usage in extent tree check, here we do it in a per
3877          * fs/subvol tree base.
3878          */
3879         cache_tree_init(&corrupt_blocks);
3880         root->fs_info->corrupt_blocks = &corrupt_blocks;
3881
3882         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3883                 rec = get_root_rec(root_cache, root->root_key.objectid);
3884                 BUG_ON(IS_ERR(rec));
3885                 if (btrfs_root_refs(root_item) > 0)
3886                         rec->found_root_item = 1;
3887         }
3888
3889         btrfs_init_path(&path);
3890         memset(&root_node, 0, sizeof(root_node));
3891         cache_tree_init(&root_node.root_cache);
3892         cache_tree_init(&root_node.inode_cache);
3893         memset(&nrefs, 0, sizeof(nrefs));
3894
3895         /* Move the orphan extent record to corresponding inode_record */
3896         list_for_each_entry_safe(orphan, tmp,
3897                                  &root->orphan_data_extents, list) {
3898                 struct inode_record *inode;
3899
3900                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3901                                       1);
3902                 BUG_ON(IS_ERR(inode));
3903                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3904                 list_move(&orphan->list, &inode->orphan_extents);
3905         }
3906
3907         level = btrfs_header_level(root->node);
3908         memset(wc->nodes, 0, sizeof(wc->nodes));
3909         wc->nodes[level] = &root_node;
3910         wc->active_node = level;
3911         wc->root_level = level;
3912
3913         /* We may not have checked the root block, lets do that now */
3914         if (btrfs_is_leaf(root->node))
3915                 status = btrfs_check_leaf(root, NULL, root->node);
3916         else
3917                 status = btrfs_check_node(root, NULL, root->node);
3918         if (status != BTRFS_TREE_BLOCK_CLEAN)
3919                 return -EIO;
3920
3921         if (btrfs_root_refs(root_item) > 0 ||
3922             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3923                 path.nodes[level] = root->node;
3924                 extent_buffer_get(root->node);
3925                 path.slots[level] = 0;
3926         } else {
3927                 struct btrfs_key key;
3928                 struct btrfs_disk_key found_key;
3929
3930                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3931                 level = root_item->drop_level;
3932                 path.lowest_level = level;
3933                 if (level > btrfs_header_level(root->node) ||
3934                     level >= BTRFS_MAX_LEVEL) {
3935                         error("ignoring invalid drop level: %u", level);
3936                         goto skip_walking;
3937                 }
3938                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3939                 if (wret < 0)
3940                         goto skip_walking;
3941                 btrfs_node_key(path.nodes[level], &found_key,
3942                                 path.slots[level]);
3943                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3944                                         sizeof(found_key)));
3945         }
3946
3947         while (1) {
3948                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3949                 if (wret < 0)
3950                         ret = wret;
3951                 if (wret != 0)
3952                         break;
3953
3954                 wret = walk_up_tree(root, &path, wc, &level);
3955                 if (wret < 0)
3956                         ret = wret;
3957                 if (wret != 0)
3958                         break;
3959         }
3960 skip_walking:
3961         btrfs_release_path(&path);
3962
3963         if (!cache_tree_empty(&corrupt_blocks)) {
3964                 struct cache_extent *cache;
3965                 struct btrfs_corrupt_block *corrupt;
3966
3967                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3968                        root->root_key.objectid);
3969                 cache = first_cache_extent(&corrupt_blocks);
3970                 while (cache) {
3971                         corrupt = container_of(cache,
3972                                                struct btrfs_corrupt_block,
3973                                                cache);
3974                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3975                                cache->start, corrupt->level,
3976                                corrupt->key.objectid, corrupt->key.type,
3977                                corrupt->key.offset);
3978                         cache = next_cache_extent(cache);
3979                 }
3980                 if (repair) {
3981                         printf("Try to repair the btree for root %llu\n",
3982                                root->root_key.objectid);
3983                         ret = repair_btree(root, &corrupt_blocks);
3984                         if (ret < 0)
3985                                 fprintf(stderr, "Failed to repair btree: %s\n",
3986                                         strerror(-ret));
3987                         if (!ret)
3988                                 printf("Btree for root %llu is fixed\n",
3989                                        root->root_key.objectid);
3990                 }
3991         }
3992
3993         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3994         if (err < 0)
3995                 ret = err;
3996
3997         if (root_node.current) {
3998                 root_node.current->checked = 1;
3999                 maybe_free_inode_rec(&root_node.inode_cache,
4000                                 root_node.current);
4001         }
4002
4003         err = check_inode_recs(root, &root_node.inode_cache);
4004         if (!ret)
4005                 ret = err;
4006
4007         free_corrupt_blocks_tree(&corrupt_blocks);
4008         root->fs_info->corrupt_blocks = NULL;
4009         free_orphan_data_extents(&root->orphan_data_extents);
4010         return ret;
4011 }
4012
4013 static int fs_root_objectid(u64 objectid)
4014 {
4015         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4016             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4017                 return 1;
4018         return is_fstree(objectid);
4019 }
4020
4021 static int check_fs_roots(struct btrfs_root *root,
4022                           struct cache_tree *root_cache)
4023 {
4024         struct btrfs_path path;
4025         struct btrfs_key key;
4026         struct walk_control wc;
4027         struct extent_buffer *leaf, *tree_node;
4028         struct btrfs_root *tmp_root;
4029         struct btrfs_root *tree_root = root->fs_info->tree_root;
4030         int ret;
4031         int err = 0;
4032
4033         if (ctx.progress_enabled) {
4034                 ctx.tp = TASK_FS_ROOTS;
4035                 task_start(ctx.info);
4036         }
4037
4038         /*
4039          * Just in case we made any changes to the extent tree that weren't
4040          * reflected into the free space cache yet.
4041          */
4042         if (repair)
4043                 reset_cached_block_groups(root->fs_info);
4044         memset(&wc, 0, sizeof(wc));
4045         cache_tree_init(&wc.shared);
4046         btrfs_init_path(&path);
4047
4048 again:
4049         key.offset = 0;
4050         key.objectid = 0;
4051         key.type = BTRFS_ROOT_ITEM_KEY;
4052         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4053         if (ret < 0) {
4054                 err = 1;
4055                 goto out;
4056         }
4057         tree_node = tree_root->node;
4058         while (1) {
4059                 if (tree_node != tree_root->node) {
4060                         free_root_recs_tree(root_cache);
4061                         btrfs_release_path(&path);
4062                         goto again;
4063                 }
4064                 leaf = path.nodes[0];
4065                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4066                         ret = btrfs_next_leaf(tree_root, &path);
4067                         if (ret) {
4068                                 if (ret < 0)
4069                                         err = 1;
4070                                 break;
4071                         }
4072                         leaf = path.nodes[0];
4073                 }
4074                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4075                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4076                     fs_root_objectid(key.objectid)) {
4077                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4078                                 tmp_root = btrfs_read_fs_root_no_cache(
4079                                                 root->fs_info, &key);
4080                         } else {
4081                                 key.offset = (u64)-1;
4082                                 tmp_root = btrfs_read_fs_root(
4083                                                 root->fs_info, &key);
4084                         }
4085                         if (IS_ERR(tmp_root)) {
4086                                 err = 1;
4087                                 goto next;
4088                         }
4089                         ret = check_fs_root(tmp_root, root_cache, &wc);
4090                         if (ret == -EAGAIN) {
4091                                 free_root_recs_tree(root_cache);
4092                                 btrfs_release_path(&path);
4093                                 goto again;
4094                         }
4095                         if (ret)
4096                                 err = 1;
4097                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4098                                 btrfs_free_fs_root(tmp_root);
4099                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4100                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4101                         process_root_ref(leaf, path.slots[0], &key,
4102                                          root_cache);
4103                 }
4104 next:
4105                 path.slots[0]++;
4106         }
4107 out:
4108         btrfs_release_path(&path);
4109         if (err)
4110                 free_extent_cache_tree(&wc.shared);
4111         if (!cache_tree_empty(&wc.shared))
4112                 fprintf(stderr, "warning line %d\n", __LINE__);
4113
4114         task_stop(ctx.info);
4115
4116         return err;
4117 }
4118
4119 /*
4120  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4121  * INODE_REF/INODE_EXTREF match.
4122  *
4123  * @root:       the root of the fs/file tree
4124  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4125  * @key:        the key of the DIR_ITEM/DIR_INDEX
4126  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4127  *              distinguish root_dir between normal dir/file
4128  * @name:       the name in the INODE_REF/INODE_EXTREF
4129  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4130  * @mode:       the st_mode of INODE_ITEM
4131  *
4132  * Return 0 if no error occurred.
4133  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4134  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4135  * dir/file.
4136  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4137  * not match for normal dir/file.
4138  */
4139 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4140                          struct btrfs_key *key, u64 index, char *name,
4141                          u32 namelen, u32 mode)
4142 {
4143         struct btrfs_path path;
4144         struct extent_buffer *node;
4145         struct btrfs_dir_item *di;
4146         struct btrfs_key location;
4147         char namebuf[BTRFS_NAME_LEN] = {0};
4148         u32 total;
4149         u32 cur = 0;
4150         u32 len;
4151         u32 name_len;
4152         u32 data_len;
4153         u8 filetype;
4154         int slot;
4155         int ret;
4156
4157         btrfs_init_path(&path);
4158         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4159         if (ret < 0) {
4160                 ret = DIR_ITEM_MISSING;
4161                 goto out;
4162         }
4163
4164         /* Process root dir and goto out*/
4165         if (index == 0) {
4166                 if (ret == 0) {
4167                         ret = ROOT_DIR_ERROR;
4168                         error(
4169                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4170                                 root->objectid,
4171                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4172                                         "REF" : "EXTREF",
4173                                 ref_key->objectid, ref_key->offset,
4174                                 key->type == BTRFS_DIR_ITEM_KEY ?
4175                                         "DIR_ITEM" : "DIR_INDEX");
4176                 } else {
4177                         ret = 0;
4178                 }
4179
4180                 goto out;
4181         }
4182
4183         /* Process normal file/dir */
4184         if (ret > 0) {
4185                 ret = DIR_ITEM_MISSING;
4186                 error(
4187                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4188                         root->objectid,
4189                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4190                         ref_key->objectid, ref_key->offset,
4191                         key->type == BTRFS_DIR_ITEM_KEY ?
4192                                 "DIR_ITEM" : "DIR_INDEX",
4193                         key->objectid, key->offset, namelen, name,
4194                         imode_to_type(mode));
4195                 goto out;
4196         }
4197
4198         /* Check whether inode_id/filetype/name match */
4199         node = path.nodes[0];
4200         slot = path.slots[0];
4201         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4202         total = btrfs_item_size_nr(node, slot);
4203         while (cur < total) {
4204                 ret = DIR_ITEM_MISMATCH;
4205                 name_len = btrfs_dir_name_len(node, di);
4206                 data_len = btrfs_dir_data_len(node, di);
4207
4208                 btrfs_dir_item_key_to_cpu(node, di, &location);
4209                 if (location.objectid != ref_key->objectid ||
4210                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4211                     location.offset != 0)
4212                         goto next;
4213
4214                 filetype = btrfs_dir_type(node, di);
4215                 if (imode_to_type(mode) != filetype)
4216                         goto next;
4217
4218                 if (name_len <= BTRFS_NAME_LEN) {
4219                         len = name_len;
4220                 } else {
4221                         len = BTRFS_NAME_LEN;
4222                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4223                         root->objectid,
4224                         key->type == BTRFS_DIR_ITEM_KEY ?
4225                         "DIR_ITEM" : "DIR_INDEX",
4226                         key->objectid, key->offset, name_len);
4227                 }
4228                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4229                 if (len != namelen || strncmp(namebuf, name, len))
4230                         goto next;
4231
4232                 ret = 0;
4233                 goto out;
4234 next:
4235                 len = sizeof(*di) + name_len + data_len;
4236                 di = (struct btrfs_dir_item *)((char *)di + len);
4237                 cur += len;
4238         }
4239         if (ret == DIR_ITEM_MISMATCH)
4240                 error(
4241                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4242                         root->objectid,
4243                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4244                         ref_key->objectid, ref_key->offset,
4245                         key->type == BTRFS_DIR_ITEM_KEY ?
4246                                 "DIR_ITEM" : "DIR_INDEX",
4247                         key->objectid, key->offset, namelen, name,
4248                         imode_to_type(mode));
4249 out:
4250         btrfs_release_path(&path);
4251         return ret;
4252 }
4253
4254 /*
4255  * Traverse the given INODE_REF and call find_dir_item() to find related
4256  * DIR_ITEM/DIR_INDEX.
4257  *
4258  * @root:       the root of the fs/file tree
4259  * @ref_key:    the key of the INODE_REF
4260  * @refs:       the count of INODE_REF
4261  * @mode:       the st_mode of INODE_ITEM
4262  *
4263  * Return 0 if no error occurred.
4264  */
4265 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4266                            struct extent_buffer *node, int slot, u64 *refs,
4267                            int mode)
4268 {
4269         struct btrfs_key key;
4270         struct btrfs_inode_ref *ref;
4271         char namebuf[BTRFS_NAME_LEN] = {0};
4272         u32 total;
4273         u32 cur = 0;
4274         u32 len;
4275         u32 name_len;
4276         u64 index;
4277         int ret, err = 0;
4278
4279         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4280         total = btrfs_item_size_nr(node, slot);
4281
4282 next:
4283         /* Update inode ref count */
4284         (*refs)++;
4285
4286         index = btrfs_inode_ref_index(node, ref);
4287         name_len = btrfs_inode_ref_name_len(node, ref);
4288         if (name_len <= BTRFS_NAME_LEN) {
4289                 len = name_len;
4290         } else {
4291                 len = BTRFS_NAME_LEN;
4292                 warning("root %llu INODE_REF[%llu %llu] name too long",
4293                         root->objectid, ref_key->objectid, ref_key->offset);
4294         }
4295
4296         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4297
4298         /* Check root dir ref name */
4299         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4300                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4301                       root->objectid, ref_key->objectid, ref_key->offset,
4302                       namebuf);
4303                 err |= ROOT_DIR_ERROR;
4304         }
4305
4306         /* Find related DIR_INDEX */
4307         key.objectid = ref_key->offset;
4308         key.type = BTRFS_DIR_INDEX_KEY;
4309         key.offset = index;
4310         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4311         err |= ret;
4312
4313         /* Find related dir_item */
4314         key.objectid = ref_key->offset;
4315         key.type = BTRFS_DIR_ITEM_KEY;
4316         key.offset = btrfs_name_hash(namebuf, len);
4317         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4318         err |= ret;
4319
4320         len = sizeof(*ref) + name_len;
4321         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4322         cur += len;
4323         if (cur < total)
4324                 goto next;
4325
4326         return err;
4327 }
4328
4329 /*
4330  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4331  * DIR_ITEM/DIR_INDEX.
4332  *
4333  * @root:       the root of the fs/file tree
4334  * @ref_key:    the key of the INODE_EXTREF
4335  * @refs:       the count of INODE_EXTREF
4336  * @mode:       the st_mode of INODE_ITEM
4337  *
4338  * Return 0 if no error occurred.
4339  */
4340 static int check_inode_extref(struct btrfs_root *root,
4341                               struct btrfs_key *ref_key,
4342                               struct extent_buffer *node, int slot, u64 *refs,
4343                               int mode)
4344 {
4345         struct btrfs_key key;
4346         struct btrfs_inode_extref *extref;
4347         char namebuf[BTRFS_NAME_LEN] = {0};
4348         u32 total;
4349         u32 cur = 0;
4350         u32 len;
4351         u32 name_len;
4352         u64 index;
4353         u64 parent;
4354         int ret;
4355         int err = 0;
4356
4357         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4358         total = btrfs_item_size_nr(node, slot);
4359
4360 next:
4361         /* update inode ref count */
4362         (*refs)++;
4363         name_len = btrfs_inode_extref_name_len(node, extref);
4364         index = btrfs_inode_extref_index(node, extref);
4365         parent = btrfs_inode_extref_parent(node, extref);
4366         if (name_len <= BTRFS_NAME_LEN) {
4367                 len = name_len;
4368         } else {
4369                 len = BTRFS_NAME_LEN;
4370                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4371                         root->objectid, ref_key->objectid, ref_key->offset);
4372         }
4373         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4374
4375         /* Check root dir ref name */
4376         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4377                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4378                       root->objectid, ref_key->objectid, ref_key->offset,
4379                       namebuf);
4380                 err |= ROOT_DIR_ERROR;
4381         }
4382
4383         /* find related dir_index */
4384         key.objectid = parent;
4385         key.type = BTRFS_DIR_INDEX_KEY;
4386         key.offset = index;
4387         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4388         err |= ret;
4389
4390         /* find related dir_item */
4391         key.objectid = parent;
4392         key.type = BTRFS_DIR_ITEM_KEY;
4393         key.offset = btrfs_name_hash(namebuf, len);
4394         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4395         err |= ret;
4396
4397         len = sizeof(*extref) + name_len;
4398         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4399         cur += len;
4400
4401         if (cur < total)
4402                 goto next;
4403
4404         return err;
4405 }
4406
4407 /*
4408  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4409  * DIR_ITEM/DIR_INDEX match.
4410  *
4411  * @root:       the root of the fs/file tree
4412  * @key:        the key of the INODE_REF/INODE_EXTREF
4413  * @name:       the name in the INODE_REF/INODE_EXTREF
4414  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4415  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4416  * to (u64)-1
4417  * @ext_ref:    the EXTENDED_IREF feature
4418  *
4419  * Return 0 if no error occurred.
4420  * Return >0 for error bitmap
4421  */
4422 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4423                           char *name, int namelen, u64 index,
4424                           unsigned int ext_ref)
4425 {
4426         struct btrfs_path path;
4427         struct btrfs_inode_ref *ref;
4428         struct btrfs_inode_extref *extref;
4429         struct extent_buffer *node;
4430         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4431         u32 total;
4432         u32 cur = 0;
4433         u32 len;
4434         u32 ref_namelen;
4435         u64 ref_index;
4436         u64 parent;
4437         u64 dir_id;
4438         int slot;
4439         int ret;
4440
4441         btrfs_init_path(&path);
4442         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4443         if (ret) {
4444                 ret = INODE_REF_MISSING;
4445                 goto extref;
4446         }
4447
4448         node = path.nodes[0];
4449         slot = path.slots[0];
4450
4451         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4452         total = btrfs_item_size_nr(node, slot);
4453
4454         /* Iterate all entry of INODE_REF */
4455         while (cur < total) {
4456                 ret = INODE_REF_MISSING;
4457
4458                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4459                 ref_index = btrfs_inode_ref_index(node, ref);
4460                 if (index != (u64)-1 && index != ref_index)
4461                         goto next_ref;
4462
4463                 if (ref_namelen <= BTRFS_NAME_LEN) {
4464                         len = ref_namelen;
4465                 } else {
4466                         len = BTRFS_NAME_LEN;
4467                         warning("root %llu INODE %s[%llu %llu] name too long",
4468                                 root->objectid,
4469                                 key->type == BTRFS_INODE_REF_KEY ?
4470                                         "REF" : "EXTREF",
4471                                 key->objectid, key->offset);
4472                 }
4473                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4474                                    len);
4475
4476                 if (len != namelen || strncmp(ref_namebuf, name, len))
4477                         goto next_ref;
4478
4479                 ret = 0;
4480                 goto out;
4481 next_ref:
4482                 len = sizeof(*ref) + ref_namelen;
4483                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4484                 cur += len;
4485         }
4486
4487 extref:
4488         /* Skip if not support EXTENDED_IREF feature */
4489         if (!ext_ref)
4490                 goto out;
4491
4492         btrfs_release_path(&path);
4493         btrfs_init_path(&path);
4494
4495         dir_id = key->offset;
4496         key->type = BTRFS_INODE_EXTREF_KEY;
4497         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4498
4499         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4500         if (ret) {
4501                 ret = INODE_REF_MISSING;
4502                 goto out;
4503         }
4504
4505         node = path.nodes[0];
4506         slot = path.slots[0];
4507
4508         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4509         cur = 0;
4510         total = btrfs_item_size_nr(node, slot);
4511
4512         /* Iterate all entry of INODE_EXTREF */
4513         while (cur < total) {
4514                 ret = INODE_REF_MISSING;
4515
4516                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4517                 ref_index = btrfs_inode_extref_index(node, extref);
4518                 parent = btrfs_inode_extref_parent(node, extref);
4519                 if (index != (u64)-1 && index != ref_index)
4520                         goto next_extref;
4521
4522                 if (parent != dir_id)
4523                         goto next_extref;
4524
4525                 if (ref_namelen <= BTRFS_NAME_LEN) {
4526                         len = ref_namelen;
4527                 } else {
4528                         len = BTRFS_NAME_LEN;
4529                         warning("root %llu INODE %s[%llu %llu] name too long",
4530                                 root->objectid,
4531                                 key->type == BTRFS_INODE_REF_KEY ?
4532                                         "REF" : "EXTREF",
4533                                 key->objectid, key->offset);
4534                 }
4535                 read_extent_buffer(node, ref_namebuf,
4536                                    (unsigned long)(extref + 1), len);
4537
4538                 if (len != namelen || strncmp(ref_namebuf, name, len))
4539                         goto next_extref;
4540
4541                 ret = 0;
4542                 goto out;
4543
4544 next_extref:
4545                 len = sizeof(*extref) + ref_namelen;
4546                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4547                 cur += len;
4548
4549         }
4550 out:
4551         btrfs_release_path(&path);
4552         return ret;
4553 }
4554
/*
 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
 *
 * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
 * @size:       the st_size of the INODE_ITEM; incremented by the name length
 *              of each entry
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error occurred.
 */
4566 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4567                           struct extent_buffer *node, int slot, u64 *size,
4568                           unsigned int ext_ref)
4569 {
4570         struct btrfs_dir_item *di;
4571         struct btrfs_inode_item *ii;
4572         struct btrfs_path path;
4573         struct btrfs_key location;
4574         char namebuf[BTRFS_NAME_LEN] = {0};
4575         u32 total;
4576         u32 cur = 0;
4577         u32 len;
4578         u32 name_len;
4579         u32 data_len;
4580         u8 filetype;
4581         u32 mode;
4582         u64 index;
4583         int ret;
4584         int err = 0;
4585
4586         /*
4587          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4588          * ignore index check.
4589          */
4590         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4591
4592         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4593         total = btrfs_item_size_nr(node, slot);
4594
4595         while (cur < total) {
4596                 data_len = btrfs_dir_data_len(node, di);
4597                 if (data_len)
4598                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4599                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4600                               "DIR_ITEM" : "DIR_INDEX",
4601                               key->objectid, key->offset, data_len);
4602
4603                 name_len = btrfs_dir_name_len(node, di);
4604                 if (name_len <= BTRFS_NAME_LEN) {
4605                         len = name_len;
4606                 } else {
4607                         len = BTRFS_NAME_LEN;
4608                         warning("root %llu %s[%llu %llu] name too long",
4609                                 root->objectid,
4610                                 key->type == BTRFS_DIR_ITEM_KEY ?
4611                                 "DIR_ITEM" : "DIR_INDEX",
4612                                 key->objectid, key->offset);
4613                 }
4614                 (*size) += name_len;
4615
4616                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4617                 filetype = btrfs_dir_type(node, di);
4618
4619                 btrfs_init_path(&path);
4620                 btrfs_dir_item_key_to_cpu(node, di, &location);
4621
4622                 /* Ignore related ROOT_ITEM check */
4623                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4624                         goto next;
4625
4626                 /* Check relative INODE_ITEM(existence/filetype) */
4627                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4628                 if (ret) {
4629                         err |= INODE_ITEM_MISSING;
4630                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4631                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4632                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4633                               key->offset, location.objectid, name_len,
4634                               namebuf, filetype);
4635                         goto next;
4636                 }
4637
4638                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4639                                     struct btrfs_inode_item);
4640                 mode = btrfs_inode_mode(path.nodes[0], ii);
4641
4642                 if (imode_to_type(mode) != filetype) {
4643                         err |= INODE_ITEM_MISMATCH;
4644                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4645                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4646                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4647                               key->offset, name_len, namebuf, filetype);
4648                 }
4649
4650                 /* Check relative INODE_REF/INODE_EXTREF */
4651                 location.type = BTRFS_INODE_REF_KEY;
4652                 location.offset = key->objectid;
4653                 ret = find_inode_ref(root, &location, namebuf, len,
4654                                        index, ext_ref);
4655                 err |= ret;
4656                 if (ret & INODE_REF_MISSING)
4657                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4658                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4659                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4660                               key->offset, name_len, namebuf, filetype);
4661
4662 next:
4663                 btrfs_release_path(&path);
4664                 len = sizeof(*di) + name_len + data_len;
4665                 di = (struct btrfs_dir_item *)((char *)di + len);
4666                 cur += len;
4667
4668                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4669                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4670                               root->objectid, key->objectid, key->offset);
4671                         break;
4672                 }
4673         }
4674
4675         return err;
4676 }
4677
4678 /*
4679  * Check file extent datasum/hole, update the size of the file extents,
4680  * check and update the last offset of the file extent.
4681  *
4682  * @root:       the root of fs/file tree.
4683  * @fkey:       the key of the file extent.
4684  * @nodatasum:  INODE_NODATASUM feature.
4685  * @size:       the sum of all EXTENT_DATA items size for this inode.
4686  * @end:        the offset of the last extent.
4687  *
4688  * Return 0 if no error occurred.
4689  */
4690 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4691                              struct extent_buffer *node, int slot,
4692                              unsigned int nodatasum, u64 *size, u64 *end)
4693 {
4694         struct btrfs_file_extent_item *fi;
4695         u64 disk_bytenr;
4696         u64 disk_num_bytes;
4697         u64 extent_num_bytes;
4698         u64 found;
4699         unsigned int extent_type;
4700         unsigned int is_hole;
4701         int ret;
4702         int err = 0;
4703
4704         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4705
4706         extent_type = btrfs_file_extent_type(node, fi);
4707         /* Skip if file extent is inline */
4708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4709                 struct btrfs_item *e = btrfs_item_nr(slot);
4710                 u32 item_inline_len;
4711
4712                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4713                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4714                 if (extent_num_bytes == 0 ||
4715                     extent_num_bytes != item_inline_len)
4716                         err |= FILE_EXTENT_ERROR;
4717                 *size += extent_num_bytes;
4718                 return err;
4719         }
4720
4721         /* Check extent type */
4722         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4723                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4724                 err |= FILE_EXTENT_ERROR;
4725                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4726                       root->objectid, fkey->objectid, fkey->offset);
4727                 return err;
4728         }
4729
4730         /* Check REG_EXTENT/PREALLOC_EXTENT */
4731         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4732         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4733         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4734         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4735
4736         /* Check EXTENT_DATA datasum */
4737         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4738         if (found > 0 && nodatasum) {
4739                 err |= ODD_CSUM_ITEM;
4740                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4741                       root->objectid, fkey->objectid, fkey->offset);
4742         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4743                    !is_hole &&
4744                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4745                 err |= CSUM_ITEM_MISSING;
4746                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4747                       root->objectid, fkey->objectid, fkey->offset);
4748         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4749                 err |= ODD_CSUM_ITEM;
4750                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         }
4753
4754         /* Check EXTENT_DATA hole */
4755         if (no_holes && is_hole) {
4756                 err |= FILE_EXTENT_ERROR;
4757                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4758                       root->objectid, fkey->objectid, fkey->offset);
4759         } else if (!no_holes && *end != fkey->offset) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         }
4764
4765         *end += extent_num_bytes;
4766         if (!is_hole)
4767                 *size += extent_num_bytes;
4768
4769         return err;
4770 }
4771
4772 /*
4773  * Check INODE_ITEM and related ITEMs (the same inode number)
4774  * 1. check link count
4775  * 2. check inode ref/extref
4776  * 3. check dir item/index
4777  *
4778  * @ext_ref:    the EXTENDED_IREF feature
4779  *
4780  * Return 0 if no error occurred.
4781  * Return >0 for error or hit the traversal is done(by error bitmap)
4782  */
4783 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4784                             unsigned int ext_ref)
4785 {
4786         struct extent_buffer *node;
4787         struct btrfs_inode_item *ii;
4788         struct btrfs_key key;
4789         u64 inode_id;
4790         u32 mode;
4791         u64 nlink;
4792         u64 nbytes;
4793         u64 isize;
4794         u64 size = 0;
4795         u64 refs = 0;
4796         u64 extent_end = 0;
4797         u64 extent_size = 0;
4798         unsigned int dir;
4799         unsigned int nodatasum;
4800         int slot;
4801         int ret;
4802         int err = 0;
4803
4804         node = path->nodes[0];
4805         slot = path->slots[0];
4806
4807         btrfs_item_key_to_cpu(node, &key, slot);
4808         inode_id = key.objectid;
4809
4810         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4811                 ret = btrfs_next_item(root, path);
4812                 if (ret > 0)
4813                         err |= LAST_ITEM;
4814                 return err;
4815         }
4816
4817         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4818         isize = btrfs_inode_size(node, ii);
4819         nbytes = btrfs_inode_nbytes(node, ii);
4820         mode = btrfs_inode_mode(node, ii);
4821         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4822         nlink = btrfs_inode_nlink(node, ii);
4823         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4824
4825         while (1) {
4826                 ret = btrfs_next_item(root, path);
4827                 if (ret < 0) {
4828                         /* out will fill 'err' rusing current statistics */
4829                         goto out;
4830                 } else if (ret > 0) {
4831                         err |= LAST_ITEM;
4832                         goto out;
4833                 }
4834
4835                 node = path->nodes[0];
4836                 slot = path->slots[0];
4837                 btrfs_item_key_to_cpu(node, &key, slot);
4838                 if (key.objectid != inode_id)
4839                         goto out;
4840
4841                 switch (key.type) {
4842                 case BTRFS_INODE_REF_KEY:
4843                         ret = check_inode_ref(root, &key, node, slot, &refs,
4844                                               mode);
4845                         err |= ret;
4846                         break;
4847                 case BTRFS_INODE_EXTREF_KEY:
4848                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4849                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4850                                         root->objectid, key.objectid,
4851                                         key.offset);
4852                         ret = check_inode_extref(root, &key, node, slot, &refs,
4853                                                  mode);
4854                         err |= ret;
4855                         break;
4856                 case BTRFS_DIR_ITEM_KEY:
4857                 case BTRFS_DIR_INDEX_KEY:
4858                         if (!dir) {
4859                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4860                                         root->objectid, inode_id,
4861                                         imode_to_type(mode), key.objectid,
4862                                         key.offset);
4863                         }
4864                         ret = check_dir_item(root, &key, node, slot, &size,
4865                                              ext_ref);
4866                         err |= ret;
4867                         break;
4868                 case BTRFS_EXTENT_DATA_KEY:
4869                         if (dir) {
4870                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4871                                         root->objectid, inode_id, key.objectid,
4872                                         key.offset);
4873                         }
4874                         ret = check_file_extent(root, &key, node, slot,
4875                                                 nodatasum, &extent_size,
4876                                                 &extent_end);
4877                         err |= ret;
4878                         break;
4879                 case BTRFS_XATTR_ITEM_KEY:
4880                         break;
4881                 default:
4882                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4883                               key.objectid, key.type, key.offset);
4884                 }
4885         }
4886
4887 out:
4888         /* verify INODE_ITEM nlink/isize/nbytes */
4889         if (dir) {
4890                 if (nlink != 1) {
4891                         err |= LINK_COUNT_ERROR;
4892                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4893                               root->objectid, inode_id, nlink);
4894                 }
4895
4896                 /*
4897                  * Just a warning, as dir inode nbytes is just an
4898                  * instructive value.
4899                  */
4900                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4901                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4902                                 root->objectid, inode_id, root->nodesize);
4903                 }
4904
4905                 if (isize != size) {
4906                         err |= ISIZE_ERROR;
4907                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4908                               root->objectid, inode_id, isize, size);
4909                 }
4910         } else {
4911                 if (nlink != refs) {
4912                         err |= LINK_COUNT_ERROR;
4913                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4914                               root->objectid, inode_id, nlink, refs);
4915                 } else if (!nlink) {
4916                         err |= ORPHAN_ITEM;
4917                 }
4918
4919                 if (!nbytes && !no_holes && extent_end < isize) {
4920                         err |= NBYTES_ERROR;
4921                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4922                               root->objectid, inode_id, isize);
4923                 }
4924
4925                 if (nbytes != extent_size) {
4926                         err |= NBYTES_ERROR;
4927                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4928                               root->objectid, inode_id, nbytes, extent_size);
4929                 }
4930         }
4931
4932         return err;
4933 }
4934
4935 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4936 {
4937         struct btrfs_path path;
4938         struct btrfs_key key;
4939         int err = 0;
4940         int ret;
4941
4942         btrfs_init_path(&path);
4943         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4944         key.type = BTRFS_INODE_ITEM_KEY;
4945         key.offset = 0;
4946
4947         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4948         if (ret < 0)
4949                 goto out;
4950         if (ret > 0) {
4951                 ret = 0;
4952                 err |= INODE_ITEM_MISSING;
4953         }
4954
4955         err |= check_inode_item(root, &path, ext_ref);
4956         err &= ~LAST_ITEM;
4957         if (err && !ret)
4958                 ret = -EIO;
4959 out:
4960         btrfs_release_path(&path);
4961         return ret;
4962 }
4963
4964 /*
4965  * Iterate all item on the tree and call check_inode_item() to check.
4966  *
4967  * @root:       the root of the tree to be checked.
4968  * @ext_ref:    the EXTENDED_IREF feature
4969  *
4970  * Return 0 if no error found.
4971  * Return <0 for error.
4972  */
4973 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4974 {
4975         struct btrfs_path path;
4976         struct node_refs nrefs;
4977         struct btrfs_root_item *root_item = &root->root_item;
4978         int ret, wret;
4979         int level;
4980
4981         /*
4982          * We need to manually check the first inode item(256)
4983          * As the following traversal function will only start from
4984          * the first inode item in the leaf, if inode item(256) is missing
4985          * we will just skip it forever.
4986          */
4987         ret = check_fs_first_inode(root, ext_ref);
4988         if (ret < 0)
4989                 return ret;
4990
4991         memset(&nrefs, 0, sizeof(nrefs));
4992         level = btrfs_header_level(root->node);
4993         btrfs_init_path(&path);
4994
4995         if (btrfs_root_refs(root_item) > 0 ||
4996             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4997                 path.nodes[level] = root->node;
4998                 path.slots[level] = 0;
4999                 extent_buffer_get(root->node);
5000         } else {
5001                 struct btrfs_key key;
5002
5003                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5004                 level = root_item->drop_level;
5005                 path.lowest_level = level;
5006                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5007                 if (ret < 0)
5008                         goto out;
5009                 ret = 0;
5010         }
5011
5012         while (1) {
5013                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5014                 if (wret < 0)
5015                         ret = wret;
5016                 if (wret != 0)
5017                         break;
5018
5019                 wret = walk_up_tree_v2(root, &path, &level);
5020                 if (wret < 0)
5021                         ret = wret;
5022                 if (wret != 0)
5023                         break;
5024         }
5025
5026 out:
5027         btrfs_release_path(&path);
5028         return ret;
5029 }
5030
5031 /*
5032  * Find the relative ref for root_ref and root_backref.
5033  *
5034  * @root:       the root of the root tree.
5035  * @ref_key:    the key of the root ref.
5036  *
5037  * Return 0 if no error occurred.
5038  */
5039 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5040                           struct extent_buffer *node, int slot)
5041 {
5042         struct btrfs_path path;
5043         struct btrfs_key key;
5044         struct btrfs_root_ref *ref;
5045         struct btrfs_root_ref *backref;
5046         char ref_name[BTRFS_NAME_LEN] = {0};
5047         char backref_name[BTRFS_NAME_LEN] = {0};
5048         u64 ref_dirid;
5049         u64 ref_seq;
5050         u32 ref_namelen;
5051         u64 backref_dirid;
5052         u64 backref_seq;
5053         u32 backref_namelen;
5054         u32 len;
5055         int ret;
5056         int err = 0;
5057
5058         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5059         ref_dirid = btrfs_root_ref_dirid(node, ref);
5060         ref_seq = btrfs_root_ref_sequence(node, ref);
5061         ref_namelen = btrfs_root_ref_name_len(node, ref);
5062
5063         if (ref_namelen <= BTRFS_NAME_LEN) {
5064                 len = ref_namelen;
5065         } else {
5066                 len = BTRFS_NAME_LEN;
5067                 warning("%s[%llu %llu] ref_name too long",
5068                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5069                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5070                         ref_key->offset);
5071         }
5072         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5073
5074         /* Find relative root_ref */
5075         key.objectid = ref_key->offset;
5076         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5077         key.offset = ref_key->objectid;
5078
5079         btrfs_init_path(&path);
5080         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5081         if (ret) {
5082                 err |= ROOT_REF_MISSING;
5083                 error("%s[%llu %llu] couldn't find relative ref",
5084                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5085                       "ROOT_REF" : "ROOT_BACKREF",
5086                       ref_key->objectid, ref_key->offset);
5087                 goto out;
5088         }
5089
5090         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5091                                  struct btrfs_root_ref);
5092         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5093         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5094         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5095
5096         if (backref_namelen <= BTRFS_NAME_LEN) {
5097                 len = backref_namelen;
5098         } else {
5099                 len = BTRFS_NAME_LEN;
5100                 warning("%s[%llu %llu] ref_name too long",
5101                         key.type == BTRFS_ROOT_REF_KEY ?
5102                         "ROOT_REF" : "ROOT_BACKREF",
5103                         key.objectid, key.offset);
5104         }
5105         read_extent_buffer(path.nodes[0], backref_name,
5106                            (unsigned long)(backref + 1), len);
5107
5108         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5109             ref_namelen != backref_namelen ||
5110             strncmp(ref_name, backref_name, len)) {
5111                 err |= ROOT_REF_MISMATCH;
5112                 error("%s[%llu %llu] mismatch relative ref",
5113                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5114                       "ROOT_REF" : "ROOT_BACKREF",
5115                       ref_key->objectid, ref_key->offset);
5116         }
5117 out:
5118         btrfs_release_path(&path);
5119         return err;
5120 }
5121
5122 /*
5123  * Check all fs/file tree in low_memory mode.
5124  *
5125  * 1. for fs tree root item, call check_fs_root_v2()
5126  * 2. for fs tree root ref/backref, call check_root_ref()
5127  *
5128  * Return 0 if no error occurred.
5129  */
5130 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5131 {
5132         struct btrfs_root *tree_root = fs_info->tree_root;
5133         struct btrfs_root *cur_root = NULL;
5134         struct btrfs_path path;
5135         struct btrfs_key key;
5136         struct extent_buffer *node;
5137         unsigned int ext_ref;
5138         int slot;
5139         int ret;
5140         int err = 0;
5141
5142         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5143
5144         btrfs_init_path(&path);
5145         key.objectid = BTRFS_FS_TREE_OBJECTID;
5146         key.offset = 0;
5147         key.type = BTRFS_ROOT_ITEM_KEY;
5148
5149         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5150         if (ret < 0) {
5151                 err = ret;
5152                 goto out;
5153         } else if (ret > 0) {
5154                 err = -ENOENT;
5155                 goto out;
5156         }
5157
5158         while (1) {
5159                 node = path.nodes[0];
5160                 slot = path.slots[0];
5161                 btrfs_item_key_to_cpu(node, &key, slot);
5162                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5163                         goto out;
5164                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5165                     fs_root_objectid(key.objectid)) {
5166                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5167                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5168                                                                        &key);
5169                         } else {
5170                                 key.offset = (u64)-1;
5171                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5172                         }
5173
5174                         if (IS_ERR(cur_root)) {
5175                                 error("Fail to read fs/subvol tree: %lld",
5176                                       key.objectid);
5177                                 err = -EIO;
5178                                 goto next;
5179                         }
5180
5181                         ret = check_fs_root_v2(cur_root, ext_ref);
5182                         err |= ret;
5183
5184                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5185                                 btrfs_free_fs_root(cur_root);
5186                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5187                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5188                         ret = check_root_ref(tree_root, &key, node, slot);
5189                         err |= ret;
5190                 }
5191 next:
5192                 ret = btrfs_next_item(tree_root, &path);
5193                 if (ret > 0)
5194                         goto out;
5195                 if (ret < 0) {
5196                         err = ret;
5197                         goto out;
5198                 }
5199         }
5200
5201 out:
5202         btrfs_release_path(&path);
5203         return err;
5204 }
5205
5206 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5207 {
5208         struct list_head *cur = rec->backrefs.next;
5209         struct extent_backref *back;
5210         struct tree_backref *tback;
5211         struct data_backref *dback;
5212         u64 found = 0;
5213         int err = 0;
5214
5215         while(cur != &rec->backrefs) {
5216                 back = to_extent_backref(cur);
5217                 cur = cur->next;
5218                 if (!back->found_extent_tree) {
5219                         err = 1;
5220                         if (!print_errs)
5221                                 goto out;
5222                         if (back->is_data) {
5223                                 dback = to_data_backref(back);
5224                                 fprintf(stderr, "Backref %llu %s %llu"
5225                                         " owner %llu offset %llu num_refs %lu"
5226                                         " not found in extent tree\n",
5227                                         (unsigned long long)rec->start,
5228                                         back->full_backref ?
5229                                         "parent" : "root",
5230                                         back->full_backref ?
5231                                         (unsigned long long)dback->parent:
5232                                         (unsigned long long)dback->root,
5233                                         (unsigned long long)dback->owner,
5234                                         (unsigned long long)dback->offset,
5235                                         (unsigned long)dback->num_refs);
5236                         } else {
5237                                 tback = to_tree_backref(back);
5238                                 fprintf(stderr, "Backref %llu parent %llu"
5239                                         " root %llu not found in extent tree\n",
5240                                         (unsigned long long)rec->start,
5241                                         (unsigned long long)tback->parent,
5242                                         (unsigned long long)tback->root);
5243                         }
5244                 }
5245                 if (!back->is_data && !back->found_ref) {
5246                         err = 1;
5247                         if (!print_errs)
5248                                 goto out;
5249                         tback = to_tree_backref(back);
5250                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5251                                 (unsigned long long)rec->start,
5252                                 back->full_backref ? "parent" : "root",
5253                                 back->full_backref ?
5254                                 (unsigned long long)tback->parent :
5255                                 (unsigned long long)tback->root, back);
5256                 }
5257                 if (back->is_data) {
5258                         dback = to_data_backref(back);
5259                         if (dback->found_ref != dback->num_refs) {
5260                                 err = 1;
5261                                 if (!print_errs)
5262                                         goto out;
5263                                 fprintf(stderr, "Incorrect local backref count"
5264                                         " on %llu %s %llu owner %llu"
5265                                         " offset %llu found %u wanted %u back %p\n",
5266                                         (unsigned long long)rec->start,
5267                                         back->full_backref ?
5268                                         "parent" : "root",
5269                                         back->full_backref ?
5270                                         (unsigned long long)dback->parent:
5271                                         (unsigned long long)dback->root,
5272                                         (unsigned long long)dback->owner,
5273                                         (unsigned long long)dback->offset,
5274                                         dback->found_ref, dback->num_refs, back);
5275                         }
5276                         if (dback->disk_bytenr != rec->start) {
5277                                 err = 1;
5278                                 if (!print_errs)
5279                                         goto out;
5280                                 fprintf(stderr, "Backref disk bytenr does not"
5281                                         " match extent record, bytenr=%llu, "
5282                                         "ref bytenr=%llu\n",
5283                                         (unsigned long long)rec->start,
5284                                         (unsigned long long)dback->disk_bytenr);
5285                         }
5286
5287                         if (dback->bytes != rec->nr) {
5288                                 err = 1;
5289                                 if (!print_errs)
5290                                         goto out;
5291                                 fprintf(stderr, "Backref bytes do not match "
5292                                         "extent backref, bytenr=%llu, ref "
5293                                         "bytes=%llu, backref bytes=%llu\n",
5294                                         (unsigned long long)rec->start,
5295                                         (unsigned long long)rec->nr,
5296                                         (unsigned long long)dback->bytes);
5297                         }
5298                 }
5299                 if (!back->is_data) {
5300                         found += 1;
5301                 } else {
5302                         dback = to_data_backref(back);
5303                         found += dback->found_ref;
5304                 }
5305         }
5306         if (found != rec->refs) {
5307                 err = 1;
5308                 if (!print_errs)
5309                         goto out;
5310                 fprintf(stderr, "Incorrect global backref count "
5311                         "on %llu found %llu wanted %llu\n",
5312                         (unsigned long long)rec->start,
5313                         (unsigned long long)found,
5314                         (unsigned long long)rec->refs);
5315         }
5316 out:
5317         return err;
5318 }
5319
5320 static int free_all_extent_backrefs(struct extent_record *rec)
5321 {
5322         struct extent_backref *back;
5323         struct list_head *cur;
5324         while (!list_empty(&rec->backrefs)) {
5325                 cur = rec->backrefs.next;
5326                 back = to_extent_backref(cur);
5327                 list_del(cur);
5328                 free(back);
5329         }
5330         return 0;
5331 }
5332
5333 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5334                                      struct cache_tree *extent_cache)
5335 {
5336         struct cache_extent *cache;
5337         struct extent_record *rec;
5338
5339         while (1) {
5340                 cache = first_cache_extent(extent_cache);
5341                 if (!cache)
5342                         break;
5343                 rec = container_of(cache, struct extent_record, cache);
5344                 remove_cache_extent(extent_cache, cache);
5345                 free_all_extent_backrefs(rec);
5346                 free(rec);
5347         }
5348 }
5349
5350 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5351                                  struct extent_record *rec)
5352 {
5353         if (rec->content_checked && rec->owner_ref_checked &&
5354             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5355             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5356             !rec->bad_full_backref && !rec->crossing_stripes &&
5357             !rec->wrong_chunk_type) {
5358                 remove_cache_extent(extent_cache, &rec->cache);
5359                 free_all_extent_backrefs(rec);
5360                 list_del_init(&rec->list);
5361                 free(rec);
5362         }
5363         return 0;
5364 }
5365
5366 static int check_owner_ref(struct btrfs_root *root,
5367                             struct extent_record *rec,
5368                             struct extent_buffer *buf)
5369 {
5370         struct extent_backref *node;
5371         struct tree_backref *back;
5372         struct btrfs_root *ref_root;
5373         struct btrfs_key key;
5374         struct btrfs_path path;
5375         struct extent_buffer *parent;
5376         int level;
5377         int found = 0;
5378         int ret;
5379
5380         list_for_each_entry(node, &rec->backrefs, list) {
5381                 if (node->is_data)
5382                         continue;
5383                 if (!node->found_ref)
5384                         continue;
5385                 if (node->full_backref)
5386                         continue;
5387                 back = to_tree_backref(node);
5388                 if (btrfs_header_owner(buf) == back->root)
5389                         return 0;
5390         }
5391         BUG_ON(rec->is_root);
5392
5393         /* try to find the block by search corresponding fs tree */
5394         key.objectid = btrfs_header_owner(buf);
5395         key.type = BTRFS_ROOT_ITEM_KEY;
5396         key.offset = (u64)-1;
5397
5398         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5399         if (IS_ERR(ref_root))
5400                 return 1;
5401
5402         level = btrfs_header_level(buf);
5403         if (level == 0)
5404                 btrfs_item_key_to_cpu(buf, &key, 0);
5405         else
5406                 btrfs_node_key_to_cpu(buf, &key, 0);
5407
5408         btrfs_init_path(&path);
5409         path.lowest_level = level + 1;
5410         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5411         if (ret < 0)
5412                 return 0;
5413
5414         parent = path.nodes[level + 1];
5415         if (parent && buf->start == btrfs_node_blockptr(parent,
5416                                                         path.slots[level + 1]))
5417                 found = 1;
5418
5419         btrfs_release_path(&path);
5420         return found ? 0 : 1;
5421 }
5422
5423 static int is_extent_tree_record(struct extent_record *rec)
5424 {
5425         struct list_head *cur = rec->backrefs.next;
5426         struct extent_backref *node;
5427         struct tree_backref *back;
5428         int is_extent = 0;
5429
5430         while(cur != &rec->backrefs) {
5431                 node = to_extent_backref(cur);
5432                 cur = cur->next;
5433                 if (node->is_data)
5434                         return 0;
5435                 back = to_tree_backref(node);
5436                 if (node->full_backref)
5437                         return 0;
5438                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5439                         is_extent = 1;
5440         }
5441         return is_extent;
5442 }
5443
5444
5445 static int record_bad_block_io(struct btrfs_fs_info *info,
5446                                struct cache_tree *extent_cache,
5447                                u64 start, u64 len)
5448 {
5449         struct extent_record *rec;
5450         struct cache_extent *cache;
5451         struct btrfs_key key;
5452
5453         cache = lookup_cache_extent(extent_cache, start, len);
5454         if (!cache)
5455                 return 0;
5456
5457         rec = container_of(cache, struct extent_record, cache);
5458         if (!is_extent_tree_record(rec))
5459                 return 0;
5460
5461         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5462         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5463 }
5464
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two key pointers are swapped and,
 * if @slot is 0, btrfs_fixup_low_keys() is called with the new first key.
 * For a leaf the two item payloads, the offset/size fields of the two item
 * headers and the two keys are all exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer can't be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crossed over. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                /* The first key of the node changed, pass it up the path. */
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* Snapshot keys, offsets and sizes of both items up front. */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): item1_data holds item1_size bytes but is
                 * written with length item2_size (and vice versa).  Assuming
                 * the last argument is a byte count, this reads past the
                 * smaller buffer when the two sizes differ -- confirm.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Cross over the offset and size stored in the item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * Finally swap the keys; path->slots[0] selects which item
                 * btrfs_set_item_key_unsafe() updates, and is left pointing
                 * at slot + 1 on return.
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5532
5533 static int fix_key_order(struct btrfs_trans_handle *trans,
5534                          struct btrfs_root *root,
5535                          struct btrfs_path *path)
5536 {
5537         struct extent_buffer *buf;
5538         struct btrfs_key k1, k2;
5539         int i;
5540         int level = path->lowest_level;
5541         int ret = -EIO;
5542
5543         buf = path->nodes[level];
5544         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5545                 if (level) {
5546                         btrfs_node_key_to_cpu(buf, &k1, i);
5547                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5548                 } else {
5549                         btrfs_item_key_to_cpu(buf, &k1, i);
5550                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5551                 }
5552                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5553                         continue;
5554                 ret = swap_values(root, path, buf, i);
5555                 if (ret)
5556                         break;
5557                 btrfs_mark_buffer_dirty(buf);
5558                 i = 0;
5559         }
5560         return ret;
5561 }
5562
5563 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5564                              struct btrfs_root *root,
5565                              struct btrfs_path *path,
5566                              struct extent_buffer *buf, int slot)
5567 {
5568         struct btrfs_key key;
5569         int nritems = btrfs_header_nritems(buf);
5570
5571         btrfs_item_key_to_cpu(buf, &key, slot);
5572
5573         /* These are all the keys we can deal with missing. */
5574         if (key.type != BTRFS_DIR_INDEX_KEY &&
5575             key.type != BTRFS_EXTENT_ITEM_KEY &&
5576             key.type != BTRFS_METADATA_ITEM_KEY &&
5577             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5578             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5579                 return -1;
5580
5581         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5582                (unsigned long long)key.objectid, key.type,
5583                (unsigned long long)key.offset, slot, buf->start);
5584         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5585                               btrfs_item_nr_offset(slot + 1),
5586                               sizeof(struct btrfs_item) *
5587                               (nritems - slot - 1));
5588         btrfs_set_header_nritems(buf, nritems - 1);
5589         if (slot == 0) {
5590                 struct btrfs_disk_key disk_key;
5591
5592                 btrfs_item_key(buf, &disk_key, 0);
5593                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5594         }
5595         btrfs_mark_buffer_dirty(buf);
5596         return 0;
5597 }
5598
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every other
 * item is expected to end where the previous item's data starts.  An item
 * that ends too early is shifted up to close the gap.  An item that ends
 * too late is handed to delete_bogus_item(); if that succeeds the whole
 * scan restarts, otherwise the function fails.
 *
 * @trans is only passed through to delete_bogus_item().
 *
 * Returns 0 on success.  Any failure trips the BUG_ON() at the end, so a
 * non-zero value is never actually returned to the caller.
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        /* Restart point after an item has been deleted from the leaf. */
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                /* First item: its data must end at the end of the leaf. */
                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root)) {
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root) -
                                btrfs_item_end_nr(buf, i);
                /* Later items: data must end where the previous one starts. */
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                /* Item already sits where it should. */
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                /* Move the item data up by @shift and record the new offset. */
                offset = btrfs_item_offset_nr(buf, i);
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
5667
5668 /*
5669  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5670  * then just return -EIO.
5671  */
5672 static int try_to_fix_bad_block(struct btrfs_root *root,
5673                                 struct extent_buffer *buf,
5674                                 enum btrfs_tree_block_status status)
5675 {
5676         struct btrfs_trans_handle *trans;
5677         struct ulist *roots;
5678         struct ulist_node *node;
5679         struct btrfs_root *search_root;
5680         struct btrfs_path path;
5681         struct ulist_iterator iter;
5682         struct btrfs_key root_key, key;
5683         int ret;
5684
5685         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5686             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5687                 return -EIO;
5688
5689         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5690         if (ret)
5691                 return -EIO;
5692
5693         btrfs_init_path(&path);
5694         ULIST_ITER_INIT(&iter);
5695         while ((node = ulist_next(roots, &iter))) {
5696                 root_key.objectid = node->val;
5697                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5698                 root_key.offset = (u64)-1;
5699
5700                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5701                 if (IS_ERR(root)) {
5702                         ret = -EIO;
5703                         break;
5704                 }
5705
5706
5707                 trans = btrfs_start_transaction(search_root, 0);
5708                 if (IS_ERR(trans)) {
5709                         ret = PTR_ERR(trans);
5710                         break;
5711                 }
5712
5713                 path.lowest_level = btrfs_header_level(buf);
5714                 path.skip_check_block = 1;
5715                 if (path.lowest_level)
5716                         btrfs_node_key_to_cpu(buf, &key, 0);
5717                 else
5718                         btrfs_item_key_to_cpu(buf, &key, 0);
5719                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5720                 if (ret) {
5721                         ret = -EIO;
5722                         btrfs_commit_transaction(trans, search_root);
5723                         break;
5724                 }
5725                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5726                         ret = fix_key_order(trans, search_root, &path);
5727                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5728                         ret = fix_item_offset(trans, search_root, &path);
5729                 if (ret) {
5730                         btrfs_commit_transaction(trans, search_root);
5731                         break;
5732                 }
5733                 btrfs_release_path(&path);
5734                 btrfs_commit_transaction(trans, search_root);
5735         }
5736         ulist_free(roots);
5737         btrfs_release_path(&path);
5738         return ret;
5739 }
5740
5741 static int check_block(struct btrfs_root *root,
5742                        struct cache_tree *extent_cache,
5743                        struct extent_buffer *buf, u64 flags)
5744 {
5745         struct extent_record *rec;
5746         struct cache_extent *cache;
5747         struct btrfs_key key;
5748         enum btrfs_tree_block_status status;
5749         int ret = 0;
5750         int level;
5751
5752         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5753         if (!cache)
5754                 return 1;
5755         rec = container_of(cache, struct extent_record, cache);
5756         rec->generation = btrfs_header_generation(buf);
5757
5758         level = btrfs_header_level(buf);
5759         if (btrfs_header_nritems(buf) > 0) {
5760
5761                 if (level == 0)
5762                         btrfs_item_key_to_cpu(buf, &key, 0);
5763                 else
5764                         btrfs_node_key_to_cpu(buf, &key, 0);
5765
5766                 rec->info_objectid = key.objectid;
5767         }
5768         rec->info_level = level;
5769
5770         if (btrfs_is_leaf(buf))
5771                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5772         else
5773                 status = btrfs_check_node(root, &rec->parent_key, buf);
5774
5775         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5776                 if (repair)
5777                         status = try_to_fix_bad_block(root, buf, status);
5778                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5779                         ret = -EIO;
5780                         fprintf(stderr, "bad block %llu\n",
5781                                 (unsigned long long)buf->start);
5782                 } else {
5783                         /*
5784                          * Signal to callers we need to start the scan over
5785                          * again since we'll have cowed blocks.
5786                          */
5787                         ret = -EAGAIN;
5788                 }
5789         } else {
5790                 rec->content_checked = 1;
5791                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5792                         rec->owner_ref_checked = 1;
5793                 else {
5794                         ret = check_owner_ref(root, rec, buf);
5795                         if (!ret)
5796                                 rec->owner_ref_checked = 1;
5797                 }
5798         }
5799         if (!ret)
5800                 maybe_free_extent_rec(extent_cache, rec);
5801         return ret;
5802 }
5803
5804 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5805                                                 u64 parent, u64 root)
5806 {
5807         struct list_head *cur = rec->backrefs.next;
5808         struct extent_backref *node;
5809         struct tree_backref *back;
5810
5811         while(cur != &rec->backrefs) {
5812                 node = to_extent_backref(cur);
5813                 cur = cur->next;
5814                 if (node->is_data)
5815                         continue;
5816                 back = to_tree_backref(node);
5817                 if (parent > 0) {
5818                         if (!node->full_backref)
5819                                 continue;
5820                         if (parent == back->parent)
5821                                 return back;
5822                 } else {
5823                         if (node->full_backref)
5824                                 continue;
5825                         if (back->root == root)
5826                                 return back;
5827                 }
5828         }
5829         return NULL;
5830 }
5831
5832 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5833                                                 u64 parent, u64 root)
5834 {
5835         struct tree_backref *ref = malloc(sizeof(*ref));
5836
5837         if (!ref)
5838                 return NULL;
5839         memset(&ref->node, 0, sizeof(ref->node));
5840         if (parent > 0) {
5841                 ref->parent = parent;
5842                 ref->node.full_backref = 1;
5843         } else {
5844                 ref->root = root;
5845                 ref->node.full_backref = 0;
5846         }
5847         list_add_tail(&ref->node.list, &rec->backrefs);
5848
5849         return ref;
5850 }
5851
5852 static struct data_backref *find_data_backref(struct extent_record *rec,
5853                                                 u64 parent, u64 root,
5854                                                 u64 owner, u64 offset,
5855                                                 int found_ref,
5856                                                 u64 disk_bytenr, u64 bytes)
5857 {
5858         struct list_head *cur = rec->backrefs.next;
5859         struct extent_backref *node;
5860         struct data_backref *back;
5861
5862         while(cur != &rec->backrefs) {
5863                 node = to_extent_backref(cur);
5864                 cur = cur->next;
5865                 if (!node->is_data)
5866                         continue;
5867                 back = to_data_backref(node);
5868                 if (parent > 0) {
5869                         if (!node->full_backref)
5870                                 continue;
5871                         if (parent == back->parent)
5872                                 return back;
5873                 } else {
5874                         if (node->full_backref)
5875                                 continue;
5876                         if (back->root == root && back->owner == owner &&
5877                             back->offset == offset) {
5878                                 if (found_ref && node->found_ref &&
5879                                     (back->bytes != bytes ||
5880                                     back->disk_bytenr != disk_bytenr))
5881                                         continue;
5882                                 return back;
5883                         }
5884                 }
5885         }
5886         return NULL;
5887 }
5888
5889 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5890                                                 u64 parent, u64 root,
5891                                                 u64 owner, u64 offset,
5892                                                 u64 max_size)
5893 {
5894         struct data_backref *ref = malloc(sizeof(*ref));
5895
5896         if (!ref)
5897                 return NULL;
5898         memset(&ref->node, 0, sizeof(ref->node));
5899         ref->node.is_data = 1;
5900
5901         if (parent > 0) {
5902                 ref->parent = parent;
5903                 ref->owner = 0;
5904                 ref->offset = 0;
5905                 ref->node.full_backref = 1;
5906         } else {
5907                 ref->root = root;
5908                 ref->owner = owner;
5909                 ref->offset = offset;
5910                 ref->node.full_backref = 0;
5911         }
5912         ref->bytes = max_size;
5913         ref->found_ref = 0;
5914         ref->num_refs = 0;
5915         list_add_tail(&ref->node.list, &rec->backrefs);
5916         if (max_size > rec->max_size)
5917                 rec->max_size = max_size;
5918         return ref;
5919 }
5920
5921 /* Check if the type of extent matches with its chunk */
5922 static void check_extent_type(struct extent_record *rec)
5923 {
5924         struct btrfs_block_group_cache *bg_cache;
5925
5926         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5927         if (!bg_cache)
5928                 return;
5929
5930         /* data extent, check chunk directly*/
5931         if (!rec->metadata) {
5932                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5933                         rec->wrong_chunk_type = 1;
5934                 return;
5935         }
5936
5937         /* metadata extent, check the obvious case first */
5938         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5939                                  BTRFS_BLOCK_GROUP_METADATA))) {
5940                 rec->wrong_chunk_type = 1;
5941                 return;
5942         }
5943
5944         /*
5945          * Check SYSTEM extent, as it's also marked as metadata, we can only
5946          * make sure it's a SYSTEM extent by its backref
5947          */
5948         if (!list_empty(&rec->backrefs)) {
5949                 struct extent_backref *node;
5950                 struct tree_backref *tback;
5951                 u64 bg_type;
5952
5953                 node = to_extent_backref(rec->backrefs.next);
5954                 if (node->is_data) {
5955                         /* tree block shouldn't have data backref */
5956                         rec->wrong_chunk_type = 1;
5957                         return;
5958                 }
5959                 tback = container_of(node, struct tree_backref, node);
5960
5961                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5962                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5963                 else
5964                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5965                 if (!(bg_cache->flags & bg_type))
5966                         rec->wrong_chunk_type = 1;
5967         }
5968 }
5969
5970 /*
5971  * Allocate a new extent record, fill default values from @tmpl and insert int
5972  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5973  * the cache, otherwise it fails.
5974  */
5975 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5976                 struct extent_record *tmpl)
5977 {
5978         struct extent_record *rec;
5979         int ret = 0;
5980
5981         rec = malloc(sizeof(*rec));
5982         if (!rec)
5983                 return -ENOMEM;
5984         rec->start = tmpl->start;
5985         rec->max_size = tmpl->max_size;
5986         rec->nr = max(tmpl->nr, tmpl->max_size);
5987         rec->found_rec = tmpl->found_rec;
5988         rec->content_checked = tmpl->content_checked;
5989         rec->owner_ref_checked = tmpl->owner_ref_checked;
5990         rec->num_duplicates = 0;
5991         rec->metadata = tmpl->metadata;
5992         rec->flag_block_full_backref = FLAG_UNSET;
5993         rec->bad_full_backref = 0;
5994         rec->crossing_stripes = 0;
5995         rec->wrong_chunk_type = 0;
5996         rec->is_root = tmpl->is_root;
5997         rec->refs = tmpl->refs;
5998         rec->extent_item_refs = tmpl->extent_item_refs;
5999         rec->parent_generation = tmpl->parent_generation;
6000         INIT_LIST_HEAD(&rec->backrefs);
6001         INIT_LIST_HEAD(&rec->dups);
6002         INIT_LIST_HEAD(&rec->list);
6003         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6004         rec->cache.start = tmpl->start;
6005         rec->cache.size = tmpl->nr;
6006         ret = insert_cache_extent(extent_cache, &rec->cache);
6007         if (ret) {
6008                 free(rec);
6009                 return ret;
6010         }
6011         bytes_used += rec->nr;
6012
6013         if (tmpl->metadata)
6014                 rec->crossing_stripes = check_crossing_stripes(global_info,
6015                                 rec->start, global_info->tree_root->nodesize);
6016         check_extent_type(rec);
6017         return ret;
6018 }
6019
6020 /*
6021  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6022  * some are hints:
6023  * - refs              - if found, increase refs
6024  * - is_root           - if found, set
6025  * - content_checked   - if found, set
6026  * - owner_ref_checked - if found, set
6027  *
6028  * If not found, create a new one, initialize and insert.
6029  */
6030 static int add_extent_rec(struct cache_tree *extent_cache,
6031                 struct extent_record *tmpl)
6032 {
6033         struct extent_record *rec;
6034         struct cache_extent *cache;
6035         int ret = 0;
6036         int dup = 0;
6037
6038         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6039         if (cache) {
6040                 rec = container_of(cache, struct extent_record, cache);
6041                 if (tmpl->refs)
6042                         rec->refs++;
6043                 if (rec->nr == 1)
6044                         rec->nr = max(tmpl->nr, tmpl->max_size);
6045
6046                 /*
6047                  * We need to make sure to reset nr to whatever the extent
6048                  * record says was the real size, this way we can compare it to
6049                  * the backrefs.
6050                  */
6051                 if (tmpl->found_rec) {
6052                         if (tmpl->start != rec->start || rec->found_rec) {
6053                                 struct extent_record *tmp;
6054
6055                                 dup = 1;
6056                                 if (list_empty(&rec->list))
6057                                         list_add_tail(&rec->list,
6058                                                       &duplicate_extents);
6059
6060                                 /*
6061                                  * We have to do this song and dance in case we
6062                                  * find an extent record that falls inside of
6063                                  * our current extent record but does not have
6064                                  * the same objectid.
6065                                  */
6066                                 tmp = malloc(sizeof(*tmp));
6067                                 if (!tmp)
6068                                         return -ENOMEM;
6069                                 tmp->start = tmpl->start;
6070                                 tmp->max_size = tmpl->max_size;
6071                                 tmp->nr = tmpl->nr;
6072                                 tmp->found_rec = 1;
6073                                 tmp->metadata = tmpl->metadata;
6074                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6075                                 INIT_LIST_HEAD(&tmp->list);
6076                                 list_add_tail(&tmp->list, &rec->dups);
6077                                 rec->num_duplicates++;
6078                         } else {
6079                                 rec->nr = tmpl->nr;
6080                                 rec->found_rec = 1;
6081                         }
6082                 }
6083
6084                 if (tmpl->extent_item_refs && !dup) {
6085                         if (rec->extent_item_refs) {
6086                                 fprintf(stderr, "block %llu rec "
6087                                         "extent_item_refs %llu, passed %llu\n",
6088                                         (unsigned long long)tmpl->start,
6089                                         (unsigned long long)
6090                                                         rec->extent_item_refs,
6091                                         (unsigned long long)tmpl->extent_item_refs);
6092                         }
6093                         rec->extent_item_refs = tmpl->extent_item_refs;
6094                 }
6095                 if (tmpl->is_root)
6096                         rec->is_root = 1;
6097                 if (tmpl->content_checked)
6098                         rec->content_checked = 1;
6099                 if (tmpl->owner_ref_checked)
6100                         rec->owner_ref_checked = 1;
6101                 memcpy(&rec->parent_key, &tmpl->parent_key,
6102                                 sizeof(tmpl->parent_key));
6103                 if (tmpl->parent_generation)
6104                         rec->parent_generation = tmpl->parent_generation;
6105                 if (rec->max_size < tmpl->max_size)
6106                         rec->max_size = tmpl->max_size;
6107
6108                 /*
6109                  * A metadata extent can't cross stripe_len boundary, otherwise
6110                  * kernel scrub won't be able to handle it.
6111                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6112                  * it.
6113                  */
6114                 if (tmpl->metadata)
6115                         rec->crossing_stripes = check_crossing_stripes(
6116                                         global_info, rec->start,
6117                                         global_info->tree_root->nodesize);
6118                 check_extent_type(rec);
6119                 maybe_free_extent_rec(extent_cache, rec);
6120                 return ret;
6121         }
6122
6123         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6124
6125         return ret;
6126 }
6127
6128 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6129                             u64 parent, u64 root, int found_ref)
6130 {
6131         struct extent_record *rec;
6132         struct tree_backref *back;
6133         struct cache_extent *cache;
6134         int ret;
6135
6136         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6137         if (!cache) {
6138                 struct extent_record tmpl;
6139
6140                 memset(&tmpl, 0, sizeof(tmpl));
6141                 tmpl.start = bytenr;
6142                 tmpl.nr = 1;
6143                 tmpl.metadata = 1;
6144
6145                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6146                 if (ret)
6147                         return ret;
6148
6149                 /* really a bug in cache_extent implement now */
6150                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6151                 if (!cache)
6152                         return -ENOENT;
6153         }
6154
6155         rec = container_of(cache, struct extent_record, cache);
6156         if (rec->start != bytenr) {
6157                 /*
6158                  * Several cause, from unaligned bytenr to over lapping extents
6159                  */
6160                 return -EEXIST;
6161         }
6162
6163         back = find_tree_backref(rec, parent, root);
6164         if (!back) {
6165                 back = alloc_tree_backref(rec, parent, root);
6166                 if (!back)
6167                         return -ENOMEM;
6168         }
6169
6170         if (found_ref) {
6171                 if (back->node.found_ref) {
6172                         fprintf(stderr, "Extent back ref already exists "
6173                                 "for %llu parent %llu root %llu \n",
6174                                 (unsigned long long)bytenr,
6175                                 (unsigned long long)parent,
6176                                 (unsigned long long)root);
6177                 }
6178                 back->node.found_ref = 1;
6179         } else {
6180                 if (back->node.found_extent_tree) {
6181                         fprintf(stderr, "Extent back ref already exists "
6182                                 "for %llu parent %llu root %llu \n",
6183                                 (unsigned long long)bytenr,
6184                                 (unsigned long long)parent,
6185                                 (unsigned long long)root);
6186                 }
6187                 back->node.found_extent_tree = 1;
6188         }
6189         check_extent_type(rec);
6190         maybe_free_extent_rec(extent_cache, rec);
6191         return 0;
6192 }
6193
6194 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6195                             u64 parent, u64 root, u64 owner, u64 offset,
6196                             u32 num_refs, int found_ref, u64 max_size)
6197 {
6198         struct extent_record *rec;
6199         struct data_backref *back;
6200         struct cache_extent *cache;
6201         int ret;
6202
6203         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6204         if (!cache) {
6205                 struct extent_record tmpl;
6206
6207                 memset(&tmpl, 0, sizeof(tmpl));
6208                 tmpl.start = bytenr;
6209                 tmpl.nr = 1;
6210                 tmpl.max_size = max_size;
6211
6212                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6213                 if (ret)
6214                         return ret;
6215
6216                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6217                 if (!cache)
6218                         abort();
6219         }
6220
6221         rec = container_of(cache, struct extent_record, cache);
6222         if (rec->max_size < max_size)
6223                 rec->max_size = max_size;
6224
6225         /*
6226          * If found_ref is set then max_size is the real size and must match the
6227          * existing refs.  So if we have already found a ref then we need to
6228          * make sure that this ref matches the existing one, otherwise we need
6229          * to add a new backref so we can notice that the backrefs don't match
6230          * and we need to figure out who is telling the truth.  This is to
6231          * account for that awful fsync bug I introduced where we'd end up with
6232          * a btrfs_file_extent_item that would have its length include multiple
6233          * prealloc extents or point inside of a prealloc extent.
6234          */
6235         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6236                                  bytenr, max_size);
6237         if (!back) {
6238                 back = alloc_data_backref(rec, parent, root, owner, offset,
6239                                           max_size);
6240                 BUG_ON(!back);
6241         }
6242
6243         if (found_ref) {
6244                 BUG_ON(num_refs != 1);
6245                 if (back->node.found_ref)
6246                         BUG_ON(back->bytes != max_size);
6247                 back->node.found_ref = 1;
6248                 back->found_ref += 1;
6249                 back->bytes = max_size;
6250                 back->disk_bytenr = bytenr;
6251                 rec->refs += 1;
6252                 rec->content_checked = 1;
6253                 rec->owner_ref_checked = 1;
6254         } else {
6255                 if (back->node.found_extent_tree) {
6256                         fprintf(stderr, "Extent back ref already exists "
6257                                 "for %llu parent %llu root %llu "
6258                                 "owner %llu offset %llu num_refs %lu\n",
6259                                 (unsigned long long)bytenr,
6260                                 (unsigned long long)parent,
6261                                 (unsigned long long)root,
6262                                 (unsigned long long)owner,
6263                                 (unsigned long long)offset,
6264                                 (unsigned long)num_refs);
6265                 }
6266                 back->num_refs = num_refs;
6267                 back->node.found_extent_tree = 1;
6268         }
6269         maybe_free_extent_rec(extent_cache, rec);
6270         return 0;
6271 }
6272
6273 static int add_pending(struct cache_tree *pending,
6274                        struct cache_tree *seen, u64 bytenr, u32 size)
6275 {
6276         int ret;
6277         ret = add_cache_extent(seen, bytenr, size);
6278         if (ret)
6279                 return ret;
6280         add_cache_extent(pending, bytenr, size);
6281         return 0;
6282 }
6283
6284 static int pick_next_pending(struct cache_tree *pending,
6285                         struct cache_tree *reada,
6286                         struct cache_tree *nodes,
6287                         u64 last, struct block_info *bits, int bits_nr,
6288                         int *reada_bits)
6289 {
6290         unsigned long node_start = last;
6291         struct cache_extent *cache;
6292         int ret;
6293
6294         cache = search_cache_extent(reada, 0);
6295         if (cache) {
6296                 bits[0].start = cache->start;
6297                 bits[0].size = cache->size;
6298                 *reada_bits = 1;
6299                 return 1;
6300         }
6301         *reada_bits = 0;
6302         if (node_start > 32768)
6303                 node_start -= 32768;
6304
6305         cache = search_cache_extent(nodes, node_start);
6306         if (!cache)
6307                 cache = search_cache_extent(nodes, 0);
6308
6309         if (!cache) {
6310                  cache = search_cache_extent(pending, 0);
6311                  if (!cache)
6312                          return 0;
6313                  ret = 0;
6314                  do {
6315                          bits[ret].start = cache->start;
6316                          bits[ret].size = cache->size;
6317                          cache = next_cache_extent(cache);
6318                          ret++;
6319                  } while (cache && ret < bits_nr);
6320                  return ret;
6321         }
6322
6323         ret = 0;
6324         do {
6325                 bits[ret].start = cache->start;
6326                 bits[ret].size = cache->size;
6327                 cache = next_cache_extent(cache);
6328                 ret++;
6329         } while (cache && ret < bits_nr);
6330
6331         if (bits_nr - ret > 8) {
6332                 u64 lookup = bits[0].start + bits[0].size;
6333                 struct cache_extent *next;
6334                 next = search_cache_extent(pending, lookup);
6335                 while(next) {
6336                         if (next->start - lookup > 32768)
6337                                 break;
6338                         bits[ret].start = next->start;
6339                         bits[ret].size = next->size;
6340                         lookup = next->start + next->size;
6341                         ret++;
6342                         if (ret == bits_nr)
6343                                 break;
6344                         next = next_cache_extent(next);
6345                         if (!next)
6346                                 break;
6347                 }
6348         }
6349         return ret;
6350 }
6351
6352 static void free_chunk_record(struct cache_extent *cache)
6353 {
6354         struct chunk_record *rec;
6355
6356         rec = container_of(cache, struct chunk_record, cache);
6357         list_del_init(&rec->list);
6358         list_del_init(&rec->dextents);
6359         free(rec);
6360 }
6361
6362 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6363 {
6364         cache_tree_free_extents(chunk_cache, free_chunk_record);
6365 }
6366
6367 static void free_device_record(struct rb_node *node)
6368 {
6369         struct device_record *rec;
6370
6371         rec = container_of(node, struct device_record, node);
6372         free(rec);
6373 }
6374
6375 FREE_RB_BASED_TREE(device_cache, free_device_record);
6376
6377 int insert_block_group_record(struct block_group_tree *tree,
6378                               struct block_group_record *bg_rec)
6379 {
6380         int ret;
6381
6382         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6383         if (ret)
6384                 return ret;
6385
6386         list_add_tail(&bg_rec->list, &tree->block_groups);
6387         return 0;
6388 }
6389
6390 static void free_block_group_record(struct cache_extent *cache)
6391 {
6392         struct block_group_record *rec;
6393
6394         rec = container_of(cache, struct block_group_record, cache);
6395         list_del_init(&rec->list);
6396         free(rec);
6397 }
6398
6399 void free_block_group_tree(struct block_group_tree *tree)
6400 {
6401         cache_tree_free_extents(&tree->tree, free_block_group_record);
6402 }
6403
6404 int insert_device_extent_record(struct device_extent_tree *tree,
6405                                 struct device_extent_record *de_rec)
6406 {
6407         int ret;
6408
6409         /*
6410          * Device extent is a bit different from the other extents, because
6411          * the extents which belong to the different devices may have the
6412          * same start and size, so we need use the special extent cache
6413          * search/insert functions.
6414          */
6415         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6416         if (ret)
6417                 return ret;
6418
6419         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6420         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6421         return 0;
6422 }
6423
6424 static void free_device_extent_record(struct cache_extent *cache)
6425 {
6426         struct device_extent_record *rec;
6427
6428         rec = container_of(cache, struct device_extent_record, cache);
6429         if (!list_empty(&rec->chunk_list))
6430                 list_del_init(&rec->chunk_list);
6431         if (!list_empty(&rec->device_list))
6432                 list_del_init(&rec->device_list);
6433         free(rec);
6434 }
6435
6436 void free_device_extent_tree(struct device_extent_tree *tree)
6437 {
6438         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6439 }
6440
6441 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6442 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6443                                  struct extent_buffer *leaf, int slot)
6444 {
6445         struct btrfs_extent_ref_v0 *ref0;
6446         struct btrfs_key key;
6447         int ret;
6448
6449         btrfs_item_key_to_cpu(leaf, &key, slot);
6450         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6451         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6452                 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6453                                 0, 0);
6454         } else {
6455                 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6456                                 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
6457         }
6458         return ret;
6459 }
6460 #endif
6461
6462 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6463                                             struct btrfs_key *key,
6464                                             int slot)
6465 {
6466         struct btrfs_chunk *ptr;
6467         struct chunk_record *rec;
6468         int num_stripes, i;
6469
6470         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6471         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6472
6473         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6474         if (!rec) {
6475                 fprintf(stderr, "memory allocation failed\n");
6476                 exit(-1);
6477         }
6478
6479         INIT_LIST_HEAD(&rec->list);
6480         INIT_LIST_HEAD(&rec->dextents);
6481         rec->bg_rec = NULL;
6482
6483         rec->cache.start = key->offset;
6484         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6485
6486         rec->generation = btrfs_header_generation(leaf);
6487
6488         rec->objectid = key->objectid;
6489         rec->type = key->type;
6490         rec->offset = key->offset;
6491
6492         rec->length = rec->cache.size;
6493         rec->owner = btrfs_chunk_owner(leaf, ptr);
6494         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6495         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6496         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6497         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6498         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6499         rec->num_stripes = num_stripes;
6500         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6501
6502         for (i = 0; i < rec->num_stripes; ++i) {
6503                 rec->stripes[i].devid =
6504                         btrfs_stripe_devid_nr(leaf, ptr, i);
6505                 rec->stripes[i].offset =
6506                         btrfs_stripe_offset_nr(leaf, ptr, i);
6507                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6508                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6509                                 BTRFS_UUID_SIZE);
6510         }
6511
6512         return rec;
6513 }
6514
6515 static int process_chunk_item(struct cache_tree *chunk_cache,
6516                               struct btrfs_key *key, struct extent_buffer *eb,
6517                               int slot)
6518 {
6519         struct chunk_record *rec;
6520         struct btrfs_chunk *chunk;
6521         int ret = 0;
6522
6523         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6524         /*
6525          * Do extra check for this chunk item,
6526          *
6527          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6528          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6529          * and owner<->key_type check.
6530          */
6531         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6532                                       key->offset);
6533         if (ret < 0) {
6534                 error("chunk(%llu, %llu) is not valid, ignore it",
6535                       key->offset, btrfs_chunk_length(eb, chunk));
6536                 return 0;
6537         }
6538         rec = btrfs_new_chunk_record(eb, key, slot);
6539         ret = insert_cache_extent(chunk_cache, &rec->cache);
6540         if (ret) {
6541                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6542                         rec->offset, rec->length);
6543                 free(rec);
6544         }
6545
6546         return ret;
6547 }
6548
6549 static int process_device_item(struct rb_root *dev_cache,
6550                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6551 {
6552         struct btrfs_dev_item *ptr;
6553         struct device_record *rec;
6554         int ret = 0;
6555
6556         ptr = btrfs_item_ptr(eb,
6557                 slot, struct btrfs_dev_item);
6558
6559         rec = malloc(sizeof(*rec));
6560         if (!rec) {
6561                 fprintf(stderr, "memory allocation failed\n");
6562                 return -ENOMEM;
6563         }
6564
6565         rec->devid = key->offset;
6566         rec->generation = btrfs_header_generation(eb);
6567
6568         rec->objectid = key->objectid;
6569         rec->type = key->type;
6570         rec->offset = key->offset;
6571
6572         rec->devid = btrfs_device_id(eb, ptr);
6573         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6574         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6575
6576         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6577         if (ret) {
6578                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6579                 free(rec);
6580         }
6581
6582         return ret;
6583 }
6584
6585 struct block_group_record *
6586 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6587                              int slot)
6588 {
6589         struct btrfs_block_group_item *ptr;
6590         struct block_group_record *rec;
6591
6592         rec = calloc(1, sizeof(*rec));
6593         if (!rec) {
6594                 fprintf(stderr, "memory allocation failed\n");
6595                 exit(-1);
6596         }
6597
6598         rec->cache.start = key->objectid;
6599         rec->cache.size = key->offset;
6600
6601         rec->generation = btrfs_header_generation(leaf);
6602
6603         rec->objectid = key->objectid;
6604         rec->type = key->type;
6605         rec->offset = key->offset;
6606
6607         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6608         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6609
6610         INIT_LIST_HEAD(&rec->list);
6611
6612         return rec;
6613 }
6614
6615 static int process_block_group_item(struct block_group_tree *block_group_cache,
6616                                     struct btrfs_key *key,
6617                                     struct extent_buffer *eb, int slot)
6618 {
6619         struct block_group_record *rec;
6620         int ret = 0;
6621
6622         rec = btrfs_new_block_group_record(eb, key, slot);
6623         ret = insert_block_group_record(block_group_cache, rec);
6624         if (ret) {
6625                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6626                         rec->objectid, rec->offset);
6627                 free(rec);
6628         }
6629
6630         return ret;
6631 }
6632
6633 struct device_extent_record *
6634 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6635                                struct btrfs_key *key, int slot)
6636 {
6637         struct device_extent_record *rec;
6638         struct btrfs_dev_extent *ptr;
6639
6640         rec = calloc(1, sizeof(*rec));
6641         if (!rec) {
6642                 fprintf(stderr, "memory allocation failed\n");
6643                 exit(-1);
6644         }
6645
6646         rec->cache.objectid = key->objectid;
6647         rec->cache.start = key->offset;
6648
6649         rec->generation = btrfs_header_generation(leaf);
6650
6651         rec->objectid = key->objectid;
6652         rec->type = key->type;
6653         rec->offset = key->offset;
6654
6655         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6656         rec->chunk_objecteid =
6657                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6658         rec->chunk_offset =
6659                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6660         rec->length = btrfs_dev_extent_length(leaf, ptr);
6661         rec->cache.size = rec->length;
6662
6663         INIT_LIST_HEAD(&rec->chunk_list);
6664         INIT_LIST_HEAD(&rec->device_list);
6665
6666         return rec;
6667 }
6668
6669 static int
6670 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6671                            struct btrfs_key *key, struct extent_buffer *eb,
6672                            int slot)
6673 {
6674         struct device_extent_record *rec;
6675         int ret;
6676
6677         rec = btrfs_new_device_extent_record(eb, key, slot);
6678         ret = insert_device_extent_record(dev_extent_cache, rec);
6679         if (ret) {
6680                 fprintf(stderr,
6681                         "Device extent[%llu, %llu, %llu] existed.\n",
6682                         rec->objectid, rec->offset, rec->length);
6683                 free(rec);
6684         }
6685
6686         return ret;
6687 }
6688
6689 static int process_extent_item(struct btrfs_root *root,
6690                                struct cache_tree *extent_cache,
6691                                struct extent_buffer *eb, int slot)
6692 {
6693         struct btrfs_extent_item *ei;
6694         struct btrfs_extent_inline_ref *iref;
6695         struct btrfs_extent_data_ref *dref;
6696         struct btrfs_shared_data_ref *sref;
6697         struct btrfs_key key;
6698         struct extent_record tmpl;
6699         unsigned long end;
6700         unsigned long ptr;
6701         int ret;
6702         int type;
6703         u32 item_size = btrfs_item_size_nr(eb, slot);
6704         u64 refs = 0;
6705         u64 offset;
6706         u64 num_bytes;
6707         int metadata = 0;
6708
6709         btrfs_item_key_to_cpu(eb, &key, slot);
6710
6711         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6712                 metadata = 1;
6713                 num_bytes = root->nodesize;
6714         } else {
6715                 num_bytes = key.offset;
6716         }
6717
6718         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6719                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6720                       key.objectid, root->sectorsize);
6721                 return -EIO;
6722         }
6723         if (item_size < sizeof(*ei)) {
6724 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6725                 struct btrfs_extent_item_v0 *ei0;
6726                 BUG_ON(item_size != sizeof(*ei0));
6727                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6728                 refs = btrfs_extent_refs_v0(eb, ei0);
6729 #else
6730                 BUG();
6731 #endif
6732                 memset(&tmpl, 0, sizeof(tmpl));
6733                 tmpl.start = key.objectid;
6734                 tmpl.nr = num_bytes;
6735                 tmpl.extent_item_refs = refs;
6736                 tmpl.metadata = metadata;
6737                 tmpl.found_rec = 1;
6738                 tmpl.max_size = num_bytes;
6739
6740                 return add_extent_rec(extent_cache, &tmpl);
6741         }
6742
6743         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6744         refs = btrfs_extent_refs(eb, ei);
6745         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6746                 metadata = 1;
6747         else
6748                 metadata = 0;
6749         if (metadata && num_bytes != root->nodesize) {
6750                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6751                       num_bytes, root->nodesize);
6752                 return -EIO;
6753         }
6754         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6755                 error("ignore invalid data extent, length %llu is not aligned to %u",
6756                       num_bytes, root->sectorsize);
6757                 return -EIO;
6758         }
6759
6760         memset(&tmpl, 0, sizeof(tmpl));
6761         tmpl.start = key.objectid;
6762         tmpl.nr = num_bytes;
6763         tmpl.extent_item_refs = refs;
6764         tmpl.metadata = metadata;
6765         tmpl.found_rec = 1;
6766         tmpl.max_size = num_bytes;
6767         add_extent_rec(extent_cache, &tmpl);
6768
6769         ptr = (unsigned long)(ei + 1);
6770         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6771             key.type == BTRFS_EXTENT_ITEM_KEY)
6772                 ptr += sizeof(struct btrfs_tree_block_info);
6773
6774         end = (unsigned long)ei + item_size;
6775         while (ptr < end) {
6776                 iref = (struct btrfs_extent_inline_ref *)ptr;
6777                 type = btrfs_extent_inline_ref_type(eb, iref);
6778                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6779                 switch (type) {
6780                 case BTRFS_TREE_BLOCK_REF_KEY:
6781                         ret = add_tree_backref(extent_cache, key.objectid,
6782                                         0, offset, 0);
6783                         if (ret < 0)
6784                                 error("add_tree_backref failed: %s",
6785                                       strerror(-ret));
6786                         break;
6787                 case BTRFS_SHARED_BLOCK_REF_KEY:
6788                         ret = add_tree_backref(extent_cache, key.objectid,
6789                                         offset, 0, 0);
6790                         if (ret < 0)
6791                                 error("add_tree_backref failed: %s",
6792                                       strerror(-ret));
6793                         break;
6794                 case BTRFS_EXTENT_DATA_REF_KEY:
6795                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6796                         add_data_backref(extent_cache, key.objectid, 0,
6797                                         btrfs_extent_data_ref_root(eb, dref),
6798                                         btrfs_extent_data_ref_objectid(eb,
6799                                                                        dref),
6800                                         btrfs_extent_data_ref_offset(eb, dref),
6801                                         btrfs_extent_data_ref_count(eb, dref),
6802                                         0, num_bytes);
6803                         break;
6804                 case BTRFS_SHARED_DATA_REF_KEY:
6805                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6806                         add_data_backref(extent_cache, key.objectid, offset,
6807                                         0, 0, 0,
6808                                         btrfs_shared_data_ref_count(eb, sref),
6809                                         0, num_bytes);
6810                         break;
6811                 default:
6812                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6813                                 key.objectid, key.type, num_bytes);
6814                         goto out;
6815                 }
6816                 ptr += btrfs_extent_inline_ref_size(type);
6817         }
6818         WARN_ON(ptr > end);
6819 out:
6820         return 0;
6821 }
6822
6823 static int check_cache_range(struct btrfs_root *root,
6824                              struct btrfs_block_group_cache *cache,
6825                              u64 offset, u64 bytes)
6826 {
6827         struct btrfs_free_space *entry;
6828         u64 *logical;
6829         u64 bytenr;
6830         int stripe_len;
6831         int i, nr, ret;
6832
6833         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6834                 bytenr = btrfs_sb_offset(i);
6835                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6836                                        cache->key.objectid, bytenr, 0,
6837                                        &logical, &nr, &stripe_len);
6838                 if (ret)
6839                         return ret;
6840
6841                 while (nr--) {
6842                         if (logical[nr] + stripe_len <= offset)
6843                                 continue;
6844                         if (offset + bytes <= logical[nr])
6845                                 continue;
6846                         if (logical[nr] == offset) {
6847                                 if (stripe_len >= bytes) {
6848                                         free(logical);
6849                                         return 0;
6850                                 }
6851                                 bytes -= stripe_len;
6852                                 offset += stripe_len;
6853                         } else if (logical[nr] < offset) {
6854                                 if (logical[nr] + stripe_len >=
6855                                     offset + bytes) {
6856                                         free(logical);
6857                                         return 0;
6858                                 }
6859                                 bytes = (offset + bytes) -
6860                                         (logical[nr] + stripe_len);
6861                                 offset = logical[nr] + stripe_len;
6862                         } else {
6863                                 /*
6864                                  * Could be tricky, the super may land in the
6865                                  * middle of the area we're checking.  First
6866                                  * check the easiest case, it's at the end.
6867                                  */
6868                                 if (logical[nr] + stripe_len >=
6869                                     bytes + offset) {
6870                                         bytes = logical[nr] - offset;
6871                                         continue;
6872                                 }
6873
6874                                 /* Check the left side */
6875                                 ret = check_cache_range(root, cache,
6876                                                         offset,
6877                                                         logical[nr] - offset);
6878                                 if (ret) {
6879                                         free(logical);
6880                                         return ret;
6881                                 }
6882
6883                                 /* Now we continue with the right side */
6884                                 bytes = (offset + bytes) -
6885                                         (logical[nr] + stripe_len);
6886                                 offset = logical[nr] + stripe_len;
6887                         }
6888                 }
6889
6890                 free(logical);
6891         }
6892
6893         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6894         if (!entry) {
6895                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6896                         offset, offset+bytes);
6897                 return -EINVAL;
6898         }
6899
6900         if (entry->offset != offset) {
6901                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6902                         entry->offset);
6903                 return -EINVAL;
6904         }
6905
6906         if (entry->bytes != bytes) {
6907                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6908                         bytes, entry->bytes, offset);
6909                 return -EINVAL;
6910         }
6911
6912         unlink_free_space(cache->free_space_ctl, entry);
6913         free(entry);
6914         return 0;
6915 }
6916
6917 static int verify_space_cache(struct btrfs_root *root,
6918                               struct btrfs_block_group_cache *cache)
6919 {
6920         struct btrfs_path path;
6921         struct extent_buffer *leaf;
6922         struct btrfs_key key;
6923         u64 last;
6924         int ret = 0;
6925
6926         root = root->fs_info->extent_root;
6927
6928         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6929
6930         btrfs_init_path(&path);
6931         key.objectid = last;
6932         key.offset = 0;
6933         key.type = BTRFS_EXTENT_ITEM_KEY;
6934         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6935         if (ret < 0)
6936                 goto out;
6937         ret = 0;
6938         while (1) {
6939                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6940                         ret = btrfs_next_leaf(root, &path);
6941                         if (ret < 0)
6942                                 goto out;
6943                         if (ret > 0) {
6944                                 ret = 0;
6945                                 break;
6946                         }
6947                 }
6948                 leaf = path.nodes[0];
6949                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6950                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6951                         break;
6952                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6953                     key.type != BTRFS_METADATA_ITEM_KEY) {
6954                         path.slots[0]++;
6955                         continue;
6956                 }
6957
6958                 if (last == key.objectid) {
6959                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6960                                 last = key.objectid + key.offset;
6961                         else
6962                                 last = key.objectid + root->nodesize;
6963                         path.slots[0]++;
6964                         continue;
6965                 }
6966
6967                 ret = check_cache_range(root, cache, last,
6968                                         key.objectid - last);
6969                 if (ret)
6970                         break;
6971                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6972                         last = key.objectid + key.offset;
6973                 else
6974                         last = key.objectid + root->nodesize;
6975                 path.slots[0]++;
6976         }
6977
6978         if (last < cache->key.objectid + cache->key.offset)
6979                 ret = check_cache_range(root, cache, last,
6980                                         cache->key.objectid +
6981                                         cache->key.offset - last);
6982
6983 out:
6984         btrfs_release_path(&path);
6985
6986         if (!ret &&
6987             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6988                 fprintf(stderr, "There are still entries left in the space "
6989                         "cache\n");
6990                 ret = -EINVAL;
6991         }
6992
6993         return ret;
6994 }
6995
6996 static int check_space_cache(struct btrfs_root *root)
6997 {
6998         struct btrfs_block_group_cache *cache;
6999         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7000         int ret;
7001         int error = 0;
7002
7003         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7004             btrfs_super_generation(root->fs_info->super_copy) !=
7005             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7006                 printf("cache and super generation don't match, space cache "
7007                        "will be invalidated\n");
7008                 return 0;
7009         }
7010
7011         if (ctx.progress_enabled) {
7012                 ctx.tp = TASK_FREE_SPACE;
7013                 task_start(ctx.info);
7014         }
7015
7016         while (1) {
7017                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7018                 if (!cache)
7019                         break;
7020
7021                 start = cache->key.objectid + cache->key.offset;
7022                 if (!cache->free_space_ctl) {
7023                         if (btrfs_init_free_space_ctl(cache,
7024                                                       root->sectorsize)) {
7025                                 ret = -ENOMEM;
7026                                 break;
7027                         }
7028                 } else {
7029                         btrfs_remove_free_space_cache(cache);
7030                 }
7031
7032                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7033                         ret = exclude_super_stripes(root, cache);
7034                         if (ret) {
7035                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7036                                         strerror(-ret));
7037                                 error++;
7038                                 continue;
7039                         }
7040                         ret = load_free_space_tree(root->fs_info, cache);
7041                         free_excluded_extents(root, cache);
7042                         if (ret < 0) {
7043                                 fprintf(stderr, "could not load free space tree: %s\n",
7044                                         strerror(-ret));
7045                                 error++;
7046                                 continue;
7047                         }
7048                         error += ret;
7049                 } else {
7050                         ret = load_free_space_cache(root->fs_info, cache);
7051                         if (!ret)
7052                                 continue;
7053                 }
7054
7055                 ret = verify_space_cache(root, cache);
7056                 if (ret) {
7057                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7058                                 cache->key.objectid);
7059                         error++;
7060                 }
7061         }
7062
7063         task_stop(ctx.info);
7064
7065         return error ? -EINVAL : 0;
7066 }
7067
7068 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7069                         u64 num_bytes, unsigned long leaf_offset,
7070                         struct extent_buffer *eb) {
7071
7072         u64 offset = 0;
7073         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7074         char *data;
7075         unsigned long csum_offset;
7076         u32 csum;
7077         u32 csum_expected;
7078         u64 read_len;
7079         u64 data_checked = 0;
7080         u64 tmp;
7081         int ret = 0;
7082         int mirror;
7083         int num_copies;
7084
7085         if (num_bytes % root->sectorsize)
7086                 return -EINVAL;
7087
7088         data = malloc(num_bytes);
7089         if (!data)
7090                 return -ENOMEM;
7091
7092         while (offset < num_bytes) {
7093                 mirror = 0;
7094 again:
7095                 read_len = num_bytes - offset;
7096                 /* read as much space once a time */
7097                 ret = read_extent_data(root, data + offset,
7098                                 bytenr + offset, &read_len, mirror);
7099                 if (ret)
7100                         goto out;
7101                 data_checked = 0;
7102                 /* verify every 4k data's checksum */
7103                 while (data_checked < read_len) {
7104                         csum = ~(u32)0;
7105                         tmp = offset + data_checked;
7106
7107                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
7108                                                csum, root->sectorsize);
7109                         btrfs_csum_final(csum, (u8 *)&csum);
7110
7111                         csum_offset = leaf_offset +
7112                                  tmp / root->sectorsize * csum_size;
7113                         read_extent_buffer(eb, (char *)&csum_expected,
7114                                            csum_offset, csum_size);
7115                         /* try another mirror */
7116                         if (csum != csum_expected) {
7117                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7118                                                 mirror, bytenr + tmp,
7119                                                 csum, csum_expected);
7120                                 num_copies = btrfs_num_copies(
7121                                                 &root->fs_info->mapping_tree,
7122                                                 bytenr, num_bytes);
7123                                 if (mirror < num_copies - 1) {
7124                                         mirror += 1;
7125                                         goto again;
7126                                 }
7127                         }
7128                         data_checked += root->sectorsize;
7129                 }
7130                 offset += read_len;
7131         }
7132 out:
7133         free(data);
7134         return ret;
7135 }
7136
7137 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7138                                u64 num_bytes)
7139 {
7140         struct btrfs_path path;
7141         struct extent_buffer *leaf;
7142         struct btrfs_key key;
7143         int ret;
7144
7145         btrfs_init_path(&path);
7146         key.objectid = bytenr;
7147         key.type = BTRFS_EXTENT_ITEM_KEY;
7148         key.offset = (u64)-1;
7149
7150 again:
7151         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7152                                 0, 0);
7153         if (ret < 0) {
7154                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7155                 btrfs_release_path(&path);
7156                 return ret;
7157         } else if (ret) {
7158                 if (path.slots[0] > 0) {
7159                         path.slots[0]--;
7160                 } else {
7161                         ret = btrfs_prev_leaf(root, &path);
7162                         if (ret < 0) {
7163                                 goto out;
7164                         } else if (ret > 0) {
7165                                 ret = 0;
7166                                 goto out;
7167                         }
7168                 }
7169         }
7170
7171         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7172
7173         /*
7174          * Block group items come before extent items if they have the same
7175          * bytenr, so walk back one more just in case.  Dear future traveller,
7176          * first congrats on mastering time travel.  Now if it's not too much
7177          * trouble could you go back to 2006 and tell Chris to make the
7178          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7179          * EXTENT_ITEM_KEY please?
7180          */
7181         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7182                 if (path.slots[0] > 0) {
7183                         path.slots[0]--;
7184                 } else {
7185                         ret = btrfs_prev_leaf(root, &path);
7186                         if (ret < 0) {
7187                                 goto out;
7188                         } else if (ret > 0) {
7189                                 ret = 0;
7190                                 goto out;
7191                         }
7192                 }
7193                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7194         }
7195
7196         while (num_bytes) {
7197                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7198                         ret = btrfs_next_leaf(root, &path);
7199                         if (ret < 0) {
7200                                 fprintf(stderr, "Error going to next leaf "
7201                                         "%d\n", ret);
7202                                 btrfs_release_path(&path);
7203                                 return ret;
7204                         } else if (ret) {
7205                                 break;
7206                         }
7207                 }
7208                 leaf = path.nodes[0];
7209                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7210                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7211                         path.slots[0]++;
7212                         continue;
7213                 }
7214                 if (key.objectid + key.offset < bytenr) {
7215                         path.slots[0]++;
7216                         continue;
7217                 }
7218                 if (key.objectid > bytenr + num_bytes)
7219                         break;
7220
7221                 if (key.objectid == bytenr) {
7222                         if (key.offset >= num_bytes) {
7223                                 num_bytes = 0;
7224                                 break;
7225                         }
7226                         num_bytes -= key.offset;
7227                         bytenr += key.offset;
7228                 } else if (key.objectid < bytenr) {
7229                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7230                                 num_bytes = 0;
7231                                 break;
7232                         }
7233                         num_bytes = (bytenr + num_bytes) -
7234                                 (key.objectid + key.offset);
7235                         bytenr = key.objectid + key.offset;
7236                 } else {
7237                         if (key.objectid + key.offset < bytenr + num_bytes) {
7238                                 u64 new_start = key.objectid + key.offset;
7239                                 u64 new_bytes = bytenr + num_bytes - new_start;
7240
7241                                 /*
7242                                  * Weird case, the extent is in the middle of
7243                                  * our range, we'll have to search one side
7244                                  * and then the other.  Not sure if this happens
7245                                  * in real life, but no harm in coding it up
7246                                  * anyway just in case.
7247                                  */
7248                                 btrfs_release_path(&path);
7249                                 ret = check_extent_exists(root, new_start,
7250                                                           new_bytes);
7251                                 if (ret) {
7252                                         fprintf(stderr, "Right section didn't "
7253                                                 "have a record\n");
7254                                         break;
7255                                 }
7256                                 num_bytes = key.objectid - bytenr;
7257                                 goto again;
7258                         }
7259                         num_bytes = key.objectid - bytenr;
7260                 }
7261                 path.slots[0]++;
7262         }
7263         ret = 0;
7264
7265 out:
7266         if (num_bytes && !ret) {
7267                 fprintf(stderr, "There are no extents for csum range "
7268                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7269                 ret = 1;
7270         }
7271
7272         btrfs_release_path(&path);
7273         return ret;
7274 }
7275
7276 static int check_csums(struct btrfs_root *root)
7277 {
7278         struct btrfs_path path;
7279         struct extent_buffer *leaf;
7280         struct btrfs_key key;
7281         u64 offset = 0, num_bytes = 0;
7282         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7283         int errors = 0;
7284         int ret;
7285         u64 data_len;
7286         unsigned long leaf_offset;
7287
7288         root = root->fs_info->csum_root;
7289         if (!extent_buffer_uptodate(root->node)) {
7290                 fprintf(stderr, "No valid csum tree found\n");
7291                 return -ENOENT;
7292         }
7293
7294         btrfs_init_path(&path);
7295         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7296         key.type = BTRFS_EXTENT_CSUM_KEY;
7297         key.offset = 0;
7298         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7299         if (ret < 0) {
7300                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7301                 btrfs_release_path(&path);
7302                 return ret;
7303         }
7304
7305         if (ret > 0 && path.slots[0])
7306                 path.slots[0]--;
7307         ret = 0;
7308
7309         while (1) {
7310                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7311                         ret = btrfs_next_leaf(root, &path);
7312                         if (ret < 0) {
7313                                 fprintf(stderr, "Error going to next leaf "
7314                                         "%d\n", ret);
7315                                 break;
7316                         }
7317                         if (ret)
7318                                 break;
7319                 }
7320                 leaf = path.nodes[0];
7321
7322                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7323                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7324                         path.slots[0]++;
7325                         continue;
7326                 }
7327
7328                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7329                               csum_size) * root->sectorsize;
7330                 if (!check_data_csum)
7331                         goto skip_csum_check;
7332                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7333                 ret = check_extent_csums(root, key.offset, data_len,
7334                                          leaf_offset, leaf);
7335                 if (ret)
7336                         break;
7337 skip_csum_check:
7338                 if (!num_bytes) {
7339                         offset = key.offset;
7340                 } else if (key.offset != offset + num_bytes) {
7341                         ret = check_extent_exists(root, offset, num_bytes);
7342                         if (ret) {
7343                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7344                                         "there is no extent record\n",
7345                                         offset, offset+num_bytes);
7346                                 errors++;
7347                         }
7348                         offset = key.offset;
7349                         num_bytes = 0;
7350                 }
7351                 num_bytes += data_len;
7352                 path.slots[0]++;
7353         }
7354
7355         btrfs_release_path(&path);
7356         return errors;
7357 }
7358
7359 static int is_dropped_key(struct btrfs_key *key,
7360                           struct btrfs_key *drop_key) {
7361         if (key->objectid < drop_key->objectid)
7362                 return 1;
7363         else if (key->objectid == drop_key->objectid) {
7364                 if (key->type < drop_key->type)
7365                         return 1;
7366                 else if (key->type == drop_key->type) {
7367                         if (key->offset < drop_key->offset)
7368                                 return 1;
7369                 }
7370         }
7371         return 0;
7372 }
7373
7374 /*
7375  * Here are the rules for FULL_BACKREF.
7376  *
7377  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7378  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7379  *      FULL_BACKREF set.
7380  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7381  *    if it happened after the relocation occurred since we'll have dropped the
7382  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7383  *    have no real way to know for sure.
7384  *
7385  * We process the blocks one root at a time, and we start from the lowest root
7386  * objectid and go to the highest.  So we can just lookup the owner backref for
7387  * the record and if we don't find it then we know it doesn't exist and we have
7388  * a FULL BACKREF.
7389  *
7390  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7391  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7392  * be set or not and then we can check later once we've gathered all the refs.
7393  */
7394 static int calc_extent_flag(struct btrfs_root *root,
7395                            struct cache_tree *extent_cache,
7396                            struct extent_buffer *buf,
7397                            struct root_item_record *ri,
7398                            u64 *flags)
7399 {
7400         struct extent_record *rec;
7401         struct cache_extent *cache;
7402         struct tree_backref *tback;
7403         u64 owner = 0;
7404
7405         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7406         /* we have added this extent before */
7407         if (!cache)
7408                 return -ENOENT;
7409
7410         rec = container_of(cache, struct extent_record, cache);
7411
7412         /*
7413          * Except file/reloc tree, we can not have
7414          * FULL BACKREF MODE
7415          */
7416         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7417                 goto normal;
7418         /*
7419          * root node
7420          */
7421         if (buf->start == ri->bytenr)
7422                 goto normal;
7423
7424         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7425                 goto full_backref;
7426
7427         owner = btrfs_header_owner(buf);
7428         if (owner == ri->objectid)
7429                 goto normal;
7430
7431         tback = find_tree_backref(rec, 0, owner);
7432         if (!tback)
7433                 goto full_backref;
7434 normal:
7435         *flags = 0;
7436         if (rec->flag_block_full_backref != FLAG_UNSET &&
7437             rec->flag_block_full_backref != 0)
7438                 rec->bad_full_backref = 1;
7439         return 0;
7440 full_backref:
7441         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7442         if (rec->flag_block_full_backref != FLAG_UNSET &&
7443             rec->flag_block_full_backref != 1)
7444                 rec->bad_full_backref = 1;
7445         return 0;
7446 }
7447
7448 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7449 {
7450         fprintf(stderr, "Invalid key type(");
7451         print_key_type(stderr, 0, key_type);
7452         fprintf(stderr, ") found in root(");
7453         print_objectid(stderr, rootid, 0);
7454         fprintf(stderr, ")\n");
7455 }
7456
7457 /*
7458  * Check if the key is valid with its extent buffer.
7459  *
7460  * This is a early check in case invalid key exists in a extent buffer
7461  * This is not comprehensive yet, but should prevent wrong key/item passed
7462  * further
7463  */
7464 static int check_type_with_root(u64 rootid, u8 key_type)
7465 {
7466         switch (key_type) {
7467         /* Only valid in chunk tree */
7468         case BTRFS_DEV_ITEM_KEY:
7469         case BTRFS_CHUNK_ITEM_KEY:
7470                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7471                         goto err;
7472                 break;
7473         /* valid in csum and log tree */
7474         case BTRFS_CSUM_TREE_OBJECTID:
7475                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7476                       is_fstree(rootid)))
7477                         goto err;
7478                 break;
7479         case BTRFS_EXTENT_ITEM_KEY:
7480         case BTRFS_METADATA_ITEM_KEY:
7481         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7482                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7483                         goto err;
7484                 break;
7485         case BTRFS_ROOT_ITEM_KEY:
7486                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7487                         goto err;
7488                 break;
7489         case BTRFS_DEV_EXTENT_KEY:
7490                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7491                         goto err;
7492                 break;
7493         }
7494         return 0;
7495 err:
7496         report_mismatch_key_root(key_type, rootid);
7497         return -EINVAL;
7498 }
7499
7500 static int run_next_block(struct btrfs_root *root,
7501                           struct block_info *bits,
7502                           int bits_nr,
7503                           u64 *last,
7504                           struct cache_tree *pending,
7505                           struct cache_tree *seen,
7506                           struct cache_tree *reada,
7507                           struct cache_tree *nodes,
7508                           struct cache_tree *extent_cache,
7509                           struct cache_tree *chunk_cache,
7510                           struct rb_root *dev_cache,
7511                           struct block_group_tree *block_group_cache,
7512                           struct device_extent_tree *dev_extent_cache,
7513                           struct root_item_record *ri)
7514 {
7515         struct extent_buffer *buf;
7516         struct extent_record *rec = NULL;
7517         u64 bytenr;
7518         u32 size;
7519         u64 parent;
7520         u64 owner;
7521         u64 flags;
7522         u64 ptr;
7523         u64 gen = 0;
7524         int ret = 0;
7525         int i;
7526         int nritems;
7527         struct btrfs_key key;
7528         struct cache_extent *cache;
7529         int reada_bits;
7530
7531         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7532                                     bits_nr, &reada_bits);
7533         if (nritems == 0)
7534                 return 1;
7535
7536         if (!reada_bits) {
7537                 for(i = 0; i < nritems; i++) {
7538                         ret = add_cache_extent(reada, bits[i].start,
7539                                                bits[i].size);
7540                         if (ret == -EEXIST)
7541                                 continue;
7542
7543                         /* fixme, get the parent transid */
7544                         readahead_tree_block(root, bits[i].start,
7545                                              bits[i].size, 0);
7546                 }
7547         }
7548         *last = bits[0].start;
7549         bytenr = bits[0].start;
7550         size = bits[0].size;
7551
7552         cache = lookup_cache_extent(pending, bytenr, size);
7553         if (cache) {
7554                 remove_cache_extent(pending, cache);
7555                 free(cache);
7556         }
7557         cache = lookup_cache_extent(reada, bytenr, size);
7558         if (cache) {
7559                 remove_cache_extent(reada, cache);
7560                 free(cache);
7561         }
7562         cache = lookup_cache_extent(nodes, bytenr, size);
7563         if (cache) {
7564                 remove_cache_extent(nodes, cache);
7565                 free(cache);
7566         }
7567         cache = lookup_cache_extent(extent_cache, bytenr, size);
7568         if (cache) {
7569                 rec = container_of(cache, struct extent_record, cache);
7570                 gen = rec->parent_generation;
7571         }
7572
7573         /* fixme, get the real parent transid */
7574         buf = read_tree_block(root, bytenr, size, gen);
7575         if (!extent_buffer_uptodate(buf)) {
7576                 record_bad_block_io(root->fs_info,
7577                                     extent_cache, bytenr, size);
7578                 goto out;
7579         }
7580
7581         nritems = btrfs_header_nritems(buf);
7582
7583         flags = 0;
7584         if (!init_extent_tree) {
7585                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7586                                        btrfs_header_level(buf), 1, NULL,
7587                                        &flags);
7588                 if (ret < 0) {
7589                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7590                         if (ret < 0) {
7591                                 fprintf(stderr, "Couldn't calc extent flags\n");
7592                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7593                         }
7594                 }
7595         } else {
7596                 flags = 0;
7597                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7598                 if (ret < 0) {
7599                         fprintf(stderr, "Couldn't calc extent flags\n");
7600                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7601                 }
7602         }
7603
7604         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7605                 if (ri != NULL &&
7606                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7607                     ri->objectid == btrfs_header_owner(buf)) {
7608                         /*
7609                          * Ok we got to this block from it's original owner and
7610                          * we have FULL_BACKREF set.  Relocation can leave
7611                          * converted blocks over so this is altogether possible,
7612                          * however it's not possible if the generation > the
7613                          * last snapshot, so check for this case.
7614                          */
7615                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7616                             btrfs_header_generation(buf) > ri->last_snapshot) {
7617                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7618                                 rec->bad_full_backref = 1;
7619                         }
7620                 }
7621         } else {
7622                 if (ri != NULL &&
7623                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7624                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7625                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7626                         rec->bad_full_backref = 1;
7627                 }
7628         }
7629
7630         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7631                 rec->flag_block_full_backref = 1;
7632                 parent = bytenr;
7633                 owner = 0;
7634         } else {
7635                 rec->flag_block_full_backref = 0;
7636                 parent = 0;
7637                 owner = btrfs_header_owner(buf);
7638         }
7639
7640         ret = check_block(root, extent_cache, buf, flags);
7641         if (ret)
7642                 goto out;
7643
7644         if (btrfs_is_leaf(buf)) {
7645                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7646                 for (i = 0; i < nritems; i++) {
7647                         struct btrfs_file_extent_item *fi;
7648                         btrfs_item_key_to_cpu(buf, &key, i);
7649                         /*
7650                          * Check key type against the leaf owner.
7651                          * Could filter quite a lot of early error if
7652                          * owner is correct
7653                          */
7654                         if (check_type_with_root(btrfs_header_owner(buf),
7655                                                  key.type)) {
7656                                 fprintf(stderr, "ignoring invalid key\n");
7657                                 continue;
7658                         }
7659                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7660                                 process_extent_item(root, extent_cache, buf,
7661                                                     i);
7662                                 continue;
7663                         }
7664                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7665                                 process_extent_item(root, extent_cache, buf,
7666                                                     i);
7667                                 continue;
7668                         }
7669                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7670                                 total_csum_bytes +=
7671                                         btrfs_item_size_nr(buf, i);
7672                                 continue;
7673                         }
7674                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7675                                 process_chunk_item(chunk_cache, &key, buf, i);
7676                                 continue;
7677                         }
7678                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7679                                 process_device_item(dev_cache, &key, buf, i);
7680                                 continue;
7681                         }
7682                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7683                                 process_block_group_item(block_group_cache,
7684                                         &key, buf, i);
7685                                 continue;
7686                         }
7687                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7688                                 process_device_extent_item(dev_extent_cache,
7689                                         &key, buf, i);
7690                                 continue;
7691
7692                         }
7693                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7694 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7695                                 process_extent_ref_v0(extent_cache, buf, i);
7696 #else
7697                                 BUG();
7698 #endif
7699                                 continue;
7700                         }
7701
7702                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7703                                 ret = add_tree_backref(extent_cache,
7704                                                 key.objectid, 0, key.offset, 0);
7705                                 if (ret < 0)
7706                                         error("add_tree_backref failed: %s",
7707                                               strerror(-ret));
7708                                 continue;
7709                         }
7710                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7711                                 ret = add_tree_backref(extent_cache,
7712                                                 key.objectid, key.offset, 0, 0);
7713                                 if (ret < 0)
7714                                         error("add_tree_backref failed: %s",
7715                                               strerror(-ret));
7716                                 continue;
7717                         }
7718                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7719                                 struct btrfs_extent_data_ref *ref;
7720                                 ref = btrfs_item_ptr(buf, i,
7721                                                 struct btrfs_extent_data_ref);
7722                                 add_data_backref(extent_cache,
7723                                         key.objectid, 0,
7724                                         btrfs_extent_data_ref_root(buf, ref),
7725                                         btrfs_extent_data_ref_objectid(buf,
7726                                                                        ref),
7727                                         btrfs_extent_data_ref_offset(buf, ref),
7728                                         btrfs_extent_data_ref_count(buf, ref),
7729                                         0, root->sectorsize);
7730                                 continue;
7731                         }
7732                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7733                                 struct btrfs_shared_data_ref *ref;
7734                                 ref = btrfs_item_ptr(buf, i,
7735                                                 struct btrfs_shared_data_ref);
7736                                 add_data_backref(extent_cache,
7737                                         key.objectid, key.offset, 0, 0, 0,
7738                                         btrfs_shared_data_ref_count(buf, ref),
7739                                         0, root->sectorsize);
7740                                 continue;
7741                         }
7742                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7743                                 struct bad_item *bad;
7744
7745                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7746                                         continue;
7747                                 if (!owner)
7748                                         continue;
7749                                 bad = malloc(sizeof(struct bad_item));
7750                                 if (!bad)
7751                                         continue;
7752                                 INIT_LIST_HEAD(&bad->list);
7753                                 memcpy(&bad->key, &key,
7754                                        sizeof(struct btrfs_key));
7755                                 bad->root_id = owner;
7756                                 list_add_tail(&bad->list, &delete_items);
7757                                 continue;
7758                         }
7759                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7760                                 continue;
7761                         fi = btrfs_item_ptr(buf, i,
7762                                             struct btrfs_file_extent_item);
7763                         if (btrfs_file_extent_type(buf, fi) ==
7764                             BTRFS_FILE_EXTENT_INLINE)
7765                                 continue;
7766                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7767                                 continue;
7768
7769                         data_bytes_allocated +=
7770                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7771                         if (data_bytes_allocated < root->sectorsize) {
7772                                 abort();
7773                         }
7774                         data_bytes_referenced +=
7775                                 btrfs_file_extent_num_bytes(buf, fi);
7776                         add_data_backref(extent_cache,
7777                                 btrfs_file_extent_disk_bytenr(buf, fi),
7778                                 parent, owner, key.objectid, key.offset -
7779                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7780                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7781                 }
7782         } else {
7783                 int level;
7784                 struct btrfs_key first_key;
7785
7786                 first_key.objectid = 0;
7787
7788                 if (nritems > 0)
7789                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7790                 level = btrfs_header_level(buf);
7791                 for (i = 0; i < nritems; i++) {
7792                         struct extent_record tmpl;
7793
7794                         ptr = btrfs_node_blockptr(buf, i);
7795                         size = root->nodesize;
7796                         btrfs_node_key_to_cpu(buf, &key, i);
7797                         if (ri != NULL) {
7798                                 if ((level == ri->drop_level)
7799                                     && is_dropped_key(&key, &ri->drop_key)) {
7800                                         continue;
7801                                 }
7802                         }
7803
7804                         memset(&tmpl, 0, sizeof(tmpl));
7805                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7806                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7807                         tmpl.start = ptr;
7808                         tmpl.nr = size;
7809                         tmpl.refs = 1;
7810                         tmpl.metadata = 1;
7811                         tmpl.max_size = size;
7812                         ret = add_extent_rec(extent_cache, &tmpl);
7813                         if (ret < 0)
7814                                 goto out;
7815
7816                         ret = add_tree_backref(extent_cache, ptr, parent,
7817                                         owner, 1);
7818                         if (ret < 0) {
7819                                 error("add_tree_backref failed: %s",
7820                                       strerror(-ret));
7821                                 continue;
7822                         }
7823
7824                         if (level > 1) {
7825                                 add_pending(nodes, seen, ptr, size);
7826                         } else {
7827                                 add_pending(pending, seen, ptr, size);
7828                         }
7829                 }
7830                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7831                                       nritems) * sizeof(struct btrfs_key_ptr);
7832         }
7833         total_btree_bytes += buf->len;
7834         if (fs_root_objectid(btrfs_header_owner(buf)))
7835                 total_fs_tree_bytes += buf->len;
7836         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7837                 total_extent_tree_bytes += buf->len;
7838         if (!found_old_backref &&
7839             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7840             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7841             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7842                 found_old_backref = 1;
7843 out:
7844         free_extent_buffer(buf);
7845         return ret;
7846 }
7847
7848 static int add_root_to_pending(struct extent_buffer *buf,
7849                                struct cache_tree *extent_cache,
7850                                struct cache_tree *pending,
7851                                struct cache_tree *seen,
7852                                struct cache_tree *nodes,
7853                                u64 objectid)
7854 {
7855         struct extent_record tmpl;
7856         int ret;
7857
7858         if (btrfs_header_level(buf) > 0)
7859                 add_pending(nodes, seen, buf->start, buf->len);
7860         else
7861                 add_pending(pending, seen, buf->start, buf->len);
7862
7863         memset(&tmpl, 0, sizeof(tmpl));
7864         tmpl.start = buf->start;
7865         tmpl.nr = buf->len;
7866         tmpl.is_root = 1;
7867         tmpl.refs = 1;
7868         tmpl.metadata = 1;
7869         tmpl.max_size = buf->len;
7870         add_extent_rec(extent_cache, &tmpl);
7871
7872         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7873             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7874                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7875                                 0, 1);
7876         else
7877                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7878                                 1);
7879         return ret;
7880 }
7881
7882 /* as we fix the tree, we might be deleting blocks that
7883  * we're tracking for repair.  This hook makes sure we
7884  * remove any backrefs for blocks as we are fixing them.
7885  */
7886 static int free_extent_hook(struct btrfs_trans_handle *trans,
7887                             struct btrfs_root *root,
7888                             u64 bytenr, u64 num_bytes, u64 parent,
7889                             u64 root_objectid, u64 owner, u64 offset,
7890                             int refs_to_drop)
7891 {
7892         struct extent_record *rec;
7893         struct cache_extent *cache;
7894         int is_data;
7895         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7896
7897         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7898         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7899         if (!cache)
7900                 return 0;
7901
7902         rec = container_of(cache, struct extent_record, cache);
7903         if (is_data) {
7904                 struct data_backref *back;
7905                 back = find_data_backref(rec, parent, root_objectid, owner,
7906                                          offset, 1, bytenr, num_bytes);
7907                 if (!back)
7908                         goto out;
7909                 if (back->node.found_ref) {
7910                         back->found_ref -= refs_to_drop;
7911                         if (rec->refs)
7912                                 rec->refs -= refs_to_drop;
7913                 }
7914                 if (back->node.found_extent_tree) {
7915                         back->num_refs -= refs_to_drop;
7916                         if (rec->extent_item_refs)
7917                                 rec->extent_item_refs -= refs_to_drop;
7918                 }
7919                 if (back->found_ref == 0)
7920                         back->node.found_ref = 0;
7921                 if (back->num_refs == 0)
7922                         back->node.found_extent_tree = 0;
7923
7924                 if (!back->node.found_extent_tree && back->node.found_ref) {
7925                         list_del(&back->node.list);
7926                         free(back);
7927                 }
7928         } else {
7929                 struct tree_backref *back;
7930                 back = find_tree_backref(rec, parent, root_objectid);
7931                 if (!back)
7932                         goto out;
7933                 if (back->node.found_ref) {
7934                         if (rec->refs)
7935                                 rec->refs--;
7936                         back->node.found_ref = 0;
7937                 }
7938                 if (back->node.found_extent_tree) {
7939                         if (rec->extent_item_refs)
7940                                 rec->extent_item_refs--;
7941                         back->node.found_extent_tree = 0;
7942                 }
7943                 if (!back->node.found_extent_tree && back->node.found_ref) {
7944                         list_del(&back->node.list);
7945                         free(back);
7946                 }
7947         }
7948         maybe_free_extent_rec(extent_cache, rec);
7949 out:
7950         return 0;
7951 }
7952
7953 static int delete_extent_records(struct btrfs_trans_handle *trans,
7954                                  struct btrfs_root *root,
7955                                  struct btrfs_path *path,
7956                                  u64 bytenr, u64 new_len)
7957 {
7958         struct btrfs_key key;
7959         struct btrfs_key found_key;
7960         struct extent_buffer *leaf;
7961         int ret;
7962         int slot;
7963
7964
7965         key.objectid = bytenr;
7966         key.type = (u8)-1;
7967         key.offset = (u64)-1;
7968
7969         while(1) {
7970                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7971                                         &key, path, 0, 1);
7972                 if (ret < 0)
7973                         break;
7974
7975                 if (ret > 0) {
7976                         ret = 0;
7977                         if (path->slots[0] == 0)
7978                                 break;
7979                         path->slots[0]--;
7980                 }
7981                 ret = 0;
7982
7983                 leaf = path->nodes[0];
7984                 slot = path->slots[0];
7985
7986                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7987                 if (found_key.objectid != bytenr)
7988                         break;
7989
7990                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7991                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7992                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7993                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7994                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7995                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7996                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7997                         btrfs_release_path(path);
7998                         if (found_key.type == 0) {
7999                                 if (found_key.offset == 0)
8000                                         break;
8001                                 key.offset = found_key.offset - 1;
8002                                 key.type = found_key.type;
8003                         }
8004                         key.type = found_key.type - 1;
8005                         key.offset = (u64)-1;
8006                         continue;
8007                 }
8008
8009                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8010                         found_key.objectid, found_key.type, found_key.offset);
8011
8012                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8013                 if (ret)
8014                         break;
8015                 btrfs_release_path(path);
8016
8017                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8018                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8019                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8020                                 found_key.offset : root->nodesize;
8021
8022                         ret = btrfs_update_block_group(trans, root, bytenr,
8023                                                        bytes, 0, 0);
8024                         if (ret)
8025                                 break;
8026                 }
8027         }
8028
8029         btrfs_release_path(path);
8030         return ret;
8031 }
8032
8033 /*
8034  * for a single backref, this will allocate a new extent
8035  * and add the backref to it.
8036  */
8037 static int record_extent(struct btrfs_trans_handle *trans,
8038                          struct btrfs_fs_info *info,
8039                          struct btrfs_path *path,
8040                          struct extent_record *rec,
8041                          struct extent_backref *back,
8042                          int allocated, u64 flags)
8043 {
8044         int ret = 0;
8045         struct btrfs_root *extent_root = info->extent_root;
8046         struct extent_buffer *leaf;
8047         struct btrfs_key ins_key;
8048         struct btrfs_extent_item *ei;
8049         struct data_backref *dback;
8050         struct btrfs_tree_block_info *bi;
8051
8052         if (!back->is_data)
8053                 rec->max_size = max_t(u64, rec->max_size,
8054                                     info->extent_root->nodesize);
8055
8056         if (!allocated) {
8057                 u32 item_size = sizeof(*ei);
8058
8059                 if (!back->is_data)
8060                         item_size += sizeof(*bi);
8061
8062                 ins_key.objectid = rec->start;
8063                 ins_key.offset = rec->max_size;
8064                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8065
8066                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8067                                         &ins_key, item_size);
8068                 if (ret)
8069                         goto fail;
8070
8071                 leaf = path->nodes[0];
8072                 ei = btrfs_item_ptr(leaf, path->slots[0],
8073                                     struct btrfs_extent_item);
8074
8075                 btrfs_set_extent_refs(leaf, ei, 0);
8076                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8077
8078                 if (back->is_data) {
8079                         btrfs_set_extent_flags(leaf, ei,
8080                                                BTRFS_EXTENT_FLAG_DATA);
8081                 } else {
8082                         struct btrfs_disk_key copy_key;;
8083
8084                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8085                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8086                                              sizeof(*bi));
8087
8088                         btrfs_set_disk_key_objectid(&copy_key,
8089                                                     rec->info_objectid);
8090                         btrfs_set_disk_key_type(&copy_key, 0);
8091                         btrfs_set_disk_key_offset(&copy_key, 0);
8092
8093                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8094                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8095
8096                         btrfs_set_extent_flags(leaf, ei,
8097                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8098                 }
8099
8100                 btrfs_mark_buffer_dirty(leaf);
8101                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8102                                                rec->max_size, 1, 0);
8103                 if (ret)
8104                         goto fail;
8105                 btrfs_release_path(path);
8106         }
8107
8108         if (back->is_data) {
8109                 u64 parent;
8110                 int i;
8111
8112                 dback = to_data_backref(back);
8113                 if (back->full_backref)
8114                         parent = dback->parent;
8115                 else
8116                         parent = 0;
8117
8118                 for (i = 0; i < dback->found_ref; i++) {
8119                         /* if parent != 0, we're doing a full backref
8120                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8121                          * just makes the backref allocator create a data
8122                          * backref
8123                          */
8124                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8125                                                    rec->start, rec->max_size,
8126                                                    parent,
8127                                                    dback->root,
8128                                                    parent ?
8129                                                    BTRFS_FIRST_FREE_OBJECTID :
8130                                                    dback->owner,
8131                                                    dback->offset);
8132                         if (ret)
8133                                 break;
8134                 }
8135                 fprintf(stderr, "adding new data backref"
8136                                 " on %llu %s %llu owner %llu"
8137                                 " offset %llu found %d\n",
8138                                 (unsigned long long)rec->start,
8139                                 back->full_backref ?
8140                                 "parent" : "root",
8141                                 back->full_backref ?
8142                                 (unsigned long long)parent :
8143                                 (unsigned long long)dback->root,
8144                                 (unsigned long long)dback->owner,
8145                                 (unsigned long long)dback->offset,
8146                                 dback->found_ref);
8147         } else {
8148                 u64 parent;
8149                 struct tree_backref *tback;
8150
8151                 tback = to_tree_backref(back);
8152                 if (back->full_backref)
8153                         parent = tback->parent;
8154                 else
8155                         parent = 0;
8156
8157                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8158                                            rec->start, rec->max_size,
8159                                            parent, tback->root, 0, 0);
8160                 fprintf(stderr, "adding new tree backref on "
8161                         "start %llu len %llu parent %llu root %llu\n",
8162                         rec->start, rec->max_size, parent, tback->root);
8163         }
8164 fail:
8165         btrfs_release_path(path);
8166         return ret;
8167 }
8168
8169 static struct extent_entry *find_entry(struct list_head *entries,
8170                                        u64 bytenr, u64 bytes)
8171 {
8172         struct extent_entry *entry = NULL;
8173
8174         list_for_each_entry(entry, entries, list) {
8175                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8176                         return entry;
8177         }
8178
8179         return NULL;
8180 }
8181
8182 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8183 {
8184         struct extent_entry *entry, *best = NULL, *prev = NULL;
8185
8186         list_for_each_entry(entry, entries, list) {
8187                 /*
8188                  * If there are as many broken entries as entries then we know
8189                  * not to trust this particular entry.
8190                  */
8191                 if (entry->broken == entry->count)
8192                         continue;
8193
8194                 /*
8195                  * Special case, when there are only two entries and 'best' is
8196                  * the first one
8197                  */
8198                 if (!prev) {
8199                         best = entry;
8200                         prev = entry;
8201                         continue;
8202                 }
8203
8204                 /*
8205                  * If our current entry == best then we can't be sure our best
8206                  * is really the best, so we need to keep searching.
8207                  */
8208                 if (best && best->count == entry->count) {
8209                         prev = entry;
8210                         best = NULL;
8211                         continue;
8212                 }
8213
8214                 /* Prev == entry, not good enough, have to keep searching */
8215                 if (!prev->broken && prev->count == entry->count)
8216                         continue;
8217
8218                 if (!best)
8219                         best = (prev->count > entry->count) ? prev : entry;
8220                 else if (best->count < entry->count)
8221                         best = entry;
8222                 prev = entry;
8223         }
8224
8225         return best;
8226 }
8227
8228 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8229                       struct data_backref *dback, struct extent_entry *entry)
8230 {
8231         struct btrfs_trans_handle *trans;
8232         struct btrfs_root *root;
8233         struct btrfs_file_extent_item *fi;
8234         struct extent_buffer *leaf;
8235         struct btrfs_key key;
8236         u64 bytenr, bytes;
8237         int ret, err;
8238
8239         key.objectid = dback->root;
8240         key.type = BTRFS_ROOT_ITEM_KEY;
8241         key.offset = (u64)-1;
8242         root = btrfs_read_fs_root(info, &key);
8243         if (IS_ERR(root)) {
8244                 fprintf(stderr, "Couldn't find root for our ref\n");
8245                 return -EINVAL;
8246         }
8247
8248         /*
8249          * The backref points to the original offset of the extent if it was
8250          * split, so we need to search down to the offset we have and then walk
8251          * forward until we find the backref we're looking for.
8252          */
8253         key.objectid = dback->owner;
8254         key.type = BTRFS_EXTENT_DATA_KEY;
8255         key.offset = dback->offset;
8256         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8257         if (ret < 0) {
8258                 fprintf(stderr, "Error looking up ref %d\n", ret);
8259                 return ret;
8260         }
8261
8262         while (1) {
8263                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8264                         ret = btrfs_next_leaf(root, path);
8265                         if (ret) {
8266                                 fprintf(stderr, "Couldn't find our ref, next\n");
8267                                 return -EINVAL;
8268                         }
8269                 }
8270                 leaf = path->nodes[0];
8271                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8272                 if (key.objectid != dback->owner ||
8273                     key.type != BTRFS_EXTENT_DATA_KEY) {
8274                         fprintf(stderr, "Couldn't find our ref, search\n");
8275                         return -EINVAL;
8276                 }
8277                 fi = btrfs_item_ptr(leaf, path->slots[0],
8278                                     struct btrfs_file_extent_item);
8279                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8280                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8281
8282                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8283                         break;
8284                 path->slots[0]++;
8285         }
8286
8287         btrfs_release_path(path);
8288
8289         trans = btrfs_start_transaction(root, 1);
8290         if (IS_ERR(trans))
8291                 return PTR_ERR(trans);
8292
8293         /*
8294          * Ok we have the key of the file extent we want to fix, now we can cow
8295          * down to the thing and fix it.
8296          */
8297         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8298         if (ret < 0) {
8299                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8300                         key.objectid, key.type, key.offset, ret);
8301                 goto out;
8302         }
8303         if (ret > 0) {
8304                 fprintf(stderr, "Well that's odd, we just found this key "
8305                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8306                         key.offset);
8307                 ret = -EINVAL;
8308                 goto out;
8309         }
8310         leaf = path->nodes[0];
8311         fi = btrfs_item_ptr(leaf, path->slots[0],
8312                             struct btrfs_file_extent_item);
8313
8314         if (btrfs_file_extent_compression(leaf, fi) &&
8315             dback->disk_bytenr != entry->bytenr) {
8316                 fprintf(stderr, "Ref doesn't match the record start and is "
8317                         "compressed, please take a btrfs-image of this file "
8318                         "system and send it to a btrfs developer so they can "
8319                         "complete this functionality for bytenr %Lu\n",
8320                         dback->disk_bytenr);
8321                 ret = -EINVAL;
8322                 goto out;
8323         }
8324
8325         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8326                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8327         } else if (dback->disk_bytenr > entry->bytenr) {
8328                 u64 off_diff, offset;
8329
8330                 off_diff = dback->disk_bytenr - entry->bytenr;
8331                 offset = btrfs_file_extent_offset(leaf, fi);
8332                 if (dback->disk_bytenr + offset +
8333                     btrfs_file_extent_num_bytes(leaf, fi) >
8334                     entry->bytenr + entry->bytes) {
8335                         fprintf(stderr, "Ref is past the entry end, please "
8336                                 "take a btrfs-image of this file system and "
8337                                 "send it to a btrfs developer, ref %Lu\n",
8338                                 dback->disk_bytenr);
8339                         ret = -EINVAL;
8340                         goto out;
8341                 }
8342                 offset += off_diff;
8343                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8344                 btrfs_set_file_extent_offset(leaf, fi, offset);
8345         } else if (dback->disk_bytenr < entry->bytenr) {
8346                 u64 offset;
8347
8348                 offset = btrfs_file_extent_offset(leaf, fi);
8349                 if (dback->disk_bytenr + offset < entry->bytenr) {
8350                         fprintf(stderr, "Ref is before the entry start, please"
8351                                 " take a btrfs-image of this file system and "
8352                                 "send it to a btrfs developer, ref %Lu\n",
8353                                 dback->disk_bytenr);
8354                         ret = -EINVAL;
8355                         goto out;
8356                 }
8357
8358                 offset += dback->disk_bytenr;
8359                 offset -= entry->bytenr;
8360                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8361                 btrfs_set_file_extent_offset(leaf, fi, offset);
8362         }
8363
8364         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8365
8366         /*
8367          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8368          * only do this if we aren't using compression, otherwise it's a
8369          * trickier case.
8370          */
8371         if (!btrfs_file_extent_compression(leaf, fi))
8372                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8373         else
8374                 printf("ram bytes may be wrong?\n");
8375         btrfs_mark_buffer_dirty(leaf);
8376 out:
8377         err = btrfs_commit_transaction(trans, root);
8378         btrfs_release_path(path);
8379         return ret ? ret : err;
8380 }
8381
8382 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8383                            struct extent_record *rec)
8384 {
8385         struct extent_backref *back;
8386         struct data_backref *dback;
8387         struct extent_entry *entry, *best = NULL;
8388         LIST_HEAD(entries);
8389         int nr_entries = 0;
8390         int broken_entries = 0;
8391         int ret = 0;
8392         short mismatch = 0;
8393
8394         /*
8395          * Metadata is easy and the backrefs should always agree on bytenr and
8396          * size, if not we've got bigger issues.
8397          */
8398         if (rec->metadata)
8399                 return 0;
8400
8401         list_for_each_entry(back, &rec->backrefs, list) {
8402                 if (back->full_backref || !back->is_data)
8403                         continue;
8404
8405                 dback = to_data_backref(back);
8406
8407                 /*
8408                  * We only pay attention to backrefs that we found a real
8409                  * backref for.
8410                  */
8411                 if (dback->found_ref == 0)
8412                         continue;
8413
8414                 /*
8415                  * For now we only catch when the bytes don't match, not the
8416                  * bytenr.  We can easily do this at the same time, but I want
8417                  * to have a fs image to test on before we just add repair
8418                  * functionality willy-nilly so we know we won't screw up the
8419                  * repair.
8420                  */
8421
8422                 entry = find_entry(&entries, dback->disk_bytenr,
8423                                    dback->bytes);
8424                 if (!entry) {
8425                         entry = malloc(sizeof(struct extent_entry));
8426                         if (!entry) {
8427                                 ret = -ENOMEM;
8428                                 goto out;
8429                         }
8430                         memset(entry, 0, sizeof(*entry));
8431                         entry->bytenr = dback->disk_bytenr;
8432                         entry->bytes = dback->bytes;
8433                         list_add_tail(&entry->list, &entries);
8434                         nr_entries++;
8435                 }
8436
8437                 /*
8438                  * If we only have on entry we may think the entries agree when
8439                  * in reality they don't so we have to do some extra checking.
8440                  */
8441                 if (dback->disk_bytenr != rec->start ||
8442                     dback->bytes != rec->nr || back->broken)
8443                         mismatch = 1;
8444
8445                 if (back->broken) {
8446                         entry->broken++;
8447                         broken_entries++;
8448                 }
8449
8450                 entry->count++;
8451         }
8452
8453         /* Yay all the backrefs agree, carry on good sir */
8454         if (nr_entries <= 1 && !mismatch)
8455                 goto out;
8456
8457         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8458                 "%Lu\n", rec->start);
8459
8460         /*
8461          * First we want to see if the backrefs can agree amongst themselves who
8462          * is right, so figure out which one of the entries has the highest
8463          * count.
8464          */
8465         best = find_most_right_entry(&entries);
8466
8467         /*
8468          * Ok so we may have an even split between what the backrefs think, so
8469          * this is where we use the extent ref to see what it thinks.
8470          */
8471         if (!best) {
8472                 entry = find_entry(&entries, rec->start, rec->nr);
8473                 if (!entry && (!broken_entries || !rec->found_rec)) {
8474                         fprintf(stderr, "Backrefs don't agree with each other "
8475                                 "and extent record doesn't agree with anybody,"
8476                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8477                                 rec->start, rec->nr);
8478                         ret = -EINVAL;
8479                         goto out;
8480                 } else if (!entry) {
8481                         /*
8482                          * Ok our backrefs were broken, we'll assume this is the
8483                          * correct value and add an entry for this range.
8484                          */
8485                         entry = malloc(sizeof(struct extent_entry));
8486                         if (!entry) {
8487                                 ret = -ENOMEM;
8488                                 goto out;
8489                         }
8490                         memset(entry, 0, sizeof(*entry));
8491                         entry->bytenr = rec->start;
8492                         entry->bytes = rec->nr;
8493                         list_add_tail(&entry->list, &entries);
8494                         nr_entries++;
8495                 }
8496                 entry->count++;
8497                 best = find_most_right_entry(&entries);
8498                 if (!best) {
8499                         fprintf(stderr, "Backrefs and extent record evenly "
8500                                 "split on who is right, this is going to "
8501                                 "require user input to fix bytenr %Lu bytes "
8502                                 "%Lu\n", rec->start, rec->nr);
8503                         ret = -EINVAL;
8504                         goto out;
8505                 }
8506         }
8507
8508         /*
8509          * I don't think this can happen currently as we'll abort() if we catch
8510          * this case higher up, but in case somebody removes that we still can't
8511          * deal with it properly here yet, so just bail out of that's the case.
8512          */
8513         if (best->bytenr != rec->start) {
8514                 fprintf(stderr, "Extent start and backref starts don't match, "
8515                         "please use btrfs-image on this file system and send "
8516                         "it to a btrfs developer so they can make fsck fix "
8517                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8518                         rec->start, rec->nr);
8519                 ret = -EINVAL;
8520                 goto out;
8521         }
8522
8523         /*
8524          * Ok great we all agreed on an extent record, let's go find the real
8525          * references and fix up the ones that don't match.
8526          */
8527         list_for_each_entry(back, &rec->backrefs, list) {
8528                 if (back->full_backref || !back->is_data)
8529                         continue;
8530
8531                 dback = to_data_backref(back);
8532
8533                 /*
8534                  * Still ignoring backrefs that don't have a real ref attached
8535                  * to them.
8536                  */
8537                 if (dback->found_ref == 0)
8538                         continue;
8539
8540                 if (dback->bytes == best->bytes &&
8541                     dback->disk_bytenr == best->bytenr)
8542                         continue;
8543
8544                 ret = repair_ref(info, path, dback, best);
8545                 if (ret)
8546                         goto out;
8547         }
8548
8549         /*
8550          * Ok we messed with the actual refs, which means we need to drop our
8551          * entire cache and go back and rescan.  I know this is a huge pain and
8552          * adds a lot of extra work, but it's the only way to be safe.  Once all
8553          * the backrefs agree we may not need to do anything to the extent
8554          * record itself.
8555          */
8556         ret = -EAGAIN;
8557 out:
8558         while (!list_empty(&entries)) {
8559                 entry = list_entry(entries.next, struct extent_entry, list);
8560                 list_del_init(&entry->list);
8561                 free(entry);
8562         }
8563         return ret;
8564 }
8565
8566 static int process_duplicates(struct btrfs_root *root,
8567                               struct cache_tree *extent_cache,
8568                               struct extent_record *rec)
8569 {
8570         struct extent_record *good, *tmp;
8571         struct cache_extent *cache;
8572         int ret;
8573
8574         /*
8575          * If we found a extent record for this extent then return, or if we
8576          * have more than one duplicate we are likely going to need to delete
8577          * something.
8578          */
8579         if (rec->found_rec || rec->num_duplicates > 1)
8580                 return 0;
8581
8582         /* Shouldn't happen but just in case */
8583         BUG_ON(!rec->num_duplicates);
8584
8585         /*
8586          * So this happens if we end up with a backref that doesn't match the
8587          * actual extent entry.  So either the backref is bad or the extent
8588          * entry is bad.  Either way we want to have the extent_record actually
8589          * reflect what we found in the extent_tree, so we need to take the
8590          * duplicate out and use that as the extent_record since the only way we
8591          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8592          */
8593         remove_cache_extent(extent_cache, &rec->cache);
8594
8595         good = to_extent_record(rec->dups.next);
8596         list_del_init(&good->list);
8597         INIT_LIST_HEAD(&good->backrefs);
8598         INIT_LIST_HEAD(&good->dups);
8599         good->cache.start = good->start;
8600         good->cache.size = good->nr;
8601         good->content_checked = 0;
8602         good->owner_ref_checked = 0;
8603         good->num_duplicates = 0;
8604         good->refs = rec->refs;
8605         list_splice_init(&rec->backrefs, &good->backrefs);
8606         while (1) {
8607                 cache = lookup_cache_extent(extent_cache, good->start,
8608                                             good->nr);
8609                 if (!cache)
8610                         break;
8611                 tmp = container_of(cache, struct extent_record, cache);
8612
8613                 /*
8614                  * If we find another overlapping extent and it's found_rec is
8615                  * set then it's a duplicate and we need to try and delete
8616                  * something.
8617                  */
8618                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8619                         if (list_empty(&good->list))
8620                                 list_add_tail(&good->list,
8621                                               &duplicate_extents);
8622                         good->num_duplicates += tmp->num_duplicates + 1;
8623                         list_splice_init(&tmp->dups, &good->dups);
8624                         list_del_init(&tmp->list);
8625                         list_add_tail(&tmp->list, &good->dups);
8626                         remove_cache_extent(extent_cache, &tmp->cache);
8627                         continue;
8628                 }
8629
8630                 /*
8631                  * Ok we have another non extent item backed extent rec, so lets
8632                  * just add it to this extent and carry on like we did above.
8633                  */
8634                 good->refs += tmp->refs;
8635                 list_splice_init(&tmp->backrefs, &good->backrefs);
8636                 remove_cache_extent(extent_cache, &tmp->cache);
8637                 free(tmp);
8638         }
8639         ret = insert_cache_extent(extent_cache, &good->cache);
8640         BUG_ON(ret);
8641         free(rec);
8642         return good->num_duplicates ? 0 : 1;
8643 }
8644
8645 static int delete_duplicate_records(struct btrfs_root *root,
8646                                     struct extent_record *rec)
8647 {
8648         struct btrfs_trans_handle *trans;
8649         LIST_HEAD(delete_list);
8650         struct btrfs_path path;
8651         struct extent_record *tmp, *good, *n;
8652         int nr_del = 0;
8653         int ret = 0, err;
8654         struct btrfs_key key;
8655
8656         btrfs_init_path(&path);
8657
8658         good = rec;
8659         /* Find the record that covers all of the duplicates. */
8660         list_for_each_entry(tmp, &rec->dups, list) {
8661                 if (good->start < tmp->start)
8662                         continue;
8663                 if (good->nr > tmp->nr)
8664                         continue;
8665
8666                 if (tmp->start + tmp->nr < good->start + good->nr) {
8667                         fprintf(stderr, "Ok we have overlapping extents that "
8668                                 "aren't completely covered by each other, this "
8669                                 "is going to require more careful thought.  "
8670                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8671                                 tmp->start, tmp->nr, good->start, good->nr);
8672                         abort();
8673                 }
8674                 good = tmp;
8675         }
8676
8677         if (good != rec)
8678                 list_add_tail(&rec->list, &delete_list);
8679
8680         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8681                 if (tmp == good)
8682                         continue;
8683                 list_move_tail(&tmp->list, &delete_list);
8684         }
8685
8686         root = root->fs_info->extent_root;
8687         trans = btrfs_start_transaction(root, 1);
8688         if (IS_ERR(trans)) {
8689                 ret = PTR_ERR(trans);
8690                 goto out;
8691         }
8692
8693         list_for_each_entry(tmp, &delete_list, list) {
8694                 if (tmp->found_rec == 0)
8695                         continue;
8696                 key.objectid = tmp->start;
8697                 key.type = BTRFS_EXTENT_ITEM_KEY;
8698                 key.offset = tmp->nr;
8699
8700                 /* Shouldn't happen but just in case */
8701                 if (tmp->metadata) {
8702                         fprintf(stderr, "Well this shouldn't happen, extent "
8703                                 "record overlaps but is metadata? "
8704                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8705                         abort();
8706                 }
8707
8708                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8709                 if (ret) {
8710                         if (ret > 0)
8711                                 ret = -EINVAL;
8712                         break;
8713                 }
8714                 ret = btrfs_del_item(trans, root, &path);
8715                 if (ret)
8716                         break;
8717                 btrfs_release_path(&path);
8718                 nr_del++;
8719         }
8720         err = btrfs_commit_transaction(trans, root);
8721         if (err && !ret)
8722                 ret = err;
8723 out:
8724         while (!list_empty(&delete_list)) {
8725                 tmp = to_extent_record(delete_list.next);
8726                 list_del_init(&tmp->list);
8727                 if (tmp == rec)
8728                         continue;
8729                 free(tmp);
8730         }
8731
8732         while (!list_empty(&rec->dups)) {
8733                 tmp = to_extent_record(rec->dups.next);
8734                 list_del_init(&tmp->list);
8735                 free(tmp);
8736         }
8737
8738         btrfs_release_path(&path);
8739
8740         if (!ret && !nr_del)
8741                 rec->num_duplicates = 0;
8742
8743         return ret ? ret : nr_del;
8744 }
8745
8746 static int find_possible_backrefs(struct btrfs_fs_info *info,
8747                                   struct btrfs_path *path,
8748                                   struct cache_tree *extent_cache,
8749                                   struct extent_record *rec)
8750 {
8751         struct btrfs_root *root;
8752         struct extent_backref *back;
8753         struct data_backref *dback;
8754         struct cache_extent *cache;
8755         struct btrfs_file_extent_item *fi;
8756         struct btrfs_key key;
8757         u64 bytenr, bytes;
8758         int ret;
8759
8760         list_for_each_entry(back, &rec->backrefs, list) {
8761                 /* Don't care about full backrefs (poor unloved backrefs) */
8762                 if (back->full_backref || !back->is_data)
8763                         continue;
8764
8765                 dback = to_data_backref(back);
8766
8767                 /* We found this one, we don't need to do a lookup */
8768                 if (dback->found_ref)
8769                         continue;
8770
8771                 key.objectid = dback->root;
8772                 key.type = BTRFS_ROOT_ITEM_KEY;
8773                 key.offset = (u64)-1;
8774
8775                 root = btrfs_read_fs_root(info, &key);
8776
8777                 /* No root, definitely a bad ref, skip */
8778                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8779                         continue;
8780                 /* Other err, exit */
8781                 if (IS_ERR(root))
8782                         return PTR_ERR(root);
8783
8784                 key.objectid = dback->owner;
8785                 key.type = BTRFS_EXTENT_DATA_KEY;
8786                 key.offset = dback->offset;
8787                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8788                 if (ret) {
8789                         btrfs_release_path(path);
8790                         if (ret < 0)
8791                                 return ret;
8792                         /* Didn't find it, we can carry on */
8793                         ret = 0;
8794                         continue;
8795                 }
8796
8797                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8798                                     struct btrfs_file_extent_item);
8799                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8800                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8801                 btrfs_release_path(path);
8802                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8803                 if (cache) {
8804                         struct extent_record *tmp;
8805                         tmp = container_of(cache, struct extent_record, cache);
8806
8807                         /*
8808                          * If we found an extent record for the bytenr for this
8809                          * particular backref then we can't add it to our
8810                          * current extent record.  We only want to add backrefs
8811                          * that don't have a corresponding extent item in the
8812                          * extent tree since they likely belong to this record
8813                          * and we need to fix it if it doesn't match bytenrs.
8814                          */
8815                         if  (tmp->found_rec)
8816                                 continue;
8817                 }
8818
8819                 dback->found_ref += 1;
8820                 dback->disk_bytenr = bytenr;
8821                 dback->bytes = bytes;
8822
8823                 /*
8824                  * Set this so the verify backref code knows not to trust the
8825                  * values in this backref.
8826                  */
8827                 back->broken = 1;
8828         }
8829
8830         return 0;
8831 }
8832
8833 /*
8834  * Record orphan data ref into corresponding root.
8835  *
8836  * Return 0 if the extent item contains data ref and recorded.
8837  * Return 1 if the extent item contains no useful data ref
8838  *   On that case, it may contains only shared_dataref or metadata backref
8839  *   or the file extent exists(this should be handled by the extent bytenr
8840  *   recovery routine)
8841  * Return <0 if something goes wrong.
8842  */
8843 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8844                                       struct extent_record *rec)
8845 {
8846         struct btrfs_key key;
8847         struct btrfs_root *dest_root;
8848         struct extent_backref *back;
8849         struct data_backref *dback;
8850         struct orphan_data_extent *orphan;
8851         struct btrfs_path path;
8852         int recorded_data_ref = 0;
8853         int ret = 0;
8854
8855         if (rec->metadata)
8856                 return 1;
8857         btrfs_init_path(&path);
8858         list_for_each_entry(back, &rec->backrefs, list) {
8859                 if (back->full_backref || !back->is_data ||
8860                     !back->found_extent_tree)
8861                         continue;
8862                 dback = to_data_backref(back);
8863                 if (dback->found_ref)
8864                         continue;
8865                 key.objectid = dback->root;
8866                 key.type = BTRFS_ROOT_ITEM_KEY;
8867                 key.offset = (u64)-1;
8868
8869                 dest_root = btrfs_read_fs_root(fs_info, &key);
8870
8871                 /* For non-exist root we just skip it */
8872                 if (IS_ERR(dest_root) || !dest_root)
8873                         continue;
8874
8875                 key.objectid = dback->owner;
8876                 key.type = BTRFS_EXTENT_DATA_KEY;
8877                 key.offset = dback->offset;
8878
8879                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8880                 btrfs_release_path(&path);
8881                 /*
8882                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8883                  * we need to record it for inode/file extent rebuild.
8884                  * For ret > 0, we record it only for file extent rebuild.
8885                  * For ret == 0, the file extent exists but only bytenr
8886                  * mismatch, let the original bytenr fix routine to handle,
8887                  * don't record it.
8888                  */
8889                 if (ret == 0)
8890                         continue;
8891                 ret = 0;
8892                 orphan = malloc(sizeof(*orphan));
8893                 if (!orphan) {
8894                         ret = -ENOMEM;
8895                         goto out;
8896                 }
8897                 INIT_LIST_HEAD(&orphan->list);
8898                 orphan->root = dback->root;
8899                 orphan->objectid = dback->owner;
8900                 orphan->offset = dback->offset;
8901                 orphan->disk_bytenr = rec->cache.start;
8902                 orphan->disk_len = rec->cache.size;
8903                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8904                 recorded_data_ref = 1;
8905         }
8906 out:
8907         btrfs_release_path(&path);
8908         if (!ret)
8909                 return !recorded_data_ref;
8910         else
8911                 return ret;
8912 }
8913
8914 /*
8915  * when an incorrect extent item is found, this will delete
8916  * all of the existing entries for it and recreate them
8917  * based on what the tree scan found.
8918  */
8919 static int fixup_extent_refs(struct btrfs_fs_info *info,
8920                              struct cache_tree *extent_cache,
8921                              struct extent_record *rec)
8922 {
8923         struct btrfs_trans_handle *trans = NULL;
8924         int ret;
8925         struct btrfs_path path;
8926         struct list_head *cur = rec->backrefs.next;
8927         struct cache_extent *cache;
8928         struct extent_backref *back;
8929         int allocated = 0;
8930         u64 flags = 0;
8931
8932         if (rec->flag_block_full_backref)
8933                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8934
8935         btrfs_init_path(&path);
8936         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8937                 /*
8938                  * Sometimes the backrefs themselves are so broken they don't
8939                  * get attached to any meaningful rec, so first go back and
8940                  * check any of our backrefs that we couldn't find and throw
8941                  * them into the list if we find the backref so that
8942                  * verify_backrefs can figure out what to do.
8943                  */
8944                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8945                 if (ret < 0)
8946                         goto out;
8947         }
8948
8949         /* step one, make sure all of the backrefs agree */
8950         ret = verify_backrefs(info, &path, rec);
8951         if (ret < 0)
8952                 goto out;
8953
8954         trans = btrfs_start_transaction(info->extent_root, 1);
8955         if (IS_ERR(trans)) {
8956                 ret = PTR_ERR(trans);
8957                 goto out;
8958         }
8959
8960         /* step two, delete all the existing records */
8961         ret = delete_extent_records(trans, info->extent_root, &path,
8962                                     rec->start, rec->max_size);
8963
8964         if (ret < 0)
8965                 goto out;
8966
8967         /* was this block corrupt?  If so, don't add references to it */
8968         cache = lookup_cache_extent(info->corrupt_blocks,
8969                                     rec->start, rec->max_size);
8970         if (cache) {
8971                 ret = 0;
8972                 goto out;
8973         }
8974
8975         /* step three, recreate all the refs we did find */
8976         while(cur != &rec->backrefs) {
8977                 back = to_extent_backref(cur);
8978                 cur = cur->next;
8979
8980                 /*
8981                  * if we didn't find any references, don't create a
8982                  * new extent record
8983                  */
8984                 if (!back->found_ref)
8985                         continue;
8986
8987                 rec->bad_full_backref = 0;
8988                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8989                 allocated = 1;
8990
8991                 if (ret)
8992                         goto out;
8993         }
8994 out:
8995         if (trans) {
8996                 int err = btrfs_commit_transaction(trans, info->extent_root);
8997                 if (!ret)
8998                         ret = err;
8999         }
9000
9001         if (!ret)
9002                 fprintf(stderr, "Repaired extent references for %llu\n",
9003                                 (unsigned long long)rec->start);
9004
9005         btrfs_release_path(&path);
9006         return ret;
9007 }
9008
9009 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9010                               struct extent_record *rec)
9011 {
9012         struct btrfs_trans_handle *trans;
9013         struct btrfs_root *root = fs_info->extent_root;
9014         struct btrfs_path path;
9015         struct btrfs_extent_item *ei;
9016         struct btrfs_key key;
9017         u64 flags;
9018         int ret = 0;
9019
9020         key.objectid = rec->start;
9021         if (rec->metadata) {
9022                 key.type = BTRFS_METADATA_ITEM_KEY;
9023                 key.offset = rec->info_level;
9024         } else {
9025                 key.type = BTRFS_EXTENT_ITEM_KEY;
9026                 key.offset = rec->max_size;
9027         }
9028
9029         trans = btrfs_start_transaction(root, 0);
9030         if (IS_ERR(trans))
9031                 return PTR_ERR(trans);
9032
9033         btrfs_init_path(&path);
9034         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9035         if (ret < 0) {
9036                 btrfs_release_path(&path);
9037                 btrfs_commit_transaction(trans, root);
9038                 return ret;
9039         } else if (ret) {
9040                 fprintf(stderr, "Didn't find extent for %llu\n",
9041                         (unsigned long long)rec->start);
9042                 btrfs_release_path(&path);
9043                 btrfs_commit_transaction(trans, root);
9044                 return -ENOENT;
9045         }
9046
9047         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9048                             struct btrfs_extent_item);
9049         flags = btrfs_extent_flags(path.nodes[0], ei);
9050         if (rec->flag_block_full_backref) {
9051                 fprintf(stderr, "setting full backref on %llu\n",
9052                         (unsigned long long)key.objectid);
9053                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9054         } else {
9055                 fprintf(stderr, "clearing full backref on %llu\n",
9056                         (unsigned long long)key.objectid);
9057                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9058         }
9059         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9060         btrfs_mark_buffer_dirty(path.nodes[0]);
9061         btrfs_release_path(&path);
9062         ret = btrfs_commit_transaction(trans, root);
9063         if (!ret)
9064                 fprintf(stderr, "Repaired extent flags for %llu\n",
9065                                 (unsigned long long)rec->start);
9066
9067         return ret;
9068 }
9069
9070 /* right now we only prune from the extent allocation tree */
9071 static int prune_one_block(struct btrfs_trans_handle *trans,
9072                            struct btrfs_fs_info *info,
9073                            struct btrfs_corrupt_block *corrupt)
9074 {
9075         int ret;
9076         struct btrfs_path path;
9077         struct extent_buffer *eb;
9078         u64 found;
9079         int slot;
9080         int nritems;
9081         int level = corrupt->level + 1;
9082
9083         btrfs_init_path(&path);
9084 again:
9085         /* we want to stop at the parent to our busted block */
9086         path.lowest_level = level;
9087
9088         ret = btrfs_search_slot(trans, info->extent_root,
9089                                 &corrupt->key, &path, -1, 1);
9090
9091         if (ret < 0)
9092                 goto out;
9093
9094         eb = path.nodes[level];
9095         if (!eb) {
9096                 ret = -ENOENT;
9097                 goto out;
9098         }
9099
9100         /*
9101          * hopefully the search gave us the block we want to prune,
9102          * lets try that first
9103          */
9104         slot = path.slots[level];
9105         found =  btrfs_node_blockptr(eb, slot);
9106         if (found == corrupt->cache.start)
9107                 goto del_ptr;
9108
9109         nritems = btrfs_header_nritems(eb);
9110
9111         /* the search failed, lets scan this node and hope we find it */
9112         for (slot = 0; slot < nritems; slot++) {
9113                 found =  btrfs_node_blockptr(eb, slot);
9114                 if (found == corrupt->cache.start)
9115                         goto del_ptr;
9116         }
9117         /*
9118          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9119          * to this block
9120          */
9121         if (eb == info->extent_root->node) {
9122                 ret = -ENOENT;
9123                 goto out;
9124         } else {
9125                 level++;
9126                 btrfs_release_path(&path);
9127                 goto again;
9128         }
9129
9130 del_ptr:
9131         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9132         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
9133
9134 out:
9135         btrfs_release_path(&path);
9136         return ret;
9137 }
9138
9139 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9140 {
9141         struct btrfs_trans_handle *trans = NULL;
9142         struct cache_extent *cache;
9143         struct btrfs_corrupt_block *corrupt;
9144
9145         while (1) {
9146                 cache = search_cache_extent(info->corrupt_blocks, 0);
9147                 if (!cache)
9148                         break;
9149                 if (!trans) {
9150                         trans = btrfs_start_transaction(info->extent_root, 1);
9151                         if (IS_ERR(trans))
9152                                 return PTR_ERR(trans);
9153                 }
9154                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9155                 prune_one_block(trans, info, corrupt);
9156                 remove_cache_extent(info->corrupt_blocks, cache);
9157         }
9158         if (trans)
9159                 return btrfs_commit_transaction(trans, info->extent_root);
9160         return 0;
9161 }
9162
9163 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9164 {
9165         struct btrfs_block_group_cache *cache;
9166         u64 start, end;
9167         int ret;
9168
9169         while (1) {
9170                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9171                                             &start, &end, EXTENT_DIRTY);
9172                 if (ret)
9173                         break;
9174                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9175                                    GFP_NOFS);
9176         }
9177
9178         start = 0;
9179         while (1) {
9180                 cache = btrfs_lookup_first_block_group(fs_info, start);
9181                 if (!cache)
9182                         break;
9183                 if (cache->cached)
9184                         cache->cached = 0;
9185                 start = cache->key.objectid + cache->key.offset;
9186         }
9187 }
9188
9189 static int check_extent_refs(struct btrfs_root *root,
9190                              struct cache_tree *extent_cache)
9191 {
9192         struct extent_record *rec;
9193         struct cache_extent *cache;
9194         int ret = 0;
9195         int had_dups = 0;
9196
9197         if (repair) {
9198                 /*
9199                  * if we're doing a repair, we have to make sure
9200                  * we don't allocate from the problem extents.
9201                  * In the worst case, this will be all the
9202                  * extents in the FS
9203                  */
9204                 cache = search_cache_extent(extent_cache, 0);
9205                 while(cache) {
9206                         rec = container_of(cache, struct extent_record, cache);
9207                         set_extent_dirty(root->fs_info->excluded_extents,
9208                                          rec->start,
9209                                          rec->start + rec->max_size - 1,
9210                                          GFP_NOFS);
9211                         cache = next_cache_extent(cache);
9212                 }
9213
9214                 /* pin down all the corrupted blocks too */
9215                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9216                 while(cache) {
9217                         set_extent_dirty(root->fs_info->excluded_extents,
9218                                          cache->start,
9219                                          cache->start + cache->size - 1,
9220                                          GFP_NOFS);
9221                         cache = next_cache_extent(cache);
9222                 }
9223                 prune_corrupt_blocks(root->fs_info);
9224                 reset_cached_block_groups(root->fs_info);
9225         }
9226
9227         reset_cached_block_groups(root->fs_info);
9228
9229         /*
9230          * We need to delete any duplicate entries we find first otherwise we
9231          * could mess up the extent tree when we have backrefs that actually
9232          * belong to a different extent item and not the weird duplicate one.
9233          */
9234         while (repair && !list_empty(&duplicate_extents)) {
9235                 rec = to_extent_record(duplicate_extents.next);
9236                 list_del_init(&rec->list);
9237
9238                 /* Sometimes we can find a backref before we find an actual
9239                  * extent, so we need to process it a little bit to see if there
9240                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9241                  * if this is a backref screwup.  If we need to delete stuff
9242                  * process_duplicates() will return 0, otherwise it will return
9243                  * 1 and we
9244                  */
9245                 if (process_duplicates(root, extent_cache, rec))
9246                         continue;
9247                 ret = delete_duplicate_records(root, rec);
9248                 if (ret < 0)
9249                         return ret;
9250                 /*
9251                  * delete_duplicate_records will return the number of entries
9252                  * deleted, so if it's greater than 0 then we know we actually
9253                  * did something and we need to remove.
9254                  */
9255                 if (ret)
9256                         had_dups = 1;
9257         }
9258
9259         if (had_dups)
9260                 return -EAGAIN;
9261
9262         while(1) {
9263                 int cur_err = 0;
9264                 int fix = 0;
9265
9266                 cache = search_cache_extent(extent_cache, 0);
9267                 if (!cache)
9268                         break;
9269                 rec = container_of(cache, struct extent_record, cache);
9270                 if (rec->num_duplicates) {
9271                         fprintf(stderr, "extent item %llu has multiple extent "
9272                                 "items\n", (unsigned long long)rec->start);
9273                         cur_err = 1;
9274                 }
9275
9276                 if (rec->refs != rec->extent_item_refs) {
9277                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9278                                 (unsigned long long)rec->start,
9279                                 (unsigned long long)rec->nr);
9280                         fprintf(stderr, "extent item %llu, found %llu\n",
9281                                 (unsigned long long)rec->extent_item_refs,
9282                                 (unsigned long long)rec->refs);
9283                         ret = record_orphan_data_extents(root->fs_info, rec);
9284                         if (ret < 0)
9285                                 goto repair_abort;
9286                         fix = ret;
9287                         cur_err = 1;
9288                 }
9289                 if (all_backpointers_checked(rec, 1)) {
9290                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9291                                 (unsigned long long)rec->start,
9292                                 (unsigned long long)rec->nr);
9293                         fix = 1;
9294                         cur_err = 1;
9295                 }
9296                 if (!rec->owner_ref_checked) {
9297                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9298                                 (unsigned long long)rec->start,
9299                                 (unsigned long long)rec->nr);
9300                         fix = 1;
9301                         cur_err = 1;
9302                 }
9303
9304                 if (repair && fix) {
9305                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9306                         if (ret)
9307                                 goto repair_abort;
9308                 }
9309
9310
9311                 if (rec->bad_full_backref) {
9312                         fprintf(stderr, "bad full backref, on [%llu]\n",
9313                                 (unsigned long long)rec->start);
9314                         if (repair) {
9315                                 ret = fixup_extent_flags(root->fs_info, rec);
9316                                 if (ret)
9317                                         goto repair_abort;
9318                                 fix = 1;
9319                         }
9320                         cur_err = 1;
9321                 }
9322                 /*
9323                  * Although it's not a extent ref's problem, we reuse this
9324                  * routine for error reporting.
9325                  * No repair function yet.
9326                  */
9327                 if (rec->crossing_stripes) {
9328                         fprintf(stderr,
9329                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9330                                 rec->start, rec->start + rec->max_size);
9331                         cur_err = 1;
9332                 }
9333
9334                 if (rec->wrong_chunk_type) {
9335                         fprintf(stderr,
9336                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9337                                 rec->start, rec->start + rec->max_size);
9338                         cur_err = 1;
9339                 }
9340
9341                 remove_cache_extent(extent_cache, cache);
9342                 free_all_extent_backrefs(rec);
9343                 if (!init_extent_tree && repair && (!cur_err || fix))
9344                         clear_extent_dirty(root->fs_info->excluded_extents,
9345                                            rec->start,
9346                                            rec->start + rec->max_size - 1,
9347                                            GFP_NOFS);
9348                 free(rec);
9349         }
9350 repair_abort:
9351         if (repair) {
9352                 if (ret && ret != -EAGAIN) {
9353                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9354                         exit(1);
9355                 } else if (!ret) {
9356                         struct btrfs_trans_handle *trans;
9357
9358                         root = root->fs_info->extent_root;
9359                         trans = btrfs_start_transaction(root, 1);
9360                         if (IS_ERR(trans)) {
9361                                 ret = PTR_ERR(trans);
9362                                 goto repair_abort;
9363                         }
9364
9365                         btrfs_fix_block_accounting(trans, root);
9366                         ret = btrfs_commit_transaction(trans, root);
9367                         if (ret)
9368                                 goto repair_abort;
9369                 }
9370                 return ret;
9371         }
9372         return 0;
9373 }
9374
9375 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9376 {
9377         u64 stripe_size;
9378
9379         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9380                 stripe_size = length;
9381                 stripe_size /= num_stripes;
9382         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9383                 stripe_size = length * 2;
9384                 stripe_size /= num_stripes;
9385         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9386                 stripe_size = length;
9387                 stripe_size /= (num_stripes - 1);
9388         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9389                 stripe_size = length;
9390                 stripe_size /= (num_stripes - 2);
9391         } else {
9392                 stripe_size = length;
9393         }
9394         return stripe_size;
9395 }
9396
9397 /*
9398  * Check the chunk with its block group/dev list ref:
9399  * Return 0 if all refs seems valid.
9400  * Return 1 if part of refs seems valid, need later check for rebuild ref
9401  * like missing block group and needs to search extent tree to rebuild them.
9402  * Return -1 if essential refs are missing and unable to rebuild.
9403  */
9404 static int check_chunk_refs(struct chunk_record *chunk_rec,
9405                             struct block_group_tree *block_group_cache,
9406                             struct device_extent_tree *dev_extent_cache,
9407                             int silent)
9408 {
9409         struct cache_extent *block_group_item;
9410         struct block_group_record *block_group_rec;
9411         struct cache_extent *dev_extent_item;
9412         struct device_extent_record *dev_extent_rec;
9413         u64 devid;
9414         u64 offset;
9415         u64 length;
9416         int metadump_v2 = 0;
9417         int i;
9418         int ret = 0;
9419
9420         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9421                                                chunk_rec->offset,
9422                                                chunk_rec->length);
9423         if (block_group_item) {
9424                 block_group_rec = container_of(block_group_item,
9425                                                struct block_group_record,
9426                                                cache);
9427                 if (chunk_rec->length != block_group_rec->offset ||
9428                     chunk_rec->offset != block_group_rec->objectid ||
9429                     (!metadump_v2 &&
9430                      chunk_rec->type_flags != block_group_rec->flags)) {
9431                         if (!silent)
9432                                 fprintf(stderr,
9433                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9434                                         chunk_rec->objectid,
9435                                         chunk_rec->type,
9436                                         chunk_rec->offset,
9437                                         chunk_rec->length,
9438                                         chunk_rec->offset,
9439                                         chunk_rec->type_flags,
9440                                         block_group_rec->objectid,
9441                                         block_group_rec->type,
9442                                         block_group_rec->offset,
9443                                         block_group_rec->offset,
9444                                         block_group_rec->objectid,
9445                                         block_group_rec->flags);
9446                         ret = -1;
9447                 } else {
9448                         list_del_init(&block_group_rec->list);
9449                         chunk_rec->bg_rec = block_group_rec;
9450                 }
9451         } else {
9452                 if (!silent)
9453                         fprintf(stderr,
9454                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9455                                 chunk_rec->objectid,
9456                                 chunk_rec->type,
9457                                 chunk_rec->offset,
9458                                 chunk_rec->length,
9459                                 chunk_rec->offset,
9460                                 chunk_rec->type_flags);
9461                 ret = 1;
9462         }
9463
9464         if (metadump_v2)
9465                 return ret;
9466
9467         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9468                                     chunk_rec->num_stripes);
9469         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9470                 devid = chunk_rec->stripes[i].devid;
9471                 offset = chunk_rec->stripes[i].offset;
9472                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9473                                                        devid, offset, length);
9474                 if (dev_extent_item) {
9475                         dev_extent_rec = container_of(dev_extent_item,
9476                                                 struct device_extent_record,
9477                                                 cache);
9478                         if (dev_extent_rec->objectid != devid ||
9479                             dev_extent_rec->offset != offset ||
9480                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9481                             dev_extent_rec->length != length) {
9482                                 if (!silent)
9483                                         fprintf(stderr,
9484                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9485                                                 chunk_rec->objectid,
9486                                                 chunk_rec->type,
9487                                                 chunk_rec->offset,
9488                                                 chunk_rec->stripes[i].devid,
9489                                                 chunk_rec->stripes[i].offset,
9490                                                 dev_extent_rec->objectid,
9491                                                 dev_extent_rec->offset,
9492                                                 dev_extent_rec->length);
9493                                 ret = -1;
9494                         } else {
9495                                 list_move(&dev_extent_rec->chunk_list,
9496                                           &chunk_rec->dextents);
9497                         }
9498                 } else {
9499                         if (!silent)
9500                                 fprintf(stderr,
9501                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9502                                         chunk_rec->objectid,
9503                                         chunk_rec->type,
9504                                         chunk_rec->offset,
9505                                         chunk_rec->stripes[i].devid,
9506                                         chunk_rec->stripes[i].offset);
9507                         ret = -1;
9508                 }
9509         }
9510         return ret;
9511 }
9512
9513 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9514 int check_chunks(struct cache_tree *chunk_cache,
9515                  struct block_group_tree *block_group_cache,
9516                  struct device_extent_tree *dev_extent_cache,
9517                  struct list_head *good, struct list_head *bad,
9518                  struct list_head *rebuild, int silent)
9519 {
9520         struct cache_extent *chunk_item;
9521         struct chunk_record *chunk_rec;
9522         struct block_group_record *bg_rec;
9523         struct device_extent_record *dext_rec;
9524         int err;
9525         int ret = 0;
9526
9527         chunk_item = first_cache_extent(chunk_cache);
9528         while (chunk_item) {
9529                 chunk_rec = container_of(chunk_item, struct chunk_record,
9530                                          cache);
9531                 err = check_chunk_refs(chunk_rec, block_group_cache,
9532                                        dev_extent_cache, silent);
9533                 if (err < 0)
9534                         ret = err;
9535                 if (err == 0 && good)
9536                         list_add_tail(&chunk_rec->list, good);
9537                 if (err > 0 && rebuild)
9538                         list_add_tail(&chunk_rec->list, rebuild);
9539                 if (err < 0 && bad)
9540                         list_add_tail(&chunk_rec->list, bad);
9541                 chunk_item = next_cache_extent(chunk_item);
9542         }
9543
9544         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9545                 if (!silent)
9546                         fprintf(stderr,
9547                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9548                                 bg_rec->objectid,
9549                                 bg_rec->offset,
9550                                 bg_rec->flags);
9551                 if (!ret)
9552                         ret = 1;
9553         }
9554
9555         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9556                             chunk_list) {
9557                 if (!silent)
9558                         fprintf(stderr,
9559                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9560                                 dext_rec->objectid,
9561                                 dext_rec->offset,
9562                                 dext_rec->length);
9563                 if (!ret)
9564                         ret = 1;
9565         }
9566         return ret;
9567 }
9568
9569
9570 static int check_device_used(struct device_record *dev_rec,
9571                              struct device_extent_tree *dext_cache)
9572 {
9573         struct cache_extent *cache;
9574         struct device_extent_record *dev_extent_rec;
9575         u64 total_byte = 0;
9576
9577         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9578         while (cache) {
9579                 dev_extent_rec = container_of(cache,
9580                                               struct device_extent_record,
9581                                               cache);
9582                 if (dev_extent_rec->objectid != dev_rec->devid)
9583                         break;
9584
9585                 list_del_init(&dev_extent_rec->device_list);
9586                 total_byte += dev_extent_rec->length;
9587                 cache = next_cache_extent(cache);
9588         }
9589
9590         if (total_byte != dev_rec->byte_used) {
9591                 fprintf(stderr,
9592                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9593                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9594                         dev_rec->type, dev_rec->offset);
9595                 return -1;
9596         } else {
9597                 return 0;
9598         }
9599 }
9600
9601 /* check btrfs_dev_item -> btrfs_dev_extent */
9602 static int check_devices(struct rb_root *dev_cache,
9603                          struct device_extent_tree *dev_extent_cache)
9604 {
9605         struct rb_node *dev_node;
9606         struct device_record *dev_rec;
9607         struct device_extent_record *dext_rec;
9608         int err;
9609         int ret = 0;
9610
9611         dev_node = rb_first(dev_cache);
9612         while (dev_node) {
9613                 dev_rec = container_of(dev_node, struct device_record, node);
9614                 err = check_device_used(dev_rec, dev_extent_cache);
9615                 if (err)
9616                         ret = err;
9617
9618                 dev_node = rb_next(dev_node);
9619         }
9620         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9621                             device_list) {
9622                 fprintf(stderr,
9623                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9624                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9625                 if (!ret)
9626                         ret = 1;
9627         }
9628         return ret;
9629 }
9630
9631 static int add_root_item_to_list(struct list_head *head,
9632                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9633                                   u8 level, u8 drop_level,
9634                                   int level_size, struct btrfs_key *drop_key)
9635 {
9636
9637         struct root_item_record *ri_rec;
9638         ri_rec = malloc(sizeof(*ri_rec));
9639         if (!ri_rec)
9640                 return -ENOMEM;
9641         ri_rec->bytenr = bytenr;
9642         ri_rec->objectid = objectid;
9643         ri_rec->level = level;
9644         ri_rec->level_size = level_size;
9645         ri_rec->drop_level = drop_level;
9646         ri_rec->last_snapshot = last_snapshot;
9647         if (drop_key)
9648                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9649         list_add_tail(&ri_rec->list, head);
9650
9651         return 0;
9652 }
9653
9654 static void free_root_item_list(struct list_head *list)
9655 {
9656         struct root_item_record *ri_rec;
9657
9658         while (!list_empty(list)) {
9659                 ri_rec = list_first_entry(list, struct root_item_record,
9660                                           list);
9661                 list_del_init(&ri_rec->list);
9662                 free(ri_rec);
9663         }
9664 }
9665
9666 static int deal_root_from_list(struct list_head *list,
9667                                struct btrfs_root *root,
9668                                struct block_info *bits,
9669                                int bits_nr,
9670                                struct cache_tree *pending,
9671                                struct cache_tree *seen,
9672                                struct cache_tree *reada,
9673                                struct cache_tree *nodes,
9674                                struct cache_tree *extent_cache,
9675                                struct cache_tree *chunk_cache,
9676                                struct rb_root *dev_cache,
9677                                struct block_group_tree *block_group_cache,
9678                                struct device_extent_tree *dev_extent_cache)
9679 {
9680         int ret = 0;
9681         u64 last;
9682
9683         while (!list_empty(list)) {
9684                 struct root_item_record *rec;
9685                 struct extent_buffer *buf;
9686                 rec = list_entry(list->next,
9687                                  struct root_item_record, list);
9688                 last = 0;
9689                 buf = read_tree_block(root->fs_info->tree_root,
9690                                       rec->bytenr, rec->level_size, 0);
9691                 if (!extent_buffer_uptodate(buf)) {
9692                         free_extent_buffer(buf);
9693                         ret = -EIO;
9694                         break;
9695                 }
9696                 ret = add_root_to_pending(buf, extent_cache, pending,
9697                                     seen, nodes, rec->objectid);
9698                 if (ret < 0)
9699                         break;
9700                 /*
9701                  * To rebuild extent tree, we need deal with snapshot
9702                  * one by one, otherwise we deal with node firstly which
9703                  * can maximize readahead.
9704                  */
9705                 while (1) {
9706                         ret = run_next_block(root, bits, bits_nr, &last,
9707                                              pending, seen, reada, nodes,
9708                                              extent_cache, chunk_cache,
9709                                              dev_cache, block_group_cache,
9710                                              dev_extent_cache, rec);
9711                         if (ret != 0)
9712                                 break;
9713                 }
9714                 free_extent_buffer(buf);
9715                 list_del(&rec->list);
9716                 free(rec);
9717                 if (ret < 0)
9718                         break;
9719         }
9720         while (ret >= 0) {
9721                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9722                                      reada, nodes, extent_cache, chunk_cache,
9723                                      dev_cache, block_group_cache,
9724                                      dev_extent_cache, NULL);
9725                 if (ret != 0) {
9726                         if (ret > 0)
9727                                 ret = 0;
9728                         break;
9729                 }
9730         }
9731         return ret;
9732 }
9733
9734 static int check_chunks_and_extents(struct btrfs_root *root)
9735 {
9736         struct rb_root dev_cache;
9737         struct cache_tree chunk_cache;
9738         struct block_group_tree block_group_cache;
9739         struct device_extent_tree dev_extent_cache;
9740         struct cache_tree extent_cache;
9741         struct cache_tree seen;
9742         struct cache_tree pending;
9743         struct cache_tree reada;
9744         struct cache_tree nodes;
9745         struct extent_io_tree excluded_extents;
9746         struct cache_tree corrupt_blocks;
9747         struct btrfs_path path;
9748         struct btrfs_key key;
9749         struct btrfs_key found_key;
9750         int ret, err = 0;
9751         struct block_info *bits;
9752         int bits_nr;
9753         struct extent_buffer *leaf;
9754         int slot;
9755         struct btrfs_root_item ri;
9756         struct list_head dropping_trees;
9757         struct list_head normal_trees;
9758         struct btrfs_root *root1;
9759         u64 objectid;
9760         u32 level_size;
9761         u8 level;
9762
9763         dev_cache = RB_ROOT;
9764         cache_tree_init(&chunk_cache);
9765         block_group_tree_init(&block_group_cache);
9766         device_extent_tree_init(&dev_extent_cache);
9767
9768         cache_tree_init(&extent_cache);
9769         cache_tree_init(&seen);
9770         cache_tree_init(&pending);
9771         cache_tree_init(&nodes);
9772         cache_tree_init(&reada);
9773         cache_tree_init(&corrupt_blocks);
9774         extent_io_tree_init(&excluded_extents);
9775         INIT_LIST_HEAD(&dropping_trees);
9776         INIT_LIST_HEAD(&normal_trees);
9777
9778         if (repair) {
9779                 root->fs_info->excluded_extents = &excluded_extents;
9780                 root->fs_info->fsck_extent_cache = &extent_cache;
9781                 root->fs_info->free_extent_hook = free_extent_hook;
9782                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9783         }
9784
9785         bits_nr = 1024;
9786         bits = malloc(bits_nr * sizeof(struct block_info));
9787         if (!bits) {
9788                 perror("malloc");
9789                 exit(1);
9790         }
9791
9792         if (ctx.progress_enabled) {
9793                 ctx.tp = TASK_EXTENTS;
9794                 task_start(ctx.info);
9795         }
9796
9797 again:
9798         root1 = root->fs_info->tree_root;
9799         level = btrfs_header_level(root1->node);
9800         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9801                                     root1->node->start, 0, level, 0,
9802                                     root1->nodesize, NULL);
9803         if (ret < 0)
9804                 goto out;
9805         root1 = root->fs_info->chunk_root;
9806         level = btrfs_header_level(root1->node);
9807         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9808                                     root1->node->start, 0, level, 0,
9809                                     root1->nodesize, NULL);
9810         if (ret < 0)
9811                 goto out;
9812         btrfs_init_path(&path);
9813         key.offset = 0;
9814         key.objectid = 0;
9815         key.type = BTRFS_ROOT_ITEM_KEY;
9816         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9817                                         &key, &path, 0, 0);
9818         if (ret < 0)
9819                 goto out;
9820         while(1) {
9821                 leaf = path.nodes[0];
9822                 slot = path.slots[0];
9823                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9824                         ret = btrfs_next_leaf(root, &path);
9825                         if (ret != 0)
9826                                 break;
9827                         leaf = path.nodes[0];
9828                         slot = path.slots[0];
9829                 }
9830                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9831                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9832                         unsigned long offset;
9833                         u64 last_snapshot;
9834
9835                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9836                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9837                         last_snapshot = btrfs_root_last_snapshot(&ri);
9838                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9839                                 level = btrfs_root_level(&ri);
9840                                 level_size = root->nodesize;
9841                                 ret = add_root_item_to_list(&normal_trees,
9842                                                 found_key.objectid,
9843                                                 btrfs_root_bytenr(&ri),
9844                                                 last_snapshot, level,
9845                                                 0, level_size, NULL);
9846                                 if (ret < 0)
9847                                         goto out;
9848                         } else {
9849                                 level = btrfs_root_level(&ri);
9850                                 level_size = root->nodesize;
9851                                 objectid = found_key.objectid;
9852                                 btrfs_disk_key_to_cpu(&found_key,
9853                                                       &ri.drop_progress);
9854                                 ret = add_root_item_to_list(&dropping_trees,
9855                                                 objectid,
9856                                                 btrfs_root_bytenr(&ri),
9857                                                 last_snapshot, level,
9858                                                 ri.drop_level,
9859                                                 level_size, &found_key);
9860                                 if (ret < 0)
9861                                         goto out;
9862                         }
9863                 }
9864                 path.slots[0]++;
9865         }
9866         btrfs_release_path(&path);
9867
9868         /*
9869          * check_block can return -EAGAIN if it fixes something, please keep
9870          * this in mind when dealing with return values from these functions, if
9871          * we get -EAGAIN we want to fall through and restart the loop.
9872          */
9873         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9874                                   &seen, &reada, &nodes, &extent_cache,
9875                                   &chunk_cache, &dev_cache, &block_group_cache,
9876                                   &dev_extent_cache);
9877         if (ret < 0) {
9878                 if (ret == -EAGAIN)
9879                         goto loop;
9880                 goto out;
9881         }
9882         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9883                                   &pending, &seen, &reada, &nodes,
9884                                   &extent_cache, &chunk_cache, &dev_cache,
9885                                   &block_group_cache, &dev_extent_cache);
9886         if (ret < 0) {
9887                 if (ret == -EAGAIN)
9888                         goto loop;
9889                 goto out;
9890         }
9891
9892         ret = check_chunks(&chunk_cache, &block_group_cache,
9893                            &dev_extent_cache, NULL, NULL, NULL, 0);
9894         if (ret) {
9895                 if (ret == -EAGAIN)
9896                         goto loop;
9897                 err = ret;
9898         }
9899
9900         ret = check_extent_refs(root, &extent_cache);
9901         if (ret < 0) {
9902                 if (ret == -EAGAIN)
9903                         goto loop;
9904                 goto out;
9905         }
9906
9907         ret = check_devices(&dev_cache, &dev_extent_cache);
9908         if (ret && err)
9909                 ret = err;
9910
9911 out:
9912         task_stop(ctx.info);
9913         if (repair) {
9914                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9915                 extent_io_tree_cleanup(&excluded_extents);
9916                 root->fs_info->fsck_extent_cache = NULL;
9917                 root->fs_info->free_extent_hook = NULL;
9918                 root->fs_info->corrupt_blocks = NULL;
9919                 root->fs_info->excluded_extents = NULL;
9920         }
9921         free(bits);
9922         free_chunk_cache_tree(&chunk_cache);
9923         free_device_cache_tree(&dev_cache);
9924         free_block_group_tree(&block_group_cache);
9925         free_device_extent_tree(&dev_extent_cache);
9926         free_extent_cache_tree(&seen);
9927         free_extent_cache_tree(&pending);
9928         free_extent_cache_tree(&reada);
9929         free_extent_cache_tree(&nodes);
9930         return ret;
9931 loop:
9932         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9933         free_extent_cache_tree(&seen);
9934         free_extent_cache_tree(&pending);
9935         free_extent_cache_tree(&reada);
9936         free_extent_cache_tree(&nodes);
9937         free_chunk_cache_tree(&chunk_cache);
9938         free_block_group_tree(&block_group_cache);
9939         free_device_cache_tree(&dev_cache);
9940         free_device_extent_tree(&dev_extent_cache);
9941         free_extent_record_cache(root->fs_info, &extent_cache);
9942         free_root_item_list(&normal_trees);
9943         free_root_item_list(&dropping_trees);
9944         extent_io_tree_cleanup(&excluded_extents);
9945         goto again;
9946 }
9947
9948 /*
9949  * Check backrefs of a tree block given by @bytenr or @eb.
9950  *
9951  * @root:       the root containing the @bytenr or @eb
9952  * @eb:         tree block extent buffer, can be NULL
9953  * @bytenr:     bytenr of the tree block to search
9954  * @level:      tree level of the tree block
9955  * @owner:      owner of the tree block
9956  *
9957  * Return >0 for any error found and output error message
9958  * Return 0 for no error found
9959  */
9960 static int check_tree_block_ref(struct btrfs_root *root,
9961                                 struct extent_buffer *eb, u64 bytenr,
9962                                 int level, u64 owner)
9963 {
9964         struct btrfs_key key;
9965         struct btrfs_root *extent_root = root->fs_info->extent_root;
9966         struct btrfs_path path;
9967         struct btrfs_extent_item *ei;
9968         struct btrfs_extent_inline_ref *iref;
9969         struct extent_buffer *leaf;
9970         unsigned long end;
9971         unsigned long ptr;
9972         int slot;
9973         int skinny_level;
9974         int type;
9975         u32 nodesize = root->nodesize;
9976         u32 item_size;
9977         u64 offset;
9978         int tree_reloc_root = 0;
9979         int found_ref = 0;
9980         int err = 0;
9981         int ret;
9982
9983         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9984             btrfs_header_bytenr(root->node) == bytenr)
9985                 tree_reloc_root = 1;
9986
9987         btrfs_init_path(&path);
9988         key.objectid = bytenr;
9989         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9990                 key.type = BTRFS_METADATA_ITEM_KEY;
9991         else
9992                 key.type = BTRFS_EXTENT_ITEM_KEY;
9993         key.offset = (u64)-1;
9994
9995         /* Search for the backref in extent tree */
9996         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9997         if (ret < 0) {
9998                 err |= BACKREF_MISSING;
9999                 goto out;
10000         }
10001         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10002         if (ret) {
10003                 err |= BACKREF_MISSING;
10004                 goto out;
10005         }
10006
10007         leaf = path.nodes[0];
10008         slot = path.slots[0];
10009         btrfs_item_key_to_cpu(leaf, &key, slot);
10010
10011         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10012
10013         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10014                 skinny_level = (int)key.offset;
10015                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10016         } else {
10017                 struct btrfs_tree_block_info *info;
10018
10019                 info = (struct btrfs_tree_block_info *)(ei + 1);
10020                 skinny_level = btrfs_tree_block_level(leaf, info);
10021                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10022         }
10023
10024         if (eb) {
10025                 u64 header_gen;
10026                 u64 extent_gen;
10027
10028                 if (!(btrfs_extent_flags(leaf, ei) &
10029                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10030                         error(
10031                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10032                                 key.objectid, nodesize,
10033                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10034                         err = BACKREF_MISMATCH;
10035                 }
10036                 header_gen = btrfs_header_generation(eb);
10037                 extent_gen = btrfs_extent_generation(leaf, ei);
10038                 if (header_gen != extent_gen) {
10039                         error(
10040         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10041                                 key.objectid, nodesize, header_gen,
10042                                 extent_gen);
10043                         err = BACKREF_MISMATCH;
10044                 }
10045                 if (level != skinny_level) {
10046                         error(
10047                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10048                                 key.objectid, nodesize, level, skinny_level);
10049                         err = BACKREF_MISMATCH;
10050                 }
10051                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10052                         error(
10053                         "extent[%llu %u] is referred by other roots than %llu",
10054                                 key.objectid, nodesize, root->objectid);
10055                         err = BACKREF_MISMATCH;
10056                 }
10057         }
10058
10059         /*
10060          * Iterate the extent/metadata item to find the exact backref
10061          */
10062         item_size = btrfs_item_size_nr(leaf, slot);
10063         ptr = (unsigned long)iref;
10064         end = (unsigned long)ei + item_size;
10065         while (ptr < end) {
10066                 iref = (struct btrfs_extent_inline_ref *)ptr;
10067                 type = btrfs_extent_inline_ref_type(leaf, iref);
10068                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10069
10070                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10071                         (offset == root->objectid || offset == owner)) {
10072                         found_ref = 1;
10073                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10074                         /*
10075                          * Backref of tree reloc root points to itself, no need
10076                          * to check backref any more.
10077                          */
10078                         if (tree_reloc_root)
10079                                 found_ref = 1;
10080                         else
10081                         /* Check if the backref points to valid referencer */
10082                                 found_ref = !check_tree_block_ref(root, NULL,
10083                                                 offset, level + 1, owner);
10084                 }
10085
10086                 if (found_ref)
10087                         break;
10088                 ptr += btrfs_extent_inline_ref_size(type);
10089         }
10090
10091         /*
10092          * Inlined extent item doesn't have what we need, check
10093          * TREE_BLOCK_REF_KEY
10094          */
10095         if (!found_ref) {
10096                 btrfs_release_path(&path);
10097                 key.objectid = bytenr;
10098                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10099                 key.offset = root->objectid;
10100
10101                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10102                 if (!ret)
10103                         found_ref = 1;
10104         }
10105         if (!found_ref)
10106                 err |= BACKREF_MISSING;
10107 out:
10108         btrfs_release_path(&path);
10109         if (eb && (err & BACKREF_MISSING))
10110                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10111                         bytenr, nodesize, owner, level);
10112         return err;
10113 }
10114
10115 /*
10116  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10117  *
10118  * Return >0 any error found and output error message
10119  * Return 0 for no error found
10120  */
10121 static int check_extent_data_item(struct btrfs_root *root,
10122                                   struct extent_buffer *eb, int slot)
10123 {
10124         struct btrfs_file_extent_item *fi;
10125         struct btrfs_path path;
10126         struct btrfs_root *extent_root = root->fs_info->extent_root;
10127         struct btrfs_key fi_key;
10128         struct btrfs_key dbref_key;
10129         struct extent_buffer *leaf;
10130         struct btrfs_extent_item *ei;
10131         struct btrfs_extent_inline_ref *iref;
10132         struct btrfs_extent_data_ref *dref;
10133         u64 owner;
10134         u64 file_extent_gen;
10135         u64 disk_bytenr;
10136         u64 disk_num_bytes;
10137         u64 extent_num_bytes;
10138         u64 extent_flags;
10139         u64 extent_gen;
10140         u32 item_size;
10141         unsigned long end;
10142         unsigned long ptr;
10143         int type;
10144         u64 ref_root;
10145         int found_dbackref = 0;
10146         int err = 0;
10147         int ret;
10148
10149         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10150         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10151         file_extent_gen = btrfs_file_extent_generation(eb, fi);
10152
10153         /* Nothing to check for hole and inline data extents */
10154         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10155             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10156                 return 0;
10157
10158         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10159         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10160         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10161
10162         /* Check unaligned disk_num_bytes and num_bytes */
10163         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10164                 error(
10165 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10166                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10167                         root->sectorsize);
10168                 err |= BYTES_UNALIGNED;
10169         } else {
10170                 data_bytes_allocated += disk_num_bytes;
10171         }
10172         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10173                 error(
10174 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10175                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10176                         root->sectorsize);
10177                 err |= BYTES_UNALIGNED;
10178         } else {
10179                 data_bytes_referenced += extent_num_bytes;
10180         }
10181         owner = btrfs_header_owner(eb);
10182
10183         /* Check the extent item of the file extent in extent tree */
10184         btrfs_init_path(&path);
10185         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10186         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10187         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10188
10189         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10190         if (ret) {
10191                 err |= BACKREF_MISSING;
10192                 goto error;
10193         }
10194
10195         leaf = path.nodes[0];
10196         slot = path.slots[0];
10197         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10198
10199         extent_flags = btrfs_extent_flags(leaf, ei);
10200         extent_gen = btrfs_extent_generation(leaf, ei);
10201
10202         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10203                 error(
10204                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10205                     disk_bytenr, disk_num_bytes,
10206                     BTRFS_EXTENT_FLAG_DATA);
10207                 err |= BACKREF_MISMATCH;
10208         }
10209
10210         if (file_extent_gen < extent_gen) {
10211                 error(
10212 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
10213                         disk_bytenr, disk_num_bytes, file_extent_gen,
10214                         extent_gen);
10215                 err |= BACKREF_MISMATCH;
10216         }
10217
10218         /* Check data backref inside that extent item */
10219         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10220         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10221         ptr = (unsigned long)iref;
10222         end = (unsigned long)ei + item_size;
10223         while (ptr < end) {
10224                 iref = (struct btrfs_extent_inline_ref *)ptr;
10225                 type = btrfs_extent_inline_ref_type(leaf, iref);
10226                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10227
10228                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10229                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10230                         if (ref_root == owner || ref_root == root->objectid)
10231                                 found_dbackref = 1;
10232                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10233                         found_dbackref = !check_tree_block_ref(root, NULL,
10234                                 btrfs_extent_inline_ref_offset(leaf, iref),
10235                                 0, owner);
10236                 }
10237
10238                 if (found_dbackref)
10239                         break;
10240                 ptr += btrfs_extent_inline_ref_size(type);
10241         }
10242
10243         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10244         if (!found_dbackref) {
10245                 btrfs_release_path(&path);
10246
10247                 btrfs_init_path(&path);
10248                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10249                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10250                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10251                                 fi_key.objectid, fi_key.offset);
10252
10253                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10254                                         &dbref_key, &path, 0, 0);
10255                 if (!ret)
10256                         found_dbackref = 1;
10257         }
10258
10259         if (!found_dbackref)
10260                 err |= BACKREF_MISSING;
10261 error:
10262         btrfs_release_path(&path);
10263         if (err & BACKREF_MISSING) {
10264                 error("data extent[%llu %llu] backref lost",
10265                       disk_bytenr, disk_num_bytes);
10266         }
10267         return err;
10268 }
10269
10270 /*
10271  * Get real tree block level for the case like shared block
10272  * Return >= 0 as tree level
10273  * Return <0 for error
10274  */
10275 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10276 {
10277         struct extent_buffer *eb;
10278         struct btrfs_path path;
10279         struct btrfs_key key;
10280         struct btrfs_extent_item *ei;
10281         u64 flags;
10282         u64 transid;
10283         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10284         u8 backref_level;
10285         u8 header_level;
10286         int ret;
10287
10288         /* Search extent tree for extent generation and level */
10289         key.objectid = bytenr;
10290         key.type = BTRFS_METADATA_ITEM_KEY;
10291         key.offset = (u64)-1;
10292
10293         btrfs_init_path(&path);
10294         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10295         if (ret < 0)
10296                 goto release_out;
10297         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10298         if (ret < 0)
10299                 goto release_out;
10300         if (ret > 0) {
10301                 ret = -ENOENT;
10302                 goto release_out;
10303         }
10304
10305         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10306         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10307                             struct btrfs_extent_item);
10308         flags = btrfs_extent_flags(path.nodes[0], ei);
10309         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10310                 ret = -ENOENT;
10311                 goto release_out;
10312         }
10313
10314         /* Get transid for later read_tree_block() check */
10315         transid = btrfs_extent_generation(path.nodes[0], ei);
10316
10317         /* Get backref level as one source */
10318         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10319                 backref_level = key.offset;
10320         } else {
10321                 struct btrfs_tree_block_info *info;
10322
10323                 info = (struct btrfs_tree_block_info *)(ei + 1);
10324                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10325         }
10326         btrfs_release_path(&path);
10327
10328         /* Get level from tree block as an alternative source */
10329         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10330         if (!extent_buffer_uptodate(eb)) {
10331                 free_extent_buffer(eb);
10332                 return -EIO;
10333         }
10334         header_level = btrfs_header_level(eb);
10335         free_extent_buffer(eb);
10336
10337         if (header_level != backref_level)
10338                 return -EIO;
10339         return header_level;
10340
10341 release_out:
10342         btrfs_release_path(&path);
10343         return ret;
10344 }
10345
10346 /*
10347  * Check if a tree block backref is valid (points to a valid tree block)
10348  * if level == -1, level will be resolved
10349  * Return >0 for any error found and print error message
10350  */
10351 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10352                                     u64 bytenr, int level)
10353 {
10354         struct btrfs_root *root;
10355         struct btrfs_key key;
10356         struct btrfs_path path;
10357         struct extent_buffer *eb;
10358         struct extent_buffer *node;
10359         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10360         int err = 0;
10361         int ret;
10362
10363         /* Query level for level == -1 special case */
10364         if (level == -1)
10365                 level = query_tree_block_level(fs_info, bytenr);
10366         if (level < 0) {
10367                 err |= REFERENCER_MISSING;
10368                 goto out;
10369         }
10370
10371         key.objectid = root_id;
10372         key.type = BTRFS_ROOT_ITEM_KEY;
10373         key.offset = (u64)-1;
10374
10375         root = btrfs_read_fs_root(fs_info, &key);
10376         if (IS_ERR(root)) {
10377                 err |= REFERENCER_MISSING;
10378                 goto out;
10379         }
10380
10381         /* Read out the tree block to get item/node key */
10382         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10383         if (!extent_buffer_uptodate(eb)) {
10384                 err |= REFERENCER_MISSING;
10385                 free_extent_buffer(eb);
10386                 goto out;
10387         }
10388
10389         /* Empty tree, no need to check key */
10390         if (!btrfs_header_nritems(eb) && !level) {
10391                 free_extent_buffer(eb);
10392                 goto out;
10393         }
10394
10395         if (level)
10396                 btrfs_node_key_to_cpu(eb, &key, 0);
10397         else
10398                 btrfs_item_key_to_cpu(eb, &key, 0);
10399
10400         free_extent_buffer(eb);
10401
10402         btrfs_init_path(&path);
10403         path.lowest_level = level;
10404         /* Search with the first key, to ensure we can reach it */
10405         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10406         if (ret < 0) {
10407                 err |= REFERENCER_MISSING;
10408                 goto release_out;
10409         }
10410
10411         node = path.nodes[level];
10412         if (btrfs_header_bytenr(node) != bytenr) {
10413                 error(
10414         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10415                         bytenr, nodesize, bytenr,
10416                         btrfs_header_bytenr(node));
10417                 err |= REFERENCER_MISMATCH;
10418         }
10419         if (btrfs_header_level(node) != level) {
10420                 error(
10421         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10422                         bytenr, nodesize, level,
10423                         btrfs_header_level(node));
10424                 err |= REFERENCER_MISMATCH;
10425         }
10426
10427 release_out:
10428         btrfs_release_path(&path);
10429 out:
10430         if (err & REFERENCER_MISSING) {
10431                 if (level < 0)
10432                         error("extent [%llu %d] lost referencer (owner: %llu)",
10433                                 bytenr, nodesize, root_id);
10434                 else
10435                         error(
10436                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10437                                 bytenr, nodesize, root_id, level);
10438         }
10439
10440         return err;
10441 }
10442
10443 /*
10444  * Check if tree block @eb is tree reloc root.
10445  * Return 0 if it's not or any problem happens
10446  * Return 1 if it's a tree reloc root
10447  */
10448 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10449                                  struct extent_buffer *eb)
10450 {
10451         struct btrfs_root *tree_reloc_root;
10452         struct btrfs_key key;
10453         u64 bytenr = btrfs_header_bytenr(eb);
10454         u64 owner = btrfs_header_owner(eb);
10455         int ret = 0;
10456
10457         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10458         key.offset = owner;
10459         key.type = BTRFS_ROOT_ITEM_KEY;
10460
10461         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10462         if (IS_ERR(tree_reloc_root))
10463                 return 0;
10464
10465         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10466                 ret = 1;
10467         btrfs_free_fs_root(tree_reloc_root);
10468         return ret;
10469 }
10470
10471 /*
10472  * Check referencer for shared block backref
10473  * If level == -1, this function will resolve the level.
10474  */
10475 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10476                                      u64 parent, u64 bytenr, int level)
10477 {
10478         struct extent_buffer *eb;
10479         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10480         u32 nr;
10481         int found_parent = 0;
10482         int i;
10483
10484         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10485         if (!extent_buffer_uptodate(eb))
10486                 goto out;
10487
10488         if (level == -1)
10489                 level = query_tree_block_level(fs_info, bytenr);
10490         if (level < 0)
10491                 goto out;
10492
10493         /* It's possible it's a tree reloc root */
10494         if (parent == bytenr) {
10495                 if (is_tree_reloc_root(fs_info, eb))
10496                         found_parent = 1;
10497                 goto out;
10498         }
10499
10500         if (level + 1 != btrfs_header_level(eb))
10501                 goto out;
10502
10503         nr = btrfs_header_nritems(eb);
10504         for (i = 0; i < nr; i++) {
10505                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10506                         found_parent = 1;
10507                         break;
10508                 }
10509         }
10510 out:
10511         free_extent_buffer(eb);
10512         if (!found_parent) {
10513                 error(
10514         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10515                         bytenr, nodesize, parent, level);
10516                 return REFERENCER_MISSING;
10517         }
10518         return 0;
10519 }
10520
10521 /*
10522  * Check referencer for normal (inlined) data ref
10523  * If len == 0, it will be resolved by searching in extent tree
10524  */
10525 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10526                                      u64 root_id, u64 objectid, u64 offset,
10527                                      u64 bytenr, u64 len, u32 count)
10528 {
10529         struct btrfs_root *root;
10530         struct btrfs_root *extent_root = fs_info->extent_root;
10531         struct btrfs_key key;
10532         struct btrfs_path path;
10533         struct extent_buffer *leaf;
10534         struct btrfs_file_extent_item *fi;
10535         u32 found_count = 0;
10536         int slot;
10537         int ret = 0;
10538
10539         if (!len) {
10540                 key.objectid = bytenr;
10541                 key.type = BTRFS_EXTENT_ITEM_KEY;
10542                 key.offset = (u64)-1;
10543
10544                 btrfs_init_path(&path);
10545                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10546                 if (ret < 0)
10547                         goto out;
10548                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10549                 if (ret)
10550                         goto out;
10551                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10552                 if (key.objectid != bytenr ||
10553                     key.type != BTRFS_EXTENT_ITEM_KEY)
10554                         goto out;
10555                 len = key.offset;
10556                 btrfs_release_path(&path);
10557         }
10558         key.objectid = root_id;
10559         key.type = BTRFS_ROOT_ITEM_KEY;
10560         key.offset = (u64)-1;
10561         btrfs_init_path(&path);
10562
10563         root = btrfs_read_fs_root(fs_info, &key);
10564         if (IS_ERR(root))
10565                 goto out;
10566
10567         key.objectid = objectid;
10568         key.type = BTRFS_EXTENT_DATA_KEY;
10569         /*
10570          * It can be nasty as data backref offset is
10571          * file offset - file extent offset, which is smaller or
10572          * equal to original backref offset.  The only special case is
10573          * overflow.  So we need to special check and do further search.
10574          */
10575         key.offset = offset & (1ULL << 63) ? 0 : offset;
10576
10577         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10578         if (ret < 0)
10579                 goto out;
10580
10581         /*
10582          * Search afterwards to get correct one
10583          * NOTE: As we must do a comprehensive check on the data backref to
10584          * make sure the dref count also matches, we must iterate all file
10585          * extents for that inode.
10586          */
10587         while (1) {
10588                 leaf = path.nodes[0];
10589                 slot = path.slots[0];
10590
10591                 btrfs_item_key_to_cpu(leaf, &key, slot);
10592                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10593                         break;
10594                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10595                 /*
10596                  * Except normal disk bytenr and disk num bytes, we still
10597                  * need to do extra check on dbackref offset as
10598                  * dbackref offset = file_offset - file_extent_offset
10599                  */
10600                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10601                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10602                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10603                     offset)
10604                         found_count++;
10605
10606                 ret = btrfs_next_item(root, &path);
10607                 if (ret)
10608                         break;
10609         }
10610 out:
10611         btrfs_release_path(&path);
10612         if (found_count != count) {
10613                 error(
10614 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10615                         bytenr, len, root_id, objectid, offset, count, found_count);
10616                 return REFERENCER_MISSING;
10617         }
10618         return 0;
10619 }
10620
10621 /*
10622  * Check if the referencer of a shared data backref exists
10623  */
10624 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10625                                      u64 parent, u64 bytenr)
10626 {
10627         struct extent_buffer *eb;
10628         struct btrfs_key key;
10629         struct btrfs_file_extent_item *fi;
10630         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10631         u32 nr;
10632         int found_parent = 0;
10633         int i;
10634
10635         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10636         if (!extent_buffer_uptodate(eb))
10637                 goto out;
10638
10639         nr = btrfs_header_nritems(eb);
10640         for (i = 0; i < nr; i++) {
10641                 btrfs_item_key_to_cpu(eb, &key, i);
10642                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10643                         continue;
10644
10645                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10646                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10647                         continue;
10648
10649                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10650                         found_parent = 1;
10651                         break;
10652                 }
10653         }
10654
10655 out:
10656         free_extent_buffer(eb);
10657         if (!found_parent) {
10658                 error("shared extent %llu referencer lost (parent: %llu)",
10659                         bytenr, parent);
10660                 return REFERENCER_MISSING;
10661         }
10662         return 0;
10663 }
10664
10665 /*
10666  * This function will check a given extent item, including its backref and
10667  * itself (like crossing stripe boundary and type)
10668  *
10669  * Since we don't use extent_record anymore, introduce new error bit
10670  */
10671 static int check_extent_item(struct btrfs_fs_info *fs_info,
10672                              struct extent_buffer *eb, int slot)
10673 {
10674         struct btrfs_extent_item *ei;
10675         struct btrfs_extent_inline_ref *iref;
10676         struct btrfs_extent_data_ref *dref;
10677         unsigned long end;
10678         unsigned long ptr;
10679         int type;
10680         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10681         u32 item_size = btrfs_item_size_nr(eb, slot);
10682         u64 flags;
10683         u64 offset;
10684         int metadata = 0;
10685         int level;
10686         struct btrfs_key key;
10687         int ret;
10688         int err = 0;
10689
10690         btrfs_item_key_to_cpu(eb, &key, slot);
10691         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10692                 bytes_used += key.offset;
10693         else
10694                 bytes_used += nodesize;
10695
10696         if (item_size < sizeof(*ei)) {
10697                 /*
10698                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10699                  * old thing when on disk format is still un-determined.
10700                  * No need to care about it anymore
10701                  */
10702                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10703                 return -ENOTTY;
10704         }
10705
10706         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10707         flags = btrfs_extent_flags(eb, ei);
10708
10709         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10710                 metadata = 1;
10711         if (metadata && check_crossing_stripes(global_info, key.objectid,
10712                                                eb->len)) {
10713                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10714                       key.objectid, key.objectid + nodesize);
10715                 err |= CROSSING_STRIPE_BOUNDARY;
10716         }
10717
10718         ptr = (unsigned long)(ei + 1);
10719
10720         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10721                 /* Old EXTENT_ITEM metadata */
10722                 struct btrfs_tree_block_info *info;
10723
10724                 info = (struct btrfs_tree_block_info *)ptr;
10725                 level = btrfs_tree_block_level(eb, info);
10726                 ptr += sizeof(struct btrfs_tree_block_info);
10727         } else {
10728                 /* New METADATA_ITEM */
10729                 level = key.offset;
10730         }
10731         end = (unsigned long)ei + item_size;
10732
10733         if (ptr >= end) {
10734                 err |= ITEM_SIZE_MISMATCH;
10735                 goto out;
10736         }
10737
10738         /* Now check every backref in this extent item */
10739 next:
10740         iref = (struct btrfs_extent_inline_ref *)ptr;
10741         type = btrfs_extent_inline_ref_type(eb, iref);
10742         offset = btrfs_extent_inline_ref_offset(eb, iref);
10743         switch (type) {
10744         case BTRFS_TREE_BLOCK_REF_KEY:
10745                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10746                                                level);
10747                 err |= ret;
10748                 break;
10749         case BTRFS_SHARED_BLOCK_REF_KEY:
10750                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10751                                                  level);
10752                 err |= ret;
10753                 break;
10754         case BTRFS_EXTENT_DATA_REF_KEY:
10755                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10756                 ret = check_extent_data_backref(fs_info,
10757                                 btrfs_extent_data_ref_root(eb, dref),
10758                                 btrfs_extent_data_ref_objectid(eb, dref),
10759                                 btrfs_extent_data_ref_offset(eb, dref),
10760                                 key.objectid, key.offset,
10761                                 btrfs_extent_data_ref_count(eb, dref));
10762                 err |= ret;
10763                 break;
10764         case BTRFS_SHARED_DATA_REF_KEY:
10765                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10766                 err |= ret;
10767                 break;
10768         default:
10769                 error("extent[%llu %d %llu] has unknown ref type: %d",
10770                         key.objectid, key.type, key.offset, type);
10771                 err |= UNKNOWN_TYPE;
10772                 goto out;
10773         }
10774
10775         ptr += btrfs_extent_inline_ref_size(type);
10776         if (ptr < end)
10777                 goto next;
10778
10779 out:
10780         return err;
10781 }
10782
10783 /*
10784  * Check if a dev extent item is referred correctly by its chunk
10785  */
10786 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10787                                  struct extent_buffer *eb, int slot)
10788 {
10789         struct btrfs_root *chunk_root = fs_info->chunk_root;
10790         struct btrfs_dev_extent *ptr;
10791         struct btrfs_path path;
10792         struct btrfs_key chunk_key;
10793         struct btrfs_key devext_key;
10794         struct btrfs_chunk *chunk;
10795         struct extent_buffer *l;
10796         int num_stripes;
10797         u64 length;
10798         int i;
10799         int found_chunk = 0;
10800         int ret;
10801
10802         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10803         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10804         length = btrfs_dev_extent_length(eb, ptr);
10805
10806         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10807         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10808         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10809
10810         btrfs_init_path(&path);
10811         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10812         if (ret)
10813                 goto out;
10814
10815         l = path.nodes[0];
10816         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10817         if (btrfs_chunk_length(l, chunk) != length)
10818                 goto out;
10819
10820         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10821         for (i = 0; i < num_stripes; i++) {
10822                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10823                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10824
10825                 if (devid == devext_key.objectid &&
10826                     offset == devext_key.offset) {
10827                         found_chunk = 1;
10828                         break;
10829                 }
10830         }
10831 out:
10832         btrfs_release_path(&path);
10833         if (!found_chunk) {
10834                 error(
10835                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10836                         devext_key.objectid, devext_key.offset, length);
10837                 return REFERENCER_MISSING;
10838         }
10839         return 0;
10840 }
10841
10842 /*
10843  * Check if the used space is correct with the dev item
10844  */
10845 static int check_dev_item(struct btrfs_fs_info *fs_info,
10846                           struct extent_buffer *eb, int slot)
10847 {
10848         struct btrfs_root *dev_root = fs_info->dev_root;
10849         struct btrfs_dev_item *dev_item;
10850         struct btrfs_path path;
10851         struct btrfs_key key;
10852         struct btrfs_dev_extent *ptr;
10853         u64 dev_id;
10854         u64 used;
10855         u64 total = 0;
10856         int ret;
10857
10858         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10859         dev_id = btrfs_device_id(eb, dev_item);
10860         used = btrfs_device_bytes_used(eb, dev_item);
10861
10862         key.objectid = dev_id;
10863         key.type = BTRFS_DEV_EXTENT_KEY;
10864         key.offset = 0;
10865
10866         btrfs_init_path(&path);
10867         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10868         if (ret < 0) {
10869                 btrfs_item_key_to_cpu(eb, &key, slot);
10870                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10871                         key.objectid, key.type, key.offset);
10872                 btrfs_release_path(&path);
10873                 return REFERENCER_MISSING;
10874         }
10875
10876         /* Iterate dev_extents to calculate the used space of a device */
10877         while (1) {
10878                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10879
10880                 if (key.objectid > dev_id)
10881                         break;
10882                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10883                         goto next;
10884
10885                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10886                                      struct btrfs_dev_extent);
10887                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10888 next:
10889                 ret = btrfs_next_item(dev_root, &path);
10890                 if (ret)
10891                         break;
10892         }
10893         btrfs_release_path(&path);
10894
10895         if (used != total) {
10896                 btrfs_item_key_to_cpu(eb, &key, slot);
10897                 error(
10898 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10899                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10900                         BTRFS_DEV_EXTENT_KEY, dev_id);
10901                 return ACCOUNTING_MISMATCH;
10902         }
10903         return 0;
10904 }
10905
10906 /*
10907  * Check a block group item with its referener (chunk) and its used space
10908  * with extent/metadata item
10909  */
10910 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10911                                   struct extent_buffer *eb, int slot)
10912 {
10913         struct btrfs_root *extent_root = fs_info->extent_root;
10914         struct btrfs_root *chunk_root = fs_info->chunk_root;
10915         struct btrfs_block_group_item *bi;
10916         struct btrfs_block_group_item bg_item;
10917         struct btrfs_path path;
10918         struct btrfs_key bg_key;
10919         struct btrfs_key chunk_key;
10920         struct btrfs_key extent_key;
10921         struct btrfs_chunk *chunk;
10922         struct extent_buffer *leaf;
10923         struct btrfs_extent_item *ei;
10924         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10925         u64 flags;
10926         u64 bg_flags;
10927         u64 used;
10928         u64 total = 0;
10929         int ret;
10930         int err = 0;
10931
10932         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10933         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10934         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10935         used = btrfs_block_group_used(&bg_item);
10936         bg_flags = btrfs_block_group_flags(&bg_item);
10937
10938         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10939         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10940         chunk_key.offset = bg_key.objectid;
10941
10942         btrfs_init_path(&path);
10943         /* Search for the referencer chunk */
10944         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10945         if (ret) {
10946                 error(
10947                 "block group[%llu %llu] did not find the related chunk item",
10948                         bg_key.objectid, bg_key.offset);
10949                 err |= REFERENCER_MISSING;
10950         } else {
10951                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10952                                         struct btrfs_chunk);
10953                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10954                                                 bg_key.offset) {
10955                         error(
10956         "block group[%llu %llu] related chunk item length does not match",
10957                                 bg_key.objectid, bg_key.offset);
10958                         err |= REFERENCER_MISMATCH;
10959                 }
10960         }
10961         btrfs_release_path(&path);
10962
10963         /* Search from the block group bytenr */
10964         extent_key.objectid = bg_key.objectid;
10965         extent_key.type = 0;
10966         extent_key.offset = 0;
10967
10968         btrfs_init_path(&path);
10969         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10970         if (ret < 0)
10971                 goto out;
10972
10973         /* Iterate extent tree to account used space */
10974         while (1) {
10975                 leaf = path.nodes[0];
10976                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10977                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10978                         break;
10979
10980                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10981                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10982                         goto next;
10983                 if (extent_key.objectid < bg_key.objectid)
10984                         goto next;
10985
10986                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10987                         total += nodesize;
10988                 else
10989                         total += extent_key.offset;
10990
10991                 ei = btrfs_item_ptr(leaf, path.slots[0],
10992                                     struct btrfs_extent_item);
10993                 flags = btrfs_extent_flags(leaf, ei);
10994                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10995                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10996                                 error(
10997                         "bad extent[%llu, %llu) type mismatch with chunk",
10998                                         extent_key.objectid,
10999                                         extent_key.objectid + extent_key.offset);
11000                                 err |= CHUNK_TYPE_MISMATCH;
11001                         }
11002                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11003                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11004                                     BTRFS_BLOCK_GROUP_METADATA))) {
11005                                 error(
11006                         "bad extent[%llu, %llu) type mismatch with chunk",
11007                                         extent_key.objectid,
11008                                         extent_key.objectid + nodesize);
11009                                 err |= CHUNK_TYPE_MISMATCH;
11010                         }
11011                 }
11012 next:
11013                 ret = btrfs_next_item(extent_root, &path);
11014                 if (ret)
11015                         break;
11016         }
11017
11018 out:
11019         btrfs_release_path(&path);
11020
11021         if (total != used) {
11022                 error(
11023                 "block group[%llu %llu] used %llu but extent items used %llu",
11024                         bg_key.objectid, bg_key.offset, used, total);
11025                 err |= ACCOUNTING_MISMATCH;
11026         }
11027         return err;
11028 }
11029
11030 /*
11031  * Check a chunk item.
11032  * Including checking all referred dev_extents and block group
11033  */
11034 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11035                             struct extent_buffer *eb, int slot)
11036 {
11037         struct btrfs_root *extent_root = fs_info->extent_root;
11038         struct btrfs_root *dev_root = fs_info->dev_root;
11039         struct btrfs_path path;
11040         struct btrfs_key chunk_key;
11041         struct btrfs_key bg_key;
11042         struct btrfs_key devext_key;
11043         struct btrfs_chunk *chunk;
11044         struct extent_buffer *leaf;
11045         struct btrfs_block_group_item *bi;
11046         struct btrfs_block_group_item bg_item;
11047         struct btrfs_dev_extent *ptr;
11048         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11049         u64 length;
11050         u64 chunk_end;
11051         u64 type;
11052         u64 profile;
11053         int num_stripes;
11054         u64 offset;
11055         u64 objectid;
11056         int i;
11057         int ret;
11058         int err = 0;
11059
11060         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11061         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11062         length = btrfs_chunk_length(eb, chunk);
11063         chunk_end = chunk_key.offset + length;
11064         if (!IS_ALIGNED(length, sectorsize)) {
11065                 error("chunk[%llu %llu) not aligned to %u",
11066                         chunk_key.offset, chunk_end, sectorsize);
11067                 err |= BYTES_UNALIGNED;
11068                 goto out;
11069         }
11070
11071         type = btrfs_chunk_type(eb, chunk);
11072         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11073         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11074                 error("chunk[%llu %llu) has no chunk type",
11075                         chunk_key.offset, chunk_end);
11076                 err |= UNKNOWN_TYPE;
11077         }
11078         if (profile && (profile & (profile - 1))) {
11079                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11080                         chunk_key.offset, chunk_end, profile);
11081                 err |= UNKNOWN_TYPE;
11082         }
11083
11084         bg_key.objectid = chunk_key.offset;
11085         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11086         bg_key.offset = length;
11087
11088         btrfs_init_path(&path);
11089         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11090         if (ret) {
11091                 error(
11092                 "chunk[%llu %llu) did not find the related block group item",
11093                         chunk_key.offset, chunk_end);
11094                 err |= REFERENCER_MISSING;
11095         } else{
11096                 leaf = path.nodes[0];
11097                 bi = btrfs_item_ptr(leaf, path.slots[0],
11098                                     struct btrfs_block_group_item);
11099                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11100                                    sizeof(bg_item));
11101                 if (btrfs_block_group_flags(&bg_item) != type) {
11102                         error(
11103 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11104                                 chunk_key.offset, chunk_end, type,
11105                                 btrfs_block_group_flags(&bg_item));
11106                         err |= REFERENCER_MISSING;
11107                 }
11108         }
11109
11110         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11111         for (i = 0; i < num_stripes; i++) {
11112                 btrfs_release_path(&path);
11113                 btrfs_init_path(&path);
11114                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11115                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11116                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11117
11118                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11119                                         0, 0);
11120                 if (ret)
11121                         goto not_match_dev;
11122
11123                 leaf = path.nodes[0];
11124                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11125                                      struct btrfs_dev_extent);
11126                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11127                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11128                 if (objectid != chunk_key.objectid ||
11129                     offset != chunk_key.offset ||
11130                     btrfs_dev_extent_length(leaf, ptr) != length)
11131                         goto not_match_dev;
11132                 continue;
11133 not_match_dev:
11134                 err |= BACKREF_MISSING;
11135                 error(
11136                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11137                         chunk_key.objectid, chunk_end, i);
11138                 continue;
11139         }
11140         btrfs_release_path(&path);
11141 out:
11142         return err;
11143 }
11144
11145 /*
11146  * Main entry function to check known items and update related accounting info
11147  */
11148 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11149 {
11150         struct btrfs_fs_info *fs_info = root->fs_info;
11151         struct btrfs_key key;
11152         int slot = 0;
11153         int type;
11154         struct btrfs_extent_data_ref *dref;
11155         int ret;
11156         int err = 0;
11157
11158 next:
11159         btrfs_item_key_to_cpu(eb, &key, slot);
11160         type = key.type;
11161
11162         switch (type) {
11163         case BTRFS_EXTENT_DATA_KEY:
11164                 ret = check_extent_data_item(root, eb, slot);
11165                 err |= ret;
11166                 break;
11167         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11168                 ret = check_block_group_item(fs_info, eb, slot);
11169                 err |= ret;
11170                 break;
11171         case BTRFS_DEV_ITEM_KEY:
11172                 ret = check_dev_item(fs_info, eb, slot);
11173                 err |= ret;
11174                 break;
11175         case BTRFS_CHUNK_ITEM_KEY:
11176                 ret = check_chunk_item(fs_info, eb, slot);
11177                 err |= ret;
11178                 break;
11179         case BTRFS_DEV_EXTENT_KEY:
11180                 ret = check_dev_extent_item(fs_info, eb, slot);
11181                 err |= ret;
11182                 break;
11183         case BTRFS_EXTENT_ITEM_KEY:
11184         case BTRFS_METADATA_ITEM_KEY:
11185                 ret = check_extent_item(fs_info, eb, slot);
11186                 err |= ret;
11187                 break;
11188         case BTRFS_EXTENT_CSUM_KEY:
11189                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11190                 break;
11191         case BTRFS_TREE_BLOCK_REF_KEY:
11192                 ret = check_tree_block_backref(fs_info, key.offset,
11193                                                key.objectid, -1);
11194                 err |= ret;
11195                 break;
11196         case BTRFS_EXTENT_DATA_REF_KEY:
11197                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11198                 ret = check_extent_data_backref(fs_info,
11199                                 btrfs_extent_data_ref_root(eb, dref),
11200                                 btrfs_extent_data_ref_objectid(eb, dref),
11201                                 btrfs_extent_data_ref_offset(eb, dref),
11202                                 key.objectid, 0,
11203                                 btrfs_extent_data_ref_count(eb, dref));
11204                 err |= ret;
11205                 break;
11206         case BTRFS_SHARED_BLOCK_REF_KEY:
11207                 ret = check_shared_block_backref(fs_info, key.offset,
11208                                                  key.objectid, -1);
11209                 err |= ret;
11210                 break;
11211         case BTRFS_SHARED_DATA_REF_KEY:
11212                 ret = check_shared_data_backref(fs_info, key.offset,
11213                                                 key.objectid);
11214                 err |= ret;
11215                 break;
11216         default:
11217                 break;
11218         }
11219
11220         if (++slot < btrfs_header_nritems(eb))
11221                 goto next;
11222
11223         return err;
11224 }
11225
11226 /*
11227  * Helper function for later fs/subvol tree check.  To determine if a tree
11228  * block should be checked.
11229  * This function will ensure only the direct referencer with lowest rootid to
11230  * check a fs/subvolume tree block.
11231  *
11232  * Backref check at extent tree would detect errors like missing subvolume
11233  * tree, so we can do aggressive check to reduce duplicated checks.
11234  */
11235 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11236 {
11237         struct btrfs_root *extent_root = root->fs_info->extent_root;
11238         struct btrfs_key key;
11239         struct btrfs_path path;
11240         struct extent_buffer *leaf;
11241         int slot;
11242         struct btrfs_extent_item *ei;
11243         unsigned long ptr;
11244         unsigned long end;
11245         int type;
11246         u32 item_size;
11247         u64 offset;
11248         struct btrfs_extent_inline_ref *iref;
11249         int ret;
11250
11251         btrfs_init_path(&path);
11252         key.objectid = btrfs_header_bytenr(eb);
11253         key.type = BTRFS_METADATA_ITEM_KEY;
11254         key.offset = (u64)-1;
11255
11256         /*
11257          * Any failure in backref resolving means we can't determine
11258          * whom the tree block belongs to.
11259          * So in that case, we need to check that tree block
11260          */
11261         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11262         if (ret < 0)
11263                 goto need_check;
11264
11265         ret = btrfs_previous_extent_item(extent_root, &path,
11266                                          btrfs_header_bytenr(eb));
11267         if (ret)
11268                 goto need_check;
11269
11270         leaf = path.nodes[0];
11271         slot = path.slots[0];
11272         btrfs_item_key_to_cpu(leaf, &key, slot);
11273         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11274
11275         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11276                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11277         } else {
11278                 struct btrfs_tree_block_info *info;
11279
11280                 info = (struct btrfs_tree_block_info *)(ei + 1);
11281                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11282         }
11283
11284         item_size = btrfs_item_size_nr(leaf, slot);
11285         ptr = (unsigned long)iref;
11286         end = (unsigned long)ei + item_size;
11287         while (ptr < end) {
11288                 iref = (struct btrfs_extent_inline_ref *)ptr;
11289                 type = btrfs_extent_inline_ref_type(leaf, iref);
11290                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11291
11292                 /*
11293                  * We only check the tree block if current root is
11294                  * the lowest referencer of it.
11295                  */
11296                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11297                     offset < root->objectid) {
11298                         btrfs_release_path(&path);
11299                         return 0;
11300                 }
11301
11302                 ptr += btrfs_extent_inline_ref_size(type);
11303         }
11304         /*
11305          * Normally we should also check keyed tree block ref, but that may be
11306          * very time consuming.  Inlined ref should already make us skip a lot
11307          * of refs now.  So skip search keyed tree block ref.
11308          */
11309
11310 need_check:
11311         btrfs_release_path(&path);
11312         return 1;
11313 }
11314
11315 /*
11316  * Traversal function for tree block. We will do:
11317  * 1) Skip shared fs/subvolume tree blocks
11318  * 2) Update related bytes accounting
11319  * 3) Pre-order traversal
11320  */
11321 static int traverse_tree_block(struct btrfs_root *root,
11322                                 struct extent_buffer *node)
11323 {
11324         struct extent_buffer *eb;
11325         struct btrfs_key key;
11326         struct btrfs_key drop_key;
11327         int level;
11328         u64 nr;
11329         int i;
11330         int err = 0;
11331         int ret;
11332
11333         /*
11334          * Skip shared fs/subvolume tree block, in that case they will
11335          * be checked by referencer with lowest rootid
11336          */
11337         if (is_fstree(root->objectid) && !should_check(root, node))
11338                 return 0;
11339
11340         /* Update bytes accounting */
11341         total_btree_bytes += node->len;
11342         if (fs_root_objectid(btrfs_header_owner(node)))
11343                 total_fs_tree_bytes += node->len;
11344         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11345                 total_extent_tree_bytes += node->len;
11346         if (!found_old_backref &&
11347             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11348             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11349             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11350                 found_old_backref = 1;
11351
11352         /* pre-order tranversal, check itself first */
11353         level = btrfs_header_level(node);
11354         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11355                                    btrfs_header_level(node),
11356                                    btrfs_header_owner(node));
11357         err |= ret;
11358         if (err)
11359                 error(
11360         "check %s failed root %llu bytenr %llu level %d, force continue check",
11361                         level ? "node":"leaf", root->objectid,
11362                         btrfs_header_bytenr(node), btrfs_header_level(node));
11363
11364         if (!level) {
11365                 btree_space_waste += btrfs_leaf_free_space(root, node);
11366                 ret = check_leaf_items(root, node);
11367                 err |= ret;
11368                 return err;
11369         }
11370
11371         nr = btrfs_header_nritems(node);
11372         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11373         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11374                 sizeof(struct btrfs_key_ptr);
11375
11376         /* Then check all its children */
11377         for (i = 0; i < nr; i++) {
11378                 u64 blocknr = btrfs_node_blockptr(node, i);
11379
11380                 btrfs_node_key_to_cpu(node, &key, i);
11381                 if (level == root->root_item.drop_level &&
11382                     is_dropped_key(&key, &drop_key))
11383                         continue;
11384
11385                 /*
11386                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11387                  * to call the function itself.
11388                  */
11389                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11390                 if (extent_buffer_uptodate(eb)) {
11391                         ret = traverse_tree_block(root, eb);
11392                         err |= ret;
11393                 }
11394                 free_extent_buffer(eb);
11395         }
11396
11397         return err;
11398 }
11399
11400 /*
11401  * Low memory usage version check_chunks_and_extents.
11402  */
11403 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11404 {
11405         struct btrfs_path path;
11406         struct btrfs_key key;
11407         struct btrfs_root *root1;
11408         struct btrfs_root *cur_root;
11409         int err = 0;
11410         int ret;
11411
11412         root1 = root->fs_info->chunk_root;
11413         ret = traverse_tree_block(root1, root1->node);
11414         err |= ret;
11415
11416         root1 = root->fs_info->tree_root;
11417         ret = traverse_tree_block(root1, root1->node);
11418         err |= ret;
11419
11420         btrfs_init_path(&path);
11421         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11422         key.offset = 0;
11423         key.type = BTRFS_ROOT_ITEM_KEY;
11424
11425         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11426         if (ret) {
11427                 error("cannot find extent treet in tree_root");
11428                 goto out;
11429         }
11430
11431         while (1) {
11432                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11433                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11434                         goto next;
11435                 key.offset = (u64)-1;
11436
11437                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11438                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11439                                         &key);
11440                 else
11441                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11442                 if (IS_ERR(cur_root) || !cur_root) {
11443                         error("failed to read tree: %lld", key.objectid);
11444                         goto next;
11445                 }
11446
11447                 ret = traverse_tree_block(cur_root, cur_root->node);
11448                 err |= ret;
11449
11450                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11451                         btrfs_free_fs_root(cur_root);
11452 next:
11453                 ret = btrfs_next_item(root1, &path);
11454                 if (ret)
11455                         goto out;
11456         }
11457
11458 out:
11459         btrfs_release_path(&path);
11460         return err;
11461 }
11462
11463 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11464                            struct btrfs_root *root, int overwrite)
11465 {
11466         struct extent_buffer *c;
11467         struct extent_buffer *old = root->node;
11468         int level;
11469         int ret;
11470         struct btrfs_disk_key disk_key = {0,0,0};
11471
11472         level = 0;
11473
11474         if (overwrite) {
11475                 c = old;
11476                 extent_buffer_get(c);
11477                 goto init;
11478         }
11479         c = btrfs_alloc_free_block(trans, root,
11480                                    root->nodesize,
11481                                    root->root_key.objectid,
11482                                    &disk_key, level, 0, 0);
11483         if (IS_ERR(c)) {
11484                 c = old;
11485                 extent_buffer_get(c);
11486                 overwrite = 1;
11487         }
11488 init:
11489         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11490         btrfs_set_header_level(c, level);
11491         btrfs_set_header_bytenr(c, c->start);
11492         btrfs_set_header_generation(c, trans->transid);
11493         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11494         btrfs_set_header_owner(c, root->root_key.objectid);
11495
11496         write_extent_buffer(c, root->fs_info->fsid,
11497                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11498
11499         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11500                             btrfs_header_chunk_tree_uuid(c),
11501                             BTRFS_UUID_SIZE);
11502
11503         btrfs_mark_buffer_dirty(c);
11504         /*
11505          * this case can happen in the following case:
11506          *
11507          * 1.overwrite previous root.
11508          *
11509          * 2.reinit reloc data root, this is because we skip pin
11510          * down reloc data tree before which means we can allocate
11511          * same block bytenr here.
11512          */
11513         if (old->start == c->start) {
11514                 btrfs_set_root_generation(&root->root_item,
11515                                           trans->transid);
11516                 root->root_item.level = btrfs_header_level(root->node);
11517                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11518                                         &root->root_key, &root->root_item);
11519                 if (ret) {
11520                         free_extent_buffer(c);
11521                         return ret;
11522                 }
11523         }
11524         free_extent_buffer(old);
11525         root->node = c;
11526         add_root_to_dirty_list(root);
11527         return 0;
11528 }
11529
11530 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11531                                 struct extent_buffer *eb, int tree_root)
11532 {
11533         struct extent_buffer *tmp;
11534         struct btrfs_root_item *ri;
11535         struct btrfs_key key;
11536         u64 bytenr;
11537         u32 nodesize;
11538         int level = btrfs_header_level(eb);
11539         int nritems;
11540         int ret;
11541         int i;
11542
11543         /*
11544          * If we have pinned this block before, don't pin it again.
11545          * This can not only avoid forever loop with broken filesystem
11546          * but also give us some speedups.
11547          */
11548         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11549                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11550                 return 0;
11551
11552         btrfs_pin_extent(fs_info, eb->start, eb->len);
11553
11554         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11555         nritems = btrfs_header_nritems(eb);
11556         for (i = 0; i < nritems; i++) {
11557                 if (level == 0) {
11558                         btrfs_item_key_to_cpu(eb, &key, i);
11559                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11560                                 continue;
11561                         /* Skip the extent root and reloc roots */
11562                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11563                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11564                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11565                                 continue;
11566                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11567                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11568
11569                         /*
11570                          * If at any point we start needing the real root we
11571                          * will have to build a stump root for the root we are
11572                          * in, but for now this doesn't actually use the root so
11573                          * just pass in extent_root.
11574                          */
11575                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11576                                               nodesize, 0);
11577                         if (!extent_buffer_uptodate(tmp)) {
11578                                 fprintf(stderr, "Error reading root block\n");
11579                                 return -EIO;
11580                         }
11581                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11582                         free_extent_buffer(tmp);
11583                         if (ret)
11584                                 return ret;
11585                 } else {
11586                         bytenr = btrfs_node_blockptr(eb, i);
11587
11588                         /* If we aren't the tree root don't read the block */
11589                         if (level == 1 && !tree_root) {
11590                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11591                                 continue;
11592                         }
11593
11594                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11595                                               nodesize, 0);
11596                         if (!extent_buffer_uptodate(tmp)) {
11597                                 fprintf(stderr, "Error reading tree block\n");
11598                                 return -EIO;
11599                         }
11600                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11601                         free_extent_buffer(tmp);
11602                         if (ret)
11603                                 return ret;
11604                 }
11605         }
11606
11607         return 0;
11608 }
11609
11610 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11611 {
11612         int ret;
11613
11614         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11615         if (ret)
11616                 return ret;
11617
11618         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11619 }
11620
11621 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11622 {
11623         struct btrfs_block_group_cache *cache;
11624         struct btrfs_path path;
11625         struct extent_buffer *leaf;
11626         struct btrfs_chunk *chunk;
11627         struct btrfs_key key;
11628         int ret;
11629         u64 start;
11630
11631         btrfs_init_path(&path);
11632         key.objectid = 0;
11633         key.type = BTRFS_CHUNK_ITEM_KEY;
11634         key.offset = 0;
11635         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11636         if (ret < 0) {
11637                 btrfs_release_path(&path);
11638                 return ret;
11639         }
11640
11641         /*
11642          * We do this in case the block groups were screwed up and had alloc
11643          * bits that aren't actually set on the chunks.  This happens with
11644          * restored images every time and could happen in real life I guess.
11645          */
11646         fs_info->avail_data_alloc_bits = 0;
11647         fs_info->avail_metadata_alloc_bits = 0;
11648         fs_info->avail_system_alloc_bits = 0;
11649
11650         /* First we need to create the in-memory block groups */
11651         while (1) {
11652                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11653                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11654                         if (ret < 0) {
11655                                 btrfs_release_path(&path);
11656                                 return ret;
11657                         }
11658                         if (ret) {
11659                                 ret = 0;
11660                                 break;
11661                         }
11662                 }
11663                 leaf = path.nodes[0];
11664                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11665                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11666                         path.slots[0]++;
11667                         continue;
11668                 }
11669
11670                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11671                 btrfs_add_block_group(fs_info, 0,
11672                                       btrfs_chunk_type(leaf, chunk),
11673                                       key.objectid, key.offset,
11674                                       btrfs_chunk_length(leaf, chunk));
11675                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11676                                  key.offset + btrfs_chunk_length(leaf, chunk),
11677                                  GFP_NOFS);
11678                 path.slots[0]++;
11679         }
11680         start = 0;
11681         while (1) {
11682                 cache = btrfs_lookup_first_block_group(fs_info, start);
11683                 if (!cache)
11684                         break;
11685                 cache->cached = 1;
11686                 start = cache->key.objectid + cache->key.offset;
11687         }
11688
11689         btrfs_release_path(&path);
11690         return 0;
11691 }
11692
11693 static int reset_balance(struct btrfs_trans_handle *trans,
11694                          struct btrfs_fs_info *fs_info)
11695 {
11696         struct btrfs_root *root = fs_info->tree_root;
11697         struct btrfs_path path;
11698         struct extent_buffer *leaf;
11699         struct btrfs_key key;
11700         int del_slot, del_nr = 0;
11701         int ret;
11702         int found = 0;
11703
11704         btrfs_init_path(&path);
11705         key.objectid = BTRFS_BALANCE_OBJECTID;
11706         key.type = BTRFS_BALANCE_ITEM_KEY;
11707         key.offset = 0;
11708         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11709         if (ret) {
11710                 if (ret > 0)
11711                         ret = 0;
11712                 if (!ret)
11713                         goto reinit_data_reloc;
11714                 else
11715                         goto out;
11716         }
11717
11718         ret = btrfs_del_item(trans, root, &path);
11719         if (ret)
11720                 goto out;
11721         btrfs_release_path(&path);
11722
11723         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11724         key.type = BTRFS_ROOT_ITEM_KEY;
11725         key.offset = 0;
11726         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11727         if (ret < 0)
11728                 goto out;
11729         while (1) {
11730                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11731                         if (!found)
11732                                 break;
11733
11734                         if (del_nr) {
11735                                 ret = btrfs_del_items(trans, root, &path,
11736                                                       del_slot, del_nr);
11737                                 del_nr = 0;
11738                                 if (ret)
11739                                         goto out;
11740                         }
11741                         key.offset++;
11742                         btrfs_release_path(&path);
11743
11744                         found = 0;
11745                         ret = btrfs_search_slot(trans, root, &key, &path,
11746                                                 -1, 1);
11747                         if (ret < 0)
11748                                 goto out;
11749                         continue;
11750                 }
11751                 found = 1;
11752                 leaf = path.nodes[0];
11753                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11754                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11755                         break;
11756                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11757                         path.slots[0]++;
11758                         continue;
11759                 }
11760                 if (!del_nr) {
11761                         del_slot = path.slots[0];
11762                         del_nr = 1;
11763                 } else {
11764                         del_nr++;
11765                 }
11766                 path.slots[0]++;
11767         }
11768
11769         if (del_nr) {
11770                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11771                 if (ret)
11772                         goto out;
11773         }
11774         btrfs_release_path(&path);
11775
11776 reinit_data_reloc:
11777         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11778         key.type = BTRFS_ROOT_ITEM_KEY;
11779         key.offset = (u64)-1;
11780         root = btrfs_read_fs_root(fs_info, &key);
11781         if (IS_ERR(root)) {
11782                 fprintf(stderr, "Error reading data reloc tree\n");
11783                 ret = PTR_ERR(root);
11784                 goto out;
11785         }
11786         record_root_in_trans(trans, root);
11787         ret = btrfs_fsck_reinit_root(trans, root, 0);
11788         if (ret)
11789                 goto out;
11790         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11791 out:
11792         btrfs_release_path(&path);
11793         return ret;
11794 }
11795
11796 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11797                               struct btrfs_fs_info *fs_info)
11798 {
11799         u64 start = 0;
11800         int ret;
11801
11802         /*
11803          * The only reason we don't do this is because right now we're just
11804          * walking the trees we find and pinning down their bytes, we don't look
11805          * at any of the leaves.  In order to do mixed groups we'd have to check
11806          * the leaves of any fs roots and pin down the bytes for any file
11807          * extents we find.  Not hard but why do it if we don't have to?
11808          */
11809         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11810                 fprintf(stderr, "We don't support re-initing the extent tree "
11811                         "for mixed block groups yet, please notify a btrfs "
11812                         "developer you want to do this so they can add this "
11813                         "functionality.\n");
11814                 return -EINVAL;
11815         }
11816
11817         /*
11818          * first we need to walk all of the trees except the extent tree and pin
11819          * down the bytes that are in use so we don't overwrite any existing
11820          * metadata.
11821          */
11822         ret = pin_metadata_blocks(fs_info);
11823         if (ret) {
11824                 fprintf(stderr, "error pinning down used bytes\n");
11825                 return ret;
11826         }
11827
11828         /*
11829          * Need to drop all the block groups since we're going to recreate all
11830          * of them again.
11831          */
11832         btrfs_free_block_groups(fs_info);
11833         ret = reset_block_groups(fs_info);
11834         if (ret) {
11835                 fprintf(stderr, "error resetting the block groups\n");
11836                 return ret;
11837         }
11838
11839         /* Ok we can allocate now, reinit the extent root */
11840         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11841         if (ret) {
11842                 fprintf(stderr, "extent root initialization failed\n");
11843                 /*
11844                  * When the transaction code is updated we should end the
11845                  * transaction, but for now progs only knows about commit so
11846                  * just return an error.
11847                  */
11848                 return ret;
11849         }
11850
11851         /*
11852          * Now we have all the in-memory block groups setup so we can make
11853          * allocations properly, and the metadata we care about is safe since we
11854          * pinned all of it above.
11855          */
11856         while (1) {
11857                 struct btrfs_block_group_cache *cache;
11858
11859                 cache = btrfs_lookup_first_block_group(fs_info, start);
11860                 if (!cache)
11861                         break;
11862                 start = cache->key.objectid + cache->key.offset;
11863                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11864                                         &cache->key, &cache->item,
11865                                         sizeof(cache->item));
11866                 if (ret) {
11867                         fprintf(stderr, "Error adding block group\n");
11868                         return ret;
11869                 }
11870                 btrfs_extent_post_op(trans, fs_info->extent_root);
11871         }
11872
11873         ret = reset_balance(trans, fs_info);
11874         if (ret)
11875                 fprintf(stderr, "error resetting the pending balance\n");
11876
11877         return ret;
11878 }
11879
11880 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11881 {
11882         struct btrfs_path path;
11883         struct btrfs_trans_handle *trans;
11884         struct btrfs_key key;
11885         int ret;
11886
11887         printf("Recowing metadata block %llu\n", eb->start);
11888         key.objectid = btrfs_header_owner(eb);
11889         key.type = BTRFS_ROOT_ITEM_KEY;
11890         key.offset = (u64)-1;
11891
11892         root = btrfs_read_fs_root(root->fs_info, &key);
11893         if (IS_ERR(root)) {
11894                 fprintf(stderr, "Couldn't find owner root %llu\n",
11895                         key.objectid);
11896                 return PTR_ERR(root);
11897         }
11898
11899         trans = btrfs_start_transaction(root, 1);
11900         if (IS_ERR(trans))
11901                 return PTR_ERR(trans);
11902
11903         btrfs_init_path(&path);
11904         path.lowest_level = btrfs_header_level(eb);
11905         if (path.lowest_level)
11906                 btrfs_node_key_to_cpu(eb, &key, 0);
11907         else
11908                 btrfs_item_key_to_cpu(eb, &key, 0);
11909
11910         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11911         btrfs_commit_transaction(trans, root);
11912         btrfs_release_path(&path);
11913         return ret;
11914 }
11915
11916 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11917 {
11918         struct btrfs_path path;
11919         struct btrfs_trans_handle *trans;
11920         struct btrfs_key key;
11921         int ret;
11922
11923         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11924                bad->key.type, bad->key.offset);
11925         key.objectid = bad->root_id;
11926         key.type = BTRFS_ROOT_ITEM_KEY;
11927         key.offset = (u64)-1;
11928
11929         root = btrfs_read_fs_root(root->fs_info, &key);
11930         if (IS_ERR(root)) {
11931                 fprintf(stderr, "Couldn't find owner root %llu\n",
11932                         key.objectid);
11933                 return PTR_ERR(root);
11934         }
11935
11936         trans = btrfs_start_transaction(root, 1);
11937         if (IS_ERR(trans))
11938                 return PTR_ERR(trans);
11939
11940         btrfs_init_path(&path);
11941         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11942         if (ret) {
11943                 if (ret > 0)
11944                         ret = 0;
11945                 goto out;
11946         }
11947         ret = btrfs_del_item(trans, root, &path);
11948 out:
11949         btrfs_commit_transaction(trans, root);
11950         btrfs_release_path(&path);
11951         return ret;
11952 }
11953
11954 static int zero_log_tree(struct btrfs_root *root)
11955 {
11956         struct btrfs_trans_handle *trans;
11957         int ret;
11958
11959         trans = btrfs_start_transaction(root, 1);
11960         if (IS_ERR(trans)) {
11961                 ret = PTR_ERR(trans);
11962                 return ret;
11963         }
11964         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11965         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11966         ret = btrfs_commit_transaction(trans, root);
11967         return ret;
11968 }
11969
11970 static int populate_csum(struct btrfs_trans_handle *trans,
11971                          struct btrfs_root *csum_root, char *buf, u64 start,
11972                          u64 len)
11973 {
11974         u64 offset = 0;
11975         u64 sectorsize;
11976         int ret = 0;
11977
11978         while (offset < len) {
11979                 sectorsize = csum_root->sectorsize;
11980                 ret = read_extent_data(csum_root, buf, start + offset,
11981                                        &sectorsize, 0);
11982                 if (ret)
11983                         break;
11984                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11985                                             start + offset, buf, sectorsize);
11986                 if (ret)
11987                         break;
11988                 offset += sectorsize;
11989         }
11990         return ret;
11991 }
11992
11993 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11994                                       struct btrfs_root *csum_root,
11995                                       struct btrfs_root *cur_root)
11996 {
11997         struct btrfs_path path;
11998         struct btrfs_key key;
11999         struct extent_buffer *node;
12000         struct btrfs_file_extent_item *fi;
12001         char *buf = NULL;
12002         u64 start = 0;
12003         u64 len = 0;
12004         int slot = 0;
12005         int ret = 0;
12006
12007         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12008         if (!buf)
12009                 return -ENOMEM;
12010
12011         btrfs_init_path(&path);
12012         key.objectid = 0;
12013         key.offset = 0;
12014         key.type = 0;
12015         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12016         if (ret < 0)
12017                 goto out;
12018         /* Iterate all regular file extents and fill its csum */
12019         while (1) {
12020                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12021
12022                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12023                         goto next;
12024                 node = path.nodes[0];
12025                 slot = path.slots[0];
12026                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12027                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12028                         goto next;
12029                 start = btrfs_file_extent_disk_bytenr(node, fi);
12030                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12031
12032                 ret = populate_csum(trans, csum_root, buf, start, len);
12033                 if (ret == -EEXIST)
12034                         ret = 0;
12035                 if (ret < 0)
12036                         goto out;
12037 next:
12038                 /*
12039                  * TODO: if next leaf is corrupted, jump to nearest next valid
12040                  * leaf.
12041                  */
12042                 ret = btrfs_next_item(cur_root, &path);
12043                 if (ret < 0)
12044                         goto out;
12045                 if (ret > 0) {
12046                         ret = 0;
12047                         goto out;
12048                 }
12049         }
12050
12051 out:
12052         btrfs_release_path(&path);
12053         free(buf);
12054         return ret;
12055 }
12056
12057 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12058                                   struct btrfs_root *csum_root)
12059 {
12060         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12061         struct btrfs_path path;
12062         struct btrfs_root *tree_root = fs_info->tree_root;
12063         struct btrfs_root *cur_root;
12064         struct extent_buffer *node;
12065         struct btrfs_key key;
12066         int slot = 0;
12067         int ret = 0;
12068
12069         btrfs_init_path(&path);
12070         key.objectid = BTRFS_FS_TREE_OBJECTID;
12071         key.offset = 0;
12072         key.type = BTRFS_ROOT_ITEM_KEY;
12073         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12074         if (ret < 0)
12075                 goto out;
12076         if (ret > 0) {
12077                 ret = -ENOENT;
12078                 goto out;
12079         }
12080
12081         while (1) {
12082                 node = path.nodes[0];
12083                 slot = path.slots[0];
12084                 btrfs_item_key_to_cpu(node, &key, slot);
12085                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12086                         goto out;
12087                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12088                         goto next;
12089                 if (!is_fstree(key.objectid))
12090                         goto next;
12091                 key.offset = (u64)-1;
12092
12093                 cur_root = btrfs_read_fs_root(fs_info, &key);
12094                 if (IS_ERR(cur_root) || !cur_root) {
12095                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12096                                 key.objectid);
12097                         goto out;
12098                 }
12099                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12100                                 cur_root);
12101                 if (ret < 0)
12102                         goto out;
12103 next:
12104                 ret = btrfs_next_item(tree_root, &path);
12105                 if (ret > 0) {
12106                         ret = 0;
12107                         goto out;
12108                 }
12109                 if (ret < 0)
12110                         goto out;
12111         }
12112
12113 out:
12114         btrfs_release_path(&path);
12115         return ret;
12116 }
12117
12118 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12119                                       struct btrfs_root *csum_root)
12120 {
12121         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12122         struct btrfs_path path;
12123         struct btrfs_extent_item *ei;
12124         struct extent_buffer *leaf;
12125         char *buf;
12126         struct btrfs_key key;
12127         int ret;
12128
12129         btrfs_init_path(&path);
12130         key.objectid = 0;
12131         key.type = BTRFS_EXTENT_ITEM_KEY;
12132         key.offset = 0;
12133         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12134         if (ret < 0) {
12135                 btrfs_release_path(&path);
12136                 return ret;
12137         }
12138
12139         buf = malloc(csum_root->sectorsize);
12140         if (!buf) {
12141                 btrfs_release_path(&path);
12142                 return -ENOMEM;
12143         }
12144
12145         while (1) {
12146                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12147                         ret = btrfs_next_leaf(extent_root, &path);
12148                         if (ret < 0)
12149                                 break;
12150                         if (ret) {
12151                                 ret = 0;
12152                                 break;
12153                         }
12154                 }
12155                 leaf = path.nodes[0];
12156
12157                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12158                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12159                         path.slots[0]++;
12160                         continue;
12161                 }
12162
12163                 ei = btrfs_item_ptr(leaf, path.slots[0],
12164                                     struct btrfs_extent_item);
12165                 if (!(btrfs_extent_flags(leaf, ei) &
12166                       BTRFS_EXTENT_FLAG_DATA)) {
12167                         path.slots[0]++;
12168                         continue;
12169                 }
12170
12171                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12172                                     key.offset);
12173                 if (ret)
12174                         break;
12175                 path.slots[0]++;
12176         }
12177
12178         btrfs_release_path(&path);
12179         free(buf);
12180         return ret;
12181 }
12182
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source of data extents.  When
 * @search_fs_tree is non-zero the fs/subvolume trees are scanned instead;
 * otherwise the extent tree is scanned.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12199
12200 static void free_roots_info_cache(void)
12201 {
12202         if (!roots_info_cache)
12203                 return;
12204
12205         while (!cache_tree_empty(roots_info_cache)) {
12206                 struct cache_extent *entry;
12207                 struct root_item_info *rii;
12208
12209                 entry = first_cache_extent(roots_info_cache);
12210                 if (!entry)
12211                         break;
12212                 remove_cache_extent(roots_info_cache, entry);
12213                 rii = container_of(entry, struct root_item_info, cache_extent);
12214                 free(rii);
12215         }
12216
12217         free(roots_info_cache);
12218         roots_info_cache = NULL;
12219 }
12220
12221 static int build_roots_info_cache(struct btrfs_fs_info *info)
12222 {
12223         int ret = 0;
12224         struct btrfs_key key;
12225         struct extent_buffer *leaf;
12226         struct btrfs_path path;
12227
12228         if (!roots_info_cache) {
12229                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12230                 if (!roots_info_cache)
12231                         return -ENOMEM;
12232                 cache_tree_init(roots_info_cache);
12233         }
12234
12235         btrfs_init_path(&path);
12236         key.objectid = 0;
12237         key.type = BTRFS_EXTENT_ITEM_KEY;
12238         key.offset = 0;
12239         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12240         if (ret < 0)
12241                 goto out;
12242         leaf = path.nodes[0];
12243
12244         while (1) {
12245                 struct btrfs_key found_key;
12246                 struct btrfs_extent_item *ei;
12247                 struct btrfs_extent_inline_ref *iref;
12248                 int slot = path.slots[0];
12249                 int type;
12250                 u64 flags;
12251                 u64 root_id;
12252                 u8 level;
12253                 struct cache_extent *entry;
12254                 struct root_item_info *rii;
12255
12256                 if (slot >= btrfs_header_nritems(leaf)) {
12257                         ret = btrfs_next_leaf(info->extent_root, &path);
12258                         if (ret < 0) {
12259                                 break;
12260                         } else if (ret) {
12261                                 ret = 0;
12262                                 break;
12263                         }
12264                         leaf = path.nodes[0];
12265                         slot = path.slots[0];
12266                 }
12267
12268                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12269
12270                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12271                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12272                         goto next;
12273
12274                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12275                 flags = btrfs_extent_flags(leaf, ei);
12276
12277                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12278                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12279                         goto next;
12280
12281                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12282                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12283                         level = found_key.offset;
12284                 } else {
12285                         struct btrfs_tree_block_info *binfo;
12286
12287                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12288                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12289                         level = btrfs_tree_block_level(leaf, binfo);
12290                 }
12291
12292                 /*
12293                  * For a root extent, it must be of the following type and the
12294                  * first (and only one) iref in the item.
12295                  */
12296                 type = btrfs_extent_inline_ref_type(leaf, iref);
12297                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12298                         goto next;
12299
12300                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12301                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12302                 if (!entry) {
12303                         rii = malloc(sizeof(struct root_item_info));
12304                         if (!rii) {
12305                                 ret = -ENOMEM;
12306                                 goto out;
12307                         }
12308                         rii->cache_extent.start = root_id;
12309                         rii->cache_extent.size = 1;
12310                         rii->level = (u8)-1;
12311                         entry = &rii->cache_extent;
12312                         ret = insert_cache_extent(roots_info_cache, entry);
12313                         ASSERT(ret == 0);
12314                 } else {
12315                         rii = container_of(entry, struct root_item_info,
12316                                            cache_extent);
12317                 }
12318
12319                 ASSERT(rii->cache_extent.start == root_id);
12320                 ASSERT(rii->cache_extent.size == 1);
12321
12322                 if (level > rii->level || rii->level == (u8)-1) {
12323                         rii->level = level;
12324                         rii->bytenr = found_key.objectid;
12325                         rii->gen = btrfs_extent_generation(leaf, ei);
12326                         rii->node_count = 1;
12327                 } else if (level == rii->level) {
12328                         rii->node_count++;
12329                 }
12330 next:
12331                 path.slots[0]++;
12332         }
12333
12334 out:
12335         btrfs_release_path(&path);
12336
12337         return ret;
12338 }
12339
12340 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12341                                   struct btrfs_path *path,
12342                                   const struct btrfs_key *root_key,
12343                                   const int read_only_mode)
12344 {
12345         const u64 root_id = root_key->objectid;
12346         struct cache_extent *entry;
12347         struct root_item_info *rii;
12348         struct btrfs_root_item ri;
12349         unsigned long offset;
12350
12351         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12352         if (!entry) {
12353                 fprintf(stderr,
12354                         "Error: could not find extent items for root %llu\n",
12355                         root_key->objectid);
12356                 return -ENOENT;
12357         }
12358
12359         rii = container_of(entry, struct root_item_info, cache_extent);
12360         ASSERT(rii->cache_extent.start == root_id);
12361         ASSERT(rii->cache_extent.size == 1);
12362
12363         if (rii->node_count != 1) {
12364                 fprintf(stderr,
12365                         "Error: could not find btree root extent for root %llu\n",
12366                         root_id);
12367                 return -ENOENT;
12368         }
12369
12370         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12371         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12372
12373         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12374             btrfs_root_level(&ri) != rii->level ||
12375             btrfs_root_generation(&ri) != rii->gen) {
12376
12377                 /*
12378                  * If we're in repair mode but our caller told us to not update
12379                  * the root item, i.e. just check if it needs to be updated, don't
12380                  * print this message, since the caller will call us again shortly
12381                  * for the same root item without read only mode (the caller will
12382                  * open a transaction first).
12383                  */
12384                 if (!(read_only_mode && repair))
12385                         fprintf(stderr,
12386                                 "%sroot item for root %llu,"
12387                                 " current bytenr %llu, current gen %llu, current level %u,"
12388                                 " new bytenr %llu, new gen %llu, new level %u\n",
12389                                 (read_only_mode ? "" : "fixing "),
12390                                 root_id,
12391                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12392                                 btrfs_root_level(&ri),
12393                                 rii->bytenr, rii->gen, rii->level);
12394
12395                 if (btrfs_root_generation(&ri) > rii->gen) {
12396                         fprintf(stderr,
12397                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12398                                 root_id, btrfs_root_generation(&ri), rii->gen);
12399                         return -EINVAL;
12400                 }
12401
12402                 if (!read_only_mode) {
12403                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12404                         btrfs_set_root_level(&ri, rii->level);
12405                         btrfs_set_root_generation(&ri, rii->gen);
12406                         write_extent_buffer(path->nodes[0], &ri,
12407                                             offset, sizeof(ri));
12408                 }
12409
12410                 return 1;
12411         }
12412
12413         return 0;
12414 }
12415
12416 /*
12417  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12418  * caused read-only snapshots to be corrupted if they were created at a moment
12419  * when the source subvolume/snapshot had orphan items. The issue was that the
12420  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12421  * node instead of the post orphan cleanup root node.
12422  * So this function, and its callees, just detects and fixes those cases. Even
12423  * though the regression was for read-only snapshots, this function applies to
12424  * any snapshot/subvolume root.
12425  * This must be run before any other repair code - not doing it so, makes other
12426  * repair code delete or modify backrefs in the extent tree for example, which
12427  * will result in an inconsistent fs after repairing the root items.
12428  */
12429 static int repair_root_items(struct btrfs_fs_info *info)
12430 {
12431         struct btrfs_path path;
12432         struct btrfs_key key;
12433         struct extent_buffer *leaf;
12434         struct btrfs_trans_handle *trans = NULL;
12435         int ret = 0;
12436         int bad_roots = 0;
12437         int need_trans = 0;
12438
12439         btrfs_init_path(&path);
12440
12441         ret = build_roots_info_cache(info);
12442         if (ret)
12443                 goto out;
12444
12445         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12446         key.type = BTRFS_ROOT_ITEM_KEY;
12447         key.offset = 0;
12448
12449 again:
12450         /*
12451          * Avoid opening and committing transactions if a leaf doesn't have
12452          * any root items that need to be fixed, so that we avoid rotating
12453          * backup roots unnecessarily.
12454          */
12455         if (need_trans) {
12456                 trans = btrfs_start_transaction(info->tree_root, 1);
12457                 if (IS_ERR(trans)) {
12458                         ret = PTR_ERR(trans);
12459                         goto out;
12460                 }
12461         }
12462
12463         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12464                                 0, trans ? 1 : 0);
12465         if (ret < 0)
12466                 goto out;
12467         leaf = path.nodes[0];
12468
12469         while (1) {
12470                 struct btrfs_key found_key;
12471
12472                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12473                         int no_more_keys = find_next_key(&path, &key);
12474
12475                         btrfs_release_path(&path);
12476                         if (trans) {
12477                                 ret = btrfs_commit_transaction(trans,
12478                                                                info->tree_root);
12479                                 trans = NULL;
12480                                 if (ret < 0)
12481                                         goto out;
12482                         }
12483                         need_trans = 0;
12484                         if (no_more_keys)
12485                                 break;
12486                         goto again;
12487                 }
12488
12489                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12490
12491                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12492                         goto next;
12493                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12494                         goto next;
12495
12496                 ret = maybe_repair_root_item(info, &path, &found_key,
12497                                              trans ? 0 : 1);
12498                 if (ret < 0)
12499                         goto out;
12500                 if (ret) {
12501                         if (!trans && repair) {
12502                                 need_trans = 1;
12503                                 key = found_key;
12504                                 btrfs_release_path(&path);
12505                                 goto again;
12506                         }
12507                         bad_roots++;
12508                 }
12509 next:
12510                 path.slots[0]++;
12511         }
12512         ret = 0;
12513 out:
12514         free_roots_info_cache();
12515         btrfs_release_path(&path);
12516         if (trans)
12517                 btrfs_commit_transaction(trans, info->tree_root);
12518         if (ret < 0)
12519                 return ret;
12520
12521         return bad_roots;
12522 }
12523
12524 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12525 {
12526         struct btrfs_trans_handle *trans;
12527         struct btrfs_block_group_cache *bg_cache;
12528         u64 current = 0;
12529         int ret = 0;
12530
12531         /* Clear all free space cache inodes and its extent data */
12532         while (1) {
12533                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12534                 if (!bg_cache)
12535                         break;
12536                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12537                 if (ret < 0)
12538                         return ret;
12539                 current = bg_cache->key.objectid + bg_cache->key.offset;
12540         }
12541
12542         /* Don't forget to set cache_generation to -1 */
12543         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12544         if (IS_ERR(trans)) {
12545                 error("failed to update super block cache generation");
12546                 return PTR_ERR(trans);
12547         }
12548         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12549         btrfs_commit_transaction(trans, fs_info->tree_root);
12550
12551         return ret;
12552 }
12553
/*
 * Help text for "btrfs check", as a NULL-terminated list of lines.  It is
 * handed to usage() from cmd_check().
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12584
12585 int cmd_check(int argc, char **argv)
12586 {
12587         struct cache_tree root_cache;
12588         struct btrfs_root *root;
12589         struct btrfs_fs_info *info;
12590         u64 bytenr = 0;
12591         u64 subvolid = 0;
12592         u64 tree_root_bytenr = 0;
12593         u64 chunk_root_bytenr = 0;
12594         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12595         int ret;
12596         int err = 0;
12597         u64 num;
12598         int init_csum_tree = 0;
12599         int readonly = 0;
12600         int clear_space_cache = 0;
12601         int qgroup_report = 0;
12602         int qgroups_repaired = 0;
12603         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12604
12605         while(1) {
12606                 int c;
12607                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12608                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12609                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12610                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12611                 static const struct option long_options[] = {
12612                         { "super", required_argument, NULL, 's' },
12613                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12614                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12615                         { "init-csum-tree", no_argument, NULL,
12616                                 GETOPT_VAL_INIT_CSUM },
12617                         { "init-extent-tree", no_argument, NULL,
12618                                 GETOPT_VAL_INIT_EXTENT },
12619                         { "check-data-csum", no_argument, NULL,
12620                                 GETOPT_VAL_CHECK_CSUM },
12621                         { "backup", no_argument, NULL, 'b' },
12622                         { "subvol-extents", required_argument, NULL, 'E' },
12623                         { "qgroup-report", no_argument, NULL, 'Q' },
12624                         { "tree-root", required_argument, NULL, 'r' },
12625                         { "chunk-root", required_argument, NULL,
12626                                 GETOPT_VAL_CHUNK_TREE },
12627                         { "progress", no_argument, NULL, 'p' },
12628                         { "mode", required_argument, NULL,
12629                                 GETOPT_VAL_MODE },
12630                         { "clear-space-cache", required_argument, NULL,
12631                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12632                         { NULL, 0, NULL, 0}
12633                 };
12634
12635                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12636                 if (c < 0)
12637                         break;
12638                 switch(c) {
12639                         case 'a': /* ignored */ break;
12640                         case 'b':
12641                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12642                                 break;
12643                         case 's':
12644                                 num = arg_strtou64(optarg);
12645                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12646                                         error(
12647                                         "super mirror should be less than %d",
12648                                                 BTRFS_SUPER_MIRROR_MAX);
12649                                         exit(1);
12650                                 }
12651                                 bytenr = btrfs_sb_offset(((int)num));
12652                                 printf("using SB copy %llu, bytenr %llu\n", num,
12653                                        (unsigned long long)bytenr);
12654                                 break;
12655                         case 'Q':
12656                                 qgroup_report = 1;
12657                                 break;
12658                         case 'E':
12659                                 subvolid = arg_strtou64(optarg);
12660                                 break;
12661                         case 'r':
12662                                 tree_root_bytenr = arg_strtou64(optarg);
12663                                 break;
12664                         case GETOPT_VAL_CHUNK_TREE:
12665                                 chunk_root_bytenr = arg_strtou64(optarg);
12666                                 break;
12667                         case 'p':
12668                                 ctx.progress_enabled = true;
12669                                 break;
12670                         case '?':
12671                         case 'h':
12672                                 usage(cmd_check_usage);
12673                         case GETOPT_VAL_REPAIR:
12674                                 printf("enabling repair mode\n");
12675                                 repair = 1;
12676                                 ctree_flags |= OPEN_CTREE_WRITES;
12677                                 break;
12678                         case GETOPT_VAL_READONLY:
12679                                 readonly = 1;
12680                                 break;
12681                         case GETOPT_VAL_INIT_CSUM:
12682                                 printf("Creating a new CRC tree\n");
12683                                 init_csum_tree = 1;
12684                                 repair = 1;
12685                                 ctree_flags |= OPEN_CTREE_WRITES;
12686                                 break;
12687                         case GETOPT_VAL_INIT_EXTENT:
12688                                 init_extent_tree = 1;
12689                                 ctree_flags |= (OPEN_CTREE_WRITES |
12690                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12691                                 repair = 1;
12692                                 break;
12693                         case GETOPT_VAL_CHECK_CSUM:
12694                                 check_data_csum = 1;
12695                                 break;
12696                         case GETOPT_VAL_MODE:
12697                                 check_mode = parse_check_mode(optarg);
12698                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12699                                         error("unknown mode: %s", optarg);
12700                                         exit(1);
12701                                 }
12702                                 break;
12703                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12704                                 if (strcmp(optarg, "v1") == 0) {
12705                                         clear_space_cache = 1;
12706                                 } else if (strcmp(optarg, "v2") == 0) {
12707                                         clear_space_cache = 2;
12708                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12709                                 } else {
12710                                         error(
12711                 "invalid argument to --clear-space-cache, must be v1 or v2");
12712                                         exit(1);
12713                                 }
12714                                 ctree_flags |= OPEN_CTREE_WRITES;
12715                                 break;
12716                 }
12717         }
12718
12719         if (check_argc_exact(argc - optind, 1))
12720                 usage(cmd_check_usage);
12721
12722         if (ctx.progress_enabled) {
12723                 ctx.tp = TASK_NOTHING;
12724                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12725         }
12726
12727         /* This check is the only reason for --readonly to exist */
12728         if (readonly && repair) {
12729                 error("repair options are not compatible with --readonly");
12730                 exit(1);
12731         }
12732
12733         /*
12734          * Not supported yet
12735          */
12736         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12737                 error("low memory mode doesn't support repair yet");
12738                 exit(1);
12739         }
12740
12741         radix_tree_init();
12742         cache_tree_init(&root_cache);
12743
12744         if((ret = check_mounted(argv[optind])) < 0) {
12745                 error("could not check mount status: %s", strerror(-ret));
12746                 err |= !!ret;
12747                 goto err_out;
12748         } else if(ret) {
12749                 error("%s is currently mounted, aborting", argv[optind]);
12750                 ret = -EBUSY;
12751                 err |= !!ret;
12752                 goto err_out;
12753         }
12754
12755         /* only allow partial opening under repair mode */
12756         if (repair)
12757                 ctree_flags |= OPEN_CTREE_PARTIAL;
12758
12759         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12760                                   chunk_root_bytenr, ctree_flags);
12761         if (!info) {
12762                 error("cannot open file system");
12763                 ret = -EIO;
12764                 err |= !!ret;
12765                 goto err_out;
12766         }
12767
12768         global_info = info;
12769         root = info->fs_root;
12770         if (clear_space_cache == 1) {
12771                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12772                         error(
12773                 "free space cache v2 detected, use --clear-space-cache v2");
12774                         ret = 1;
12775                         goto close_out;
12776                 }
12777                 printf("Clearing free space cache\n");
12778                 ret = clear_free_space_cache(info);
12779                 if (ret) {
12780                         error("failed to clear free space cache");
12781                         ret = 1;
12782                 } else {
12783                         printf("Free space cache cleared\n");
12784                 }
12785                 goto close_out;
12786         } else if (clear_space_cache == 2) {
12787                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12788                         printf("no free space cache v2 to clear\n");
12789                         ret = 0;
12790                         goto close_out;
12791                 }
12792                 printf("Clear free space cache v2\n");
12793                 ret = btrfs_clear_free_space_tree(info);
12794                 if (ret) {
12795                         error("failed to clear free space cache v2: %d", ret);
12796                         ret = 1;
12797                 } else {
12798                         printf("free space cache v2 cleared\n");
12799                 }
12800                 goto close_out;
12801         }
12802
12803         /*
12804          * repair mode will force us to commit transaction which
12805          * will make us fail to load log tree when mounting.
12806          */
12807         if (repair && btrfs_super_log_root(info->super_copy)) {
12808                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12809                 if (!ret) {
12810                         ret = 1;
12811                         err |= !!ret;
12812                         goto close_out;
12813                 }
12814                 ret = zero_log_tree(root);
12815                 err |= !!ret;
12816                 if (ret) {
12817                         error("failed to zero log tree: %d", ret);
12818                         goto close_out;
12819                 }
12820         }
12821
12822         uuid_unparse(info->super_copy->fsid, uuidbuf);
12823         if (qgroup_report) {
12824                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12825                        uuidbuf);
12826                 ret = qgroup_verify_all(info);
12827                 err |= !!ret;
12828                 if (ret == 0)
12829                         report_qgroups(1);
12830                 goto close_out;
12831         }
12832         if (subvolid) {
12833                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12834                        subvolid, argv[optind], uuidbuf);
12835                 ret = print_extent_state(info, subvolid);
12836                 err |= !!ret;
12837                 goto close_out;
12838         }
12839         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12840
12841         if (!extent_buffer_uptodate(info->tree_root->node) ||
12842             !extent_buffer_uptodate(info->dev_root->node) ||
12843             !extent_buffer_uptodate(info->chunk_root->node)) {
12844                 error("critical roots corrupted, unable to check the filesystem");
12845                 err |= !!ret;
12846                 ret = -EIO;
12847                 goto close_out;
12848         }
12849
12850         if (init_extent_tree || init_csum_tree) {
12851                 struct btrfs_trans_handle *trans;
12852
12853                 trans = btrfs_start_transaction(info->extent_root, 0);
12854                 if (IS_ERR(trans)) {
12855                         error("error starting transaction");
12856                         ret = PTR_ERR(trans);
12857                         err |= !!ret;
12858                         goto close_out;
12859                 }
12860
12861                 if (init_extent_tree) {
12862                         printf("Creating a new extent tree\n");
12863                         ret = reinit_extent_tree(trans, info);
12864                         err |= !!ret;
12865                         if (ret)
12866                                 goto close_out;
12867                 }
12868
12869                 if (init_csum_tree) {
12870                         printf("Reinitialize checksum tree\n");
12871                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12872                         if (ret) {
12873                                 error("checksum tree initialization failed: %d",
12874                                                 ret);
12875                                 ret = -EIO;
12876                                 err |= !!ret;
12877                                 goto close_out;
12878                         }
12879
12880                         ret = fill_csum_tree(trans, info->csum_root,
12881                                              init_extent_tree);
12882                         err |= !!ret;
12883                         if (ret) {
12884                                 error("checksum tree refilling failed: %d", ret);
12885                                 return -EIO;
12886                         }
12887                 }
12888                 /*
12889                  * Ok now we commit and run the normal fsck, which will add
12890                  * extent entries for all of the items it finds.
12891                  */
12892                 ret = btrfs_commit_transaction(trans, info->extent_root);
12893                 err |= !!ret;
12894                 if (ret)
12895                         goto close_out;
12896         }
12897         if (!extent_buffer_uptodate(info->extent_root->node)) {
12898                 error("critical: extent_root, unable to check the filesystem");
12899                 ret = -EIO;
12900                 err |= !!ret;
12901                 goto close_out;
12902         }
12903         if (!extent_buffer_uptodate(info->csum_root->node)) {
12904                 error("critical: csum_root, unable to check the filesystem");
12905                 ret = -EIO;
12906                 err |= !!ret;
12907                 goto close_out;
12908         }
12909
12910         if (!ctx.progress_enabled)
12911                 fprintf(stderr, "checking extents\n");
12912         if (check_mode == CHECK_MODE_LOWMEM)
12913                 ret = check_chunks_and_extents_v2(root);
12914         else
12915                 ret = check_chunks_and_extents(root);
12916         err |= !!ret;
12917         if (ret)
12918                 error(
12919                 "errors found in extent allocation tree or chunk allocation");
12920
12921         ret = repair_root_items(info);
12922         err |= !!ret;
12923         if (ret < 0)
12924                 goto close_out;
12925         if (repair) {
12926                 fprintf(stderr, "Fixed %d roots.\n", ret);
12927                 ret = 0;
12928         } else if (ret > 0) {
12929                 fprintf(stderr,
12930                        "Found %d roots with an outdated root item.\n",
12931                        ret);
12932                 fprintf(stderr,
12933                         "Please run a filesystem check with the option --repair to fix them.\n");
12934                 ret = 1;
12935                 err |= !!ret;
12936                 goto close_out;
12937         }
12938
12939         if (!ctx.progress_enabled) {
12940                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12941                         fprintf(stderr, "checking free space tree\n");
12942                 else
12943                         fprintf(stderr, "checking free space cache\n");
12944         }
12945         ret = check_space_cache(root);
12946         err |= !!ret;
12947         if (ret)
12948                 goto out;
12949
12950         /*
12951          * We used to have to have these hole extents in between our real
12952          * extents so if we don't have this flag set we need to make sure there
12953          * are no gaps in the file extents for inodes, otherwise we can just
12954          * ignore it when this happens.
12955          */
12956         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12957         if (!ctx.progress_enabled)
12958                 fprintf(stderr, "checking fs roots\n");
12959         if (check_mode == CHECK_MODE_LOWMEM)
12960                 ret = check_fs_roots_v2(root->fs_info);
12961         else
12962                 ret = check_fs_roots(root, &root_cache);
12963         err |= !!ret;
12964         if (ret)
12965                 goto out;
12966
12967         fprintf(stderr, "checking csums\n");
12968         ret = check_csums(root);
12969         err |= !!ret;
12970         if (ret)
12971                 goto out;
12972
12973         fprintf(stderr, "checking root refs\n");
12974         /* For low memory mode, check_fs_roots_v2 handles root refs */
12975         if (check_mode != CHECK_MODE_LOWMEM) {
12976                 ret = check_root_refs(root, &root_cache);
12977                 err |= !!ret;
12978                 if (ret)
12979                         goto out;
12980         }
12981
12982         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12983                 struct extent_buffer *eb;
12984
12985                 eb = list_first_entry(&root->fs_info->recow_ebs,
12986                                       struct extent_buffer, recow);
12987                 list_del_init(&eb->recow);
12988                 ret = recow_extent_buffer(root, eb);
12989                 err |= !!ret;
12990                 if (ret)
12991                         break;
12992         }
12993
12994         while (!list_empty(&delete_items)) {
12995                 struct bad_item *bad;
12996
12997                 bad = list_first_entry(&delete_items, struct bad_item, list);
12998                 list_del_init(&bad->list);
12999                 if (repair) {
13000                         ret = delete_bad_item(root, bad);
13001                         err |= !!ret;
13002                 }
13003                 free(bad);
13004         }
13005
13006         if (info->quota_enabled) {
13007                 fprintf(stderr, "checking quota groups\n");
13008                 ret = qgroup_verify_all(info);
13009                 err |= !!ret;
13010                 if (ret)
13011                         goto out;
13012                 report_qgroups(0);
13013                 ret = repair_qgroups(info, &qgroups_repaired);
13014                 err |= !!ret;
13015                 if (err)
13016                         goto out;
13017                 ret = 0;
13018         }
13019
13020         if (!list_empty(&root->fs_info->recow_ebs)) {
13021                 error("transid errors in file system");
13022                 ret = 1;
13023                 err |= !!ret;
13024         }
13025 out:
13026         if (found_old_backref) { /*
13027                  * there was a disk format change when mixed
13028                  * backref was in testing tree. The old format
13029                  * existed about one week.
13030                  */
13031                 printf("\n * Found old mixed backref format. "
13032                        "The old format is not supported! *"
13033                        "\n * Please mount the FS in readonly mode, "
13034                        "backup data and re-format the FS. *\n\n");
13035                 err |= 1;
13036         }
13037         printf("found %llu bytes used err is %d\n",
13038                (unsigned long long)bytes_used, ret);
13039         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13040         printf("total tree bytes: %llu\n",
13041                (unsigned long long)total_btree_bytes);
13042         printf("total fs tree bytes: %llu\n",
13043                (unsigned long long)total_fs_tree_bytes);
13044         printf("total extent tree bytes: %llu\n",
13045                (unsigned long long)total_extent_tree_bytes);
13046         printf("btree space waste bytes: %llu\n",
13047                (unsigned long long)btree_space_waste);
13048         printf("file data blocks allocated: %llu\n referenced %llu\n",
13049                 (unsigned long long)data_bytes_allocated,
13050                 (unsigned long long)data_bytes_referenced);
13051
13052         free_qgroup_counts();
13053         free_root_recs_tree(&root_cache);
13054 close_out:
13055         close_ctree(root);
13056 err_out:
13057         if (ctx.progress_enabled)
13058                 task_deinit(ctx.info);
13059
13060         return err;
13061 }