btrfs-progs: check: get the highest inode for lost+found
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Stage identifiers shown by the progress task.  The values are used as
 * indices into task_position_string[] in print_status_check(); TASK_NOTHING
 * has no string there, so it has to remain the final enumerator.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
52
/* Context that print_status_check() receives through its void * argument. */
struct task_ctx {
        int progress_enabled;
        /* Stage whose name is printed on the progress line */
        enum task_position tp;

        /* Handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
59
/*
 * File-scope state of the checker.  Every counter and flag starts at zero,
 * both lists start empty and the pointers start NULL; the code that updates
 * them lies outside this part of the file.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation to run.  parse_check_mode() maps a user supplied
 * name to one of these and yields CHECK_MODE_UNKNOWN for anything else.
 * CHECK_MODE_DEFAULT is an alias for CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as the 'node' member
 * of struct data_backref and struct tree_backref; 'list' is the link that
 * to_extent_backref() maps back to this structure.
 */
struct extent_backref {
        struct list_head list;
        /* presumably selects data_backref vs tree_backref - confirm at callers */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/*
 * Back reference carrying data-extent details.  'node' is the embedded
 * common header that to_data_backref() converts from.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share the same storage.
         * NOTE(review): which one is meaningful presumably follows
         * node.full_backref - confirm against the code that fills this in.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* A counter, unlike the one-bit node.found_ref flag */
        u32 found_ref;
};
115
/*
 * Error bits, one flag per condition so that several can be OR-ed into one
 * value.  Bit 0 is not assigned in this list.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;   /* printed as "inode" by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
153
/*
 * Back reference without the data-extent fields of struct data_backref.
 * 'node' is the embedded common header that to_tree_backref() converts
 * from.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share the same storage */
        union {
                u64 parent;
                u64 root;
        };
};
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so it can hold this value in addition to
 * 0 and 1.
 */
enum { FLAG_UNSET = 2 };
169
/*
 * Per-extent bookkeeping record.  It carries three list heads/links plus a
 * cache_extent; to_extent_record() converts from the 'list' member.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that FLAG_UNSET (2) fits next to 0 and 1 */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
/*
 * One name referring to an inode; entries are linked on
 * inode_record::backrefs.  The name bytes follow the structure directly:
 * clone_inode_rec() copies sizeof(struct inode_backref) + namelen + 1 bytes
 * per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        int errors;     /* presumably REF_ERR_* bits - confirm at callers */
        u64 dir;
        u64 index;
        u16 namelen;    /* length of name[], excluding the extra byte */
        char name[0];
};
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
/*
 * List-linked snapshot of selected root item values.  How the fields are
 * filled in is not visible in this part of the file.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
231
/*
 * Reference error bits.  print_ref_error() prints one message per set bit,
 * in the order listed here.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
245
/*
 * One hole, covering [start, start + len), kept in an rb-tree that
 * compare_hole() orders by ascending start.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
251
/*
 * Everything collected about one inode.  Records are shared by reference
 * count: get_inode_rec() clones a record with refs > 1 before handing it out
 * for modification, and free_inode_rec() releases it when refs drops to 0.
 */
struct inode_record {
        struct list_head backrefs;      /* list of struct inode_backref */
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        int errors;                     /* I_ERR_* bits, see print_inode_error() */

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;               /* (u64)-1 in a freshly created record */
        u64 extent_end;
        struct rb_root holes;           /* tree of struct file_extent_hole */
        struct list_head orphan_extents; /* list of struct orphan_data_extent */

        u32 refs;                       /* reference count */
};
279
/*
 * Inode error bits stored in inode_record::errors.  print_inode_error()
 * prints one message per set bit, in the order listed here.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
295
/*
 * One named reference to a root; entries are linked on
 * root_record::backrefs.  Like struct inode_backref it ends in a trailing
 * name[] whose length is recorded in namelen.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;     /* presumably REF_ERR_* bits - confirm at callers */
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
/* Per-root record: a list head for backrefs plus a cache_extent link. */
struct root_record {
        struct list_head backrefs;      /* presumably struct root_backref entries */
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
323
/*
 * Cache-tree node that carries an opaque pointer.  get_inode_rec() uses it
 * with cache.start = inode number, cache.size = 1 and data pointing at the
 * struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
328
/*
 * Cache-tree node that owns two nested cache trees and a current inode
 * record.  Its use is outside this part of the file.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
336
/* A (start, size) pair describing one block. */
struct block_info {
        u64 start;
        u32 size;
};
341
/* State kept while walking a tree. */
struct walk_control {
        struct cache_tree shared;
        /* BTRFS_MAX_LEVEL slots - presumably one per tree level, confirm */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
348
/* List entry that identifies an item by its key and a root id. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
354
/* List entry describing a (bytenr, bytes) range with two counters. */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
362
/* Cache-tree entry holding what is known about one root's top node. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
372
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart in a
 * combined error value.  Confirm whether that is intended before relying
 * on either bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        putchar('\n');
        fflush(stdout);
        return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize, root->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct btrfs_root *root,
1481                             struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(root, eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct btrfs_root *root,
1987                             struct extent_buffer *parent, int slot,
1988                             struct extent_buffer *child)
1989 {
1990         struct btrfs_key parent_key;
1991         struct btrfs_key child_key;
1992         int ret = 0;
1993
1994         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1995         if (btrfs_header_level(child) == 0)
1996                 btrfs_item_key_to_cpu(child, &child_key, 0);
1997         else
1998                 btrfs_node_key_to_cpu(child, &child_key, 0);
1999
2000         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2001                 ret = -EINVAL;
2002                 fprintf(stderr,
2003                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2004                         parent_key.objectid, parent_key.type, parent_key.offset,
2005                         child_key.objectid, child_key.type, child_key.offset);
2006         }
2007         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008                 ret = -EINVAL;
2009                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2010                         btrfs_node_blockptr(parent, slot),
2011                         btrfs_header_bytenr(child));
2012         }
2013         if (btrfs_node_ptr_generation(parent, slot) !=
2014             btrfs_header_generation(child)) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2017                         btrfs_header_generation(child),
2018                         btrfs_node_ptr_generation(parent, slot));
2019         }
2020         return ret;
2021 }
2022
2023 /*
2024  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2025  * in every fs or file tree check. Here we find its all root ids, and only check
2026  * it in the fs or file tree which has the smallest root id.
2027  */
2028 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 {
2030         struct rb_node *node;
2031         struct ulist_node *u;
2032
2033         if (roots->nnodes == 1)
2034                 return 1;
2035
2036         node = rb_first(&roots->root);
2037         u = rb_entry(node, struct ulist_node, rb_node);
2038         /*
2039          * current root id is not smallest, we skip it and let it be checked
2040          * in the fs or file tree who hash the smallest root id.
2041          */
2042         if (root->objectid != u->val)
2043                 return 0;
2044
2045         return 1;
2046 }
2047
2048 /*
2049  * for a tree node or leaf, we record its reference count, so later if we still
2050  * process this node or leaf, don't need to compute its reference count again.
2051  */
2052 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2053                              struct node_refs *nrefs, u64 level)
2054 {
2055         int check, ret;
2056         u64 refs;
2057         struct ulist *roots;
2058
2059         if (nrefs->bytenr[level] != bytenr) {
2060                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2061                                        level, 1, &refs, NULL);
2062                 if (ret < 0)
2063                         return ret;
2064
2065                 nrefs->bytenr[level] = bytenr;
2066                 nrefs->refs[level] = refs;
2067                 if (refs > 1) {
2068                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2069                                                    0, &roots);
2070                         if (ret)
2071                                 return -EIO;
2072
2073                         check = need_check(root, roots);
2074                         ulist_free(roots);
2075                         nrefs->need_check[level] = check;
2076                 } else {
2077                         nrefs->need_check[level] = 1;
2078                 }
2079         }
2080
2081         return 0;
2082 }
2083
2084 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2085                           struct walk_control *wc, int *level,
2086                           struct node_refs *nrefs)
2087 {
2088         enum btrfs_tree_block_status status;
2089         u64 bytenr;
2090         u64 ptr_gen;
2091         struct extent_buffer *next;
2092         struct extent_buffer *cur;
2093         u32 blocksize;
2094         int ret, err = 0;
2095         u64 refs;
2096
2097         WARN_ON(*level < 0);
2098         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099
2100         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2101                 refs = nrefs->refs[*level];
2102                 ret = 0;
2103         } else {
2104                 ret = btrfs_lookup_extent_info(NULL, root,
2105                                        path->nodes[*level]->start,
2106                                        *level, 1, &refs, NULL);
2107                 if (ret < 0) {
2108                         err = ret;
2109                         goto out;
2110                 }
2111                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2112                 nrefs->refs[*level] = refs;
2113         }
2114
2115         if (refs > 1) {
2116                 ret = enter_shared_node(root, path->nodes[*level]->start,
2117                                         refs, wc, *level);
2118                 if (ret > 0) {
2119                         err = ret;
2120                         goto out;
2121                 }
2122         }
2123
2124         while (*level >= 0) {
2125                 WARN_ON(*level < 0);
2126                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2127                 cur = path->nodes[*level];
2128
2129                 if (btrfs_header_level(cur) != *level)
2130                         WARN_ON(1);
2131
2132                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2133                         break;
2134                 if (*level == 0) {
2135                         ret = process_one_leaf(root, cur, wc);
2136                         if (ret < 0)
2137                                 err = ret;
2138                         break;
2139                 }
2140                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2141                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2142                 blocksize = root->nodesize;
2143
2144                 if (bytenr == nrefs->bytenr[*level - 1]) {
2145                         refs = nrefs->refs[*level - 1];
2146                 } else {
2147                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2148                                         *level - 1, 1, &refs, NULL);
2149                         if (ret < 0) {
2150                                 refs = 0;
2151                         } else {
2152                                 nrefs->bytenr[*level - 1] = bytenr;
2153                                 nrefs->refs[*level - 1] = refs;
2154                         }
2155                 }
2156
2157                 if (refs > 1) {
2158                         ret = enter_shared_node(root, bytenr, refs,
2159                                                 wc, *level - 1);
2160                         if (ret > 0) {
2161                                 path->slots[*level]++;
2162                                 continue;
2163                         }
2164                 }
2165
2166                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2167                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2168                         free_extent_buffer(next);
2169                         reada_walk_down(root, cur, path->slots[*level]);
2170                         next = read_tree_block(root, bytenr, blocksize,
2171                                                ptr_gen);
2172                         if (!extent_buffer_uptodate(next)) {
2173                                 struct btrfs_key node_key;
2174
2175                                 btrfs_node_key_to_cpu(path->nodes[*level],
2176                                                       &node_key,
2177                                                       path->slots[*level]);
2178                                 btrfs_add_corrupt_extent_record(root->fs_info,
2179                                                 &node_key,
2180                                                 path->nodes[*level]->start,
2181                                                 root->nodesize, *level);
2182                                 err = -EIO;
2183                                 goto out;
2184                         }
2185                 }
2186
2187                 ret = check_child_node(root, cur, path->slots[*level], next);
2188                 if (ret) {
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
/*
 * Forward declaration; the same prototype also appears ahead of
 * process_one_leaf_v2().
 */
static int check_inode_item(struct btrfs_root *root,
                            struct btrfs_path *path,
                            unsigned int ext_ref);
2215
2216 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2217                              int *level, struct node_refs *nrefs, int ext_ref)
2218 {
2219         enum btrfs_tree_block_status status;
2220         u64 bytenr;
2221         u64 ptr_gen;
2222         struct extent_buffer *next;
2223         struct extent_buffer *cur;
2224         u32 blocksize;
2225         int ret;
2226
2227         WARN_ON(*level < 0);
2228         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229
2230         ret = update_nodes_refs(root, path->nodes[*level]->start,
2231                                 nrefs, *level);
2232         if (ret < 0)
2233                 return ret;
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 /* Don't forgot to check leaf/node validation */
2246                 if (*level == 0) {
2247                         ret = btrfs_check_leaf(root, NULL, cur);
2248                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2249                                 ret = -EIO;
2250                                 break;
2251                         }
2252                         ret = process_one_leaf_v2(root, path, nrefs,
2253                                                   level, ext_ref);
2254                         break;
2255                 } else {
2256                         ret = btrfs_check_node(root, NULL, cur);
2257                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2258                                 ret = -EIO;
2259                                 break;
2260                         }
2261                 }
2262                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2263                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2264                 blocksize = root->nodesize;
2265
2266                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2267                 if (ret)
2268                         break;
2269                 if (!nrefs->need_check[*level - 1]) {
2270                         path->slots[*level]++;
2271                         continue;
2272                 }
2273
2274                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2275                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2276                         free_extent_buffer(next);
2277                         reada_walk_down(root, cur, path->slots[*level]);
2278                         next = read_tree_block(root, bytenr, blocksize,
2279                                                ptr_gen);
2280                         if (!extent_buffer_uptodate(next)) {
2281                                 struct btrfs_key node_key;
2282
2283                                 btrfs_node_key_to_cpu(path->nodes[*level],
2284                                                       &node_key,
2285                                                       path->slots[*level]);
2286                                 btrfs_add_corrupt_extent_record(root->fs_info,
2287                                                 &node_key,
2288                                                 path->nodes[*level]->start,
2289                                                 root->nodesize, *level);
2290                                 ret = -EIO;
2291                                 break;
2292                         }
2293                 }
2294
2295                 ret = check_child_node(root, cur, path->slots[*level], next);
2296                 if (ret < 0) 
2297                         break;
2298
2299                 if (btrfs_is_leaf(next))
2300                         status = btrfs_check_leaf(root, NULL, next);
2301                 else
2302                         status = btrfs_check_node(root, NULL, next);
2303                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2304                         free_extent_buffer(next);
2305                         ret = -EIO;
2306                         break;
2307                 }
2308
2309                 *level = *level - 1;
2310                 free_extent_buffer(path->nodes[*level]);
2311                 path->nodes[*level] = next;
2312                 path->slots[*level] = 0;
2313         }
2314         return ret;
2315 }
2316
2317 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2318                         struct walk_control *wc, int *level)
2319 {
2320         int i;
2321         struct extent_buffer *leaf;
2322
2323         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2324                 leaf = path->nodes[i];
2325                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2326                         path->slots[i]++;
2327                         *level = i;
2328                         return 0;
2329                 } else {
2330                         free_extent_buffer(path->nodes[*level]);
2331                         path->nodes[*level] = NULL;
2332                         BUG_ON(*level > wc->active_node);
2333                         if (*level == wc->active_node)
2334                                 leave_shared_node(root, wc, *level);
2335                         *level = i + 1;
2336                 }
2337         }
2338         return 1;
2339 }
2340
2341 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2342                            int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         *level = i + 1;
2357                 }
2358         }
2359         return 1;
2360 }
2361
2362 static int check_root_dir(struct inode_record *rec)
2363 {
2364         struct inode_backref *backref;
2365         int ret = -1;
2366
2367         if (!rec->found_inode_item || rec->errors)
2368                 goto out;
2369         if (rec->nlink != 1 || rec->found_link != 0)
2370                 goto out;
2371         if (list_empty(&rec->backrefs))
2372                 goto out;
2373         backref = to_inode_backref(rec->backrefs.next);
2374         if (!backref->found_inode_ref)
2375                 goto out;
2376         if (backref->index != 0 || backref->namelen != 2 ||
2377             memcmp(backref->name, "..", 2))
2378                 goto out;
2379         if (backref->found_dir_index || backref->found_dir_item)
2380                 goto out;
2381         ret = 0;
2382 out:
2383         return ret;
2384 }
2385
2386 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2387                               struct btrfs_root *root, struct btrfs_path *path,
2388                               struct inode_record *rec)
2389 {
2390         struct btrfs_inode_item *ei;
2391         struct btrfs_key key;
2392         int ret;
2393
2394         key.objectid = rec->ino;
2395         key.type = BTRFS_INODE_ITEM_KEY;
2396         key.offset = (u64)-1;
2397
2398         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2399         if (ret < 0)
2400                 goto out;
2401         if (ret) {
2402                 if (!path->slots[0]) {
2403                         ret = -ENOENT;
2404                         goto out;
2405                 }
2406                 path->slots[0]--;
2407                 ret = 0;
2408         }
2409         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2410         if (key.objectid != rec->ino) {
2411                 ret = -ENOENT;
2412                 goto out;
2413         }
2414
2415         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2416                             struct btrfs_inode_item);
2417         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2418         btrfs_mark_buffer_dirty(path->nodes[0]);
2419         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2420         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2421                root->root_key.objectid);
2422 out:
2423         btrfs_release_path(path);
2424         return ret;
2425 }
2426
2427 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2428                                     struct btrfs_root *root,
2429                                     struct btrfs_path *path,
2430                                     struct inode_record *rec)
2431 {
2432         int ret;
2433
2434         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2435         btrfs_release_path(path);
2436         if (!ret)
2437                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2438         return ret;
2439 }
2440
2441 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2442                                struct btrfs_root *root,
2443                                struct btrfs_path *path,
2444                                struct inode_record *rec)
2445 {
2446         struct btrfs_inode_item *ei;
2447         struct btrfs_key key;
2448         int ret = 0;
2449
2450         key.objectid = rec->ino;
2451         key.type = BTRFS_INODE_ITEM_KEY;
2452         key.offset = 0;
2453
2454         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2455         if (ret) {
2456                 if (ret > 0)
2457                         ret = -ENOENT;
2458                 goto out;
2459         }
2460
2461         /* Since ret == 0, no need to check anything */
2462         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463                             struct btrfs_inode_item);
2464         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2465         btrfs_mark_buffer_dirty(path->nodes[0]);
2466         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2467         printf("reset nbytes for ino %llu root %llu\n",
2468                rec->ino, root->root_key.objectid);
2469 out:
2470         btrfs_release_path(path);
2471         return ret;
2472 }
2473
2474 static int add_missing_dir_index(struct btrfs_root *root,
2475                                  struct cache_tree *inode_cache,
2476                                  struct inode_record *rec,
2477                                  struct inode_backref *backref)
2478 {
2479         struct btrfs_path path;
2480         struct btrfs_trans_handle *trans;
2481         struct btrfs_dir_item *dir_item;
2482         struct extent_buffer *leaf;
2483         struct btrfs_key key;
2484         struct btrfs_disk_key disk_key;
2485         struct inode_record *dir_rec;
2486         unsigned long name_ptr;
2487         u32 data_size = sizeof(*dir_item) + backref->namelen;
2488         int ret;
2489
2490         trans = btrfs_start_transaction(root, 1);
2491         if (IS_ERR(trans))
2492                 return PTR_ERR(trans);
2493
2494         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2495                 (unsigned long long)rec->ino);
2496
2497         btrfs_init_path(&path);
2498         key.objectid = backref->dir;
2499         key.type = BTRFS_DIR_INDEX_KEY;
2500         key.offset = backref->index;
2501         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2502         BUG_ON(ret);
2503
2504         leaf = path.nodes[0];
2505         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506
2507         disk_key.objectid = cpu_to_le64(rec->ino);
2508         disk_key.type = BTRFS_INODE_ITEM_KEY;
2509         disk_key.offset = 0;
2510
2511         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2512         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2513         btrfs_set_dir_data_len(leaf, dir_item, 0);
2514         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2515         name_ptr = (unsigned long)(dir_item + 1);
2516         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2517         btrfs_mark_buffer_dirty(leaf);
2518         btrfs_release_path(&path);
2519         btrfs_commit_transaction(trans, root);
2520
2521         backref->found_dir_index = 1;
2522         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2523         BUG_ON(IS_ERR(dir_rec));
2524         if (!dir_rec)
2525                 return 0;
2526         dir_rec->found_size += backref->namelen;
2527         if (dir_rec->found_size == dir_rec->isize &&
2528             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2529                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2530         if (dir_rec->found_size != dir_rec->isize)
2531                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532
2533         return 0;
2534 }
2535
2536 static int delete_dir_index(struct btrfs_root *root,
2537                             struct cache_tree *inode_cache,
2538                             struct inode_record *rec,
2539                             struct inode_backref *backref)
2540 {
2541         struct btrfs_trans_handle *trans;
2542         struct btrfs_dir_item *di;
2543         struct btrfs_path path;
2544         int ret = 0;
2545
2546         trans = btrfs_start_transaction(root, 1);
2547         if (IS_ERR(trans))
2548                 return PTR_ERR(trans);
2549
2550         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2551                 (unsigned long long)backref->dir,
2552                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2553                 (unsigned long long)root->objectid);
2554
2555         btrfs_init_path(&path);
2556         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2557                                     backref->name, backref->namelen,
2558                                     backref->index, -1);
2559         if (IS_ERR(di)) {
2560                 ret = PTR_ERR(di);
2561                 btrfs_release_path(&path);
2562                 btrfs_commit_transaction(trans, root);
2563                 if (ret == -ENOENT)
2564                         return 0;
2565                 return ret;
2566         }
2567
2568         if (!di)
2569                 ret = btrfs_del_item(trans, root, &path);
2570         else
2571                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2572         BUG_ON(ret);
2573         btrfs_release_path(&path);
2574         btrfs_commit_transaction(trans, root);
2575         return ret;
2576 }
2577
2578 static int create_inode_item(struct btrfs_root *root,
2579                              struct inode_record *rec,
2580                              struct inode_backref *backref, int root_dir)
2581 {
2582         struct btrfs_trans_handle *trans;
2583         struct btrfs_inode_item inode_item;
2584         time_t now = time(NULL);
2585         int ret;
2586
2587         trans = btrfs_start_transaction(root, 1);
2588         if (IS_ERR(trans)) {
2589                 ret = PTR_ERR(trans);
2590                 return ret;
2591         }
2592
2593         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2594                 "be incomplete, please check permissions and content after "
2595                 "the fsck completes.\n", (unsigned long long)root->objectid,
2596                 (unsigned long long)rec->ino);
2597
2598         memset(&inode_item, 0, sizeof(inode_item));
2599         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2600         if (root_dir)
2601                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2602         else
2603                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2604         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2605         if (rec->found_dir_item) {
2606                 if (rec->found_file_extent)
2607                         fprintf(stderr, "root %llu inode %llu has both a dir "
2608                                 "item and extents, unsure if it is a dir or a "
2609                                 "regular file so setting it as a directory\n",
2610                                 (unsigned long long)root->objectid,
2611                                 (unsigned long long)rec->ino);
2612                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2613                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2614         } else if (!rec->found_dir_item) {
2615                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2616                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2617         }
2618         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2619         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2620         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2621         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2622         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2623         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2624         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2625         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2626
2627         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2628         BUG_ON(ret);
2629         btrfs_commit_transaction(trans, root);
2630         return 0;
2631 }
2632
2633 static int repair_inode_backrefs(struct btrfs_root *root,
2634                                  struct inode_record *rec,
2635                                  struct cache_tree *inode_cache,
2636                                  int delete)
2637 {
2638         struct inode_backref *tmp, *backref;
2639         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2640         int ret = 0;
2641         int repaired = 0;
2642
2643         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2644                 if (!delete && rec->ino == root_dirid) {
2645                         if (!rec->found_inode_item) {
2646                                 ret = create_inode_item(root, rec, backref, 1);
2647                                 if (ret)
2648                                         break;
2649                                 repaired++;
2650                         }
2651                 }
2652
2653                 /* Index 0 for root dir's are special, don't mess with it */
2654                 if (rec->ino == root_dirid && backref->index == 0)
2655                         continue;
2656
2657                 if (delete &&
2658                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2659                      (backref->found_dir_index && backref->found_inode_ref &&
2660                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2661                         ret = delete_dir_index(root, inode_cache, rec, backref);
2662                         if (ret)
2663                                 break;
2664                         repaired++;
2665                         list_del(&backref->list);
2666                         free(backref);
2667                 }
2668
2669                 if (!delete && !backref->found_dir_index &&
2670                     backref->found_dir_item && backref->found_inode_ref) {
2671                         ret = add_missing_dir_index(root, inode_cache, rec,
2672                                                     backref);
2673                         if (ret)
2674                                 break;
2675                         repaired++;
2676                         if (backref->found_dir_item &&
2677                             backref->found_dir_index &&
2678                             backref->found_dir_index) {
2679                                 if (!backref->errors &&
2680                                     backref->found_inode_ref) {
2681                                         list_del(&backref->list);
2682                                         free(backref);
2683                                 }
2684                         }
2685                 }
2686
2687                 if (!delete && (!backref->found_dir_index &&
2688                                 !backref->found_dir_item &&
2689                                 backref->found_inode_ref)) {
2690                         struct btrfs_trans_handle *trans;
2691                         struct btrfs_key location;
2692
2693                         ret = check_dir_conflict(root, backref->name,
2694                                                  backref->namelen,
2695                                                  backref->dir,
2696                                                  backref->index);
2697                         if (ret) {
2698                                 /*
2699                                  * let nlink fixing routine to handle it,
2700                                  * which can do it better.
2701                                  */
2702                                 ret = 0;
2703                                 break;
2704                         }
2705                         location.objectid = rec->ino;
2706                         location.type = BTRFS_INODE_ITEM_KEY;
2707                         location.offset = 0;
2708
2709                         trans = btrfs_start_transaction(root, 1);
2710                         if (IS_ERR(trans)) {
2711                                 ret = PTR_ERR(trans);
2712                                 break;
2713                         }
2714                         fprintf(stderr, "adding missing dir index/item pair "
2715                                 "for inode %llu\n",
2716                                 (unsigned long long)rec->ino);
2717                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2718                                                     backref->namelen,
2719                                                     backref->dir, &location,
2720                                                     imode_to_type(rec->imode),
2721                                                     backref->index);
2722                         BUG_ON(ret);
2723                         btrfs_commit_transaction(trans, root);
2724                         repaired++;
2725                 }
2726
2727                 if (!delete && (backref->found_inode_ref &&
2728                                 backref->found_dir_index &&
2729                                 backref->found_dir_item &&
2730                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2731                                 !rec->found_inode_item)) {
2732                         ret = create_inode_item(root, rec, backref, 0);
2733                         if (ret)
2734                                 break;
2735                         repaired++;
2736                 }
2737
2738         }
2739         return ret ? ret : repaired;
2740 }
2741
2742 /*
2743  * To determine the file type for nlink/inode_item repair
2744  *
2745  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2746  * Return -ENOENT if file type is not found.
2747  */
2748 static int find_file_type(struct inode_record *rec, u8 *type)
2749 {
2750         struct inode_backref *backref;
2751
2752         /* For inode item recovered case */
2753         if (rec->found_inode_item) {
2754                 *type = imode_to_type(rec->imode);
2755                 return 0;
2756         }
2757
2758         list_for_each_entry(backref, &rec->backrefs, list) {
2759                 if (backref->found_dir_index || backref->found_dir_item) {
2760                         *type = backref->filetype;
2761                         return 0;
2762                 }
2763         }
2764         return -ENOENT;
2765 }
2766
2767 /*
2768  * To determine the file name for nlink repair
2769  *
2770  * Return 0 if file name is found, set name and namelen.
2771  * Return -ENOENT if file name is not found.
2772  */
2773 static int find_file_name(struct inode_record *rec,
2774                           char *name, int *namelen)
2775 {
2776         struct inode_backref *backref;
2777
2778         list_for_each_entry(backref, &rec->backrefs, list) {
2779                 if (backref->found_dir_index || backref->found_dir_item ||
2780                     backref->found_inode_ref) {
2781                         memcpy(name, backref->name, backref->namelen);
2782                         *namelen = backref->namelen;
2783                         return 0;
2784                 }
2785         }
2786         return -ENOENT;
2787 }
2788
2789 /* Reset the nlink of the inode to the correct one */
2790 static int reset_nlink(struct btrfs_trans_handle *trans,
2791                        struct btrfs_root *root,
2792                        struct btrfs_path *path,
2793                        struct inode_record *rec)
2794 {
2795         struct inode_backref *backref;
2796         struct inode_backref *tmp;
2797         struct btrfs_key key;
2798         struct btrfs_inode_item *inode_item;
2799         int ret = 0;
2800
2801         /* We don't believe this either, reset it and iterate backref */
2802         rec->found_link = 0;
2803
2804         /* Remove all backref including the valid ones */
2805         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2806                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2807                                    backref->index, backref->name,
2808                                    backref->namelen, 0);
2809                 if (ret < 0)
2810                         goto out;
2811
2812                 /* remove invalid backref, so it won't be added back */
2813                 if (!(backref->found_dir_index &&
2814                       backref->found_dir_item &&
2815                       backref->found_inode_ref)) {
2816                         list_del(&backref->list);
2817                         free(backref);
2818                 } else {
2819                         rec->found_link++;
2820                 }
2821         }
2822
2823         /* Set nlink to 0 */
2824         key.objectid = rec->ino;
2825         key.type = BTRFS_INODE_ITEM_KEY;
2826         key.offset = 0;
2827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2828         if (ret < 0)
2829                 goto out;
2830         if (ret > 0) {
2831                 ret = -ENOENT;
2832                 goto out;
2833         }
2834         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2835                                     struct btrfs_inode_item);
2836         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2837         btrfs_mark_buffer_dirty(path->nodes[0]);
2838         btrfs_release_path(path);
2839
2840         /*
2841          * Add back valid inode_ref/dir_item/dir_index,
2842          * add_link() will handle the nlink inc, so new nlink must be correct
2843          */
2844         list_for_each_entry(backref, &rec->backrefs, list) {
2845                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2846                                      backref->name, backref->namelen,
2847                                      backref->filetype, &backref->index, 1);
2848                 if (ret < 0)
2849                         goto out;
2850         }
2851 out:
2852         btrfs_release_path(path);
2853         return ret;
2854 }
2855
2856 static int get_highest_inode(struct btrfs_trans_handle *trans,
2857                                 struct btrfs_root *root,
2858                                 struct btrfs_path *path,
2859                                 u64 *highest_ino)
2860 {
2861         struct btrfs_key key, found_key;
2862         int ret;
2863
2864         btrfs_init_path(path);
2865         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2866         key.offset = -1;
2867         key.type = BTRFS_INODE_ITEM_KEY;
2868         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2869         if (ret == 1) {
2870                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2871                                 path->slots[0] - 1);
2872                 *highest_ino = found_key.objectid;
2873                 ret = 0;
2874         }
2875         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2876                 ret = -EOVERFLOW;
2877         btrfs_release_path(path);
2878         return ret;
2879 }
2880
2881 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2882                                struct btrfs_root *root,
2883                                struct btrfs_path *path,
2884                                struct inode_record *rec)
2885 {
2886         char *dir_name = "lost+found";
2887         char namebuf[BTRFS_NAME_LEN] = {0};
2888         u64 lost_found_ino;
2889         u32 mode = 0700;
2890         u8 type = 0;
2891         int namelen = 0;
2892         int name_recovered = 0;
2893         int type_recovered = 0;
2894         int ret = 0;
2895
2896         /*
2897          * Get file name and type first before these invalid inode ref
2898          * are deleted by remove_all_invalid_backref()
2899          */
2900         name_recovered = !find_file_name(rec, namebuf, &namelen);
2901         type_recovered = !find_file_type(rec, &type);
2902
2903         if (!name_recovered) {
2904                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2905                        rec->ino, rec->ino);
2906                 namelen = count_digits(rec->ino);
2907                 sprintf(namebuf, "%llu", rec->ino);
2908                 name_recovered = 1;
2909         }
2910         if (!type_recovered) {
2911                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2912                        rec->ino);
2913                 type = BTRFS_FT_REG_FILE;
2914                 type_recovered = 1;
2915         }
2916
2917         ret = reset_nlink(trans, root, path, rec);
2918         if (ret < 0) {
2919                 fprintf(stderr,
2920                         "Failed to reset nlink for inode %llu: %s\n",
2921                         rec->ino, strerror(-ret));
2922                 goto out;
2923         }
2924
2925         if (rec->found_link == 0) {
2926                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2927                 if (ret < 0)
2928                         goto out;
2929                 lost_found_ino++;
2930                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2931                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2932                                   mode);
2933                 if (ret < 0) {
2934                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2935                                 dir_name, strerror(-ret));
2936                         goto out;
2937                 }
2938                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2939                                      namebuf, namelen, type, NULL, 1);
2940                 /*
2941                  * Add ".INO" suffix several times to handle case where
2942                  * "FILENAME.INO" is already taken by another file.
2943                  */
2944                 while (ret == -EEXIST) {
2945                         /*
2946                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2947                          */
2948                         if (namelen + count_digits(rec->ino) + 1 >
2949                             BTRFS_NAME_LEN) {
2950                                 ret = -EFBIG;
2951                                 goto out;
2952                         }
2953                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2954                                  ".%llu", rec->ino);
2955                         namelen += count_digits(rec->ino) + 1;
2956                         ret = btrfs_add_link(trans, root, rec->ino,
2957                                              lost_found_ino, namebuf,
2958                                              namelen, type, NULL, 1);
2959                 }
2960                 if (ret < 0) {
2961                         fprintf(stderr,
2962                                 "Failed to link the inode %llu to %s dir: %s\n",
2963                                 rec->ino, dir_name, strerror(-ret));
2964                         goto out;
2965                 }
2966                 /*
2967                  * Just increase the found_link, don't actually add the
2968                  * backref. This will make things easier and this inode
2969                  * record will be freed after the repair is done.
2970                  * So fsck will not report problem about this inode.
2971                  */
2972                 rec->found_link++;
2973                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2974                        namelen, namebuf, dir_name);
2975         }
2976         printf("Fixed the nlink of inode %llu\n", rec->ino);
2977 out:
2978         /*
2979          * Clear the flag anyway, or we will loop forever for the same inode
2980          * as it will not be removed from the bad inode list and the dead loop
2981          * happens.
2982          */
2983         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2984         btrfs_release_path(path);
2985         return ret;
2986 }
2987
2988 /*
2989  * Check if there is any normal(reg or prealloc) file extent for given
2990  * ino.
2991  * This is used to determine the file type when neither its dir_index/item or
2992  * inode_item exists.
2993  *
2994  * This will *NOT* report error, if any error happens, just consider it does
2995  * not have any normal file extent.
2996  */
2997 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2998 {
2999         struct btrfs_path path;
3000         struct btrfs_key key;
3001         struct btrfs_key found_key;
3002         struct btrfs_file_extent_item *fi;
3003         u8 type;
3004         int ret = 0;
3005
3006         btrfs_init_path(&path);
3007         key.objectid = ino;
3008         key.type = BTRFS_EXTENT_DATA_KEY;
3009         key.offset = 0;
3010
3011         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3012         if (ret < 0) {
3013                 ret = 0;
3014                 goto out;
3015         }
3016         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3017                 ret = btrfs_next_leaf(root, &path);
3018                 if (ret) {
3019                         ret = 0;
3020                         goto out;
3021                 }
3022         }
3023         while (1) {
3024                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3025                                       path.slots[0]);
3026                 if (found_key.objectid != ino ||
3027                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3028                         break;
3029                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3030                                     struct btrfs_file_extent_item);
3031                 type = btrfs_file_extent_type(path.nodes[0], fi);
3032                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3033                         ret = 1;
3034                         goto out;
3035                 }
3036         }
3037 out:
3038         btrfs_release_path(&path);
3039         return ret;
3040 }
3041
3042 static u32 btrfs_type_to_imode(u8 type)
3043 {
3044         static u32 imode_by_btrfs_type[] = {
3045                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3046                 [BTRFS_FT_DIR]          = S_IFDIR,
3047                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3048                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3049                 [BTRFS_FT_FIFO]         = S_IFIFO,
3050                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3051                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3052         };
3053
3054         return imode_by_btrfs_type[(type)];
3055 }
3056
3057 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3058                                 struct btrfs_root *root,
3059                                 struct btrfs_path *path,
3060                                 struct inode_record *rec)
3061 {
3062         u8 filetype;
3063         u32 mode = 0700;
3064         int type_recovered = 0;
3065         int ret = 0;
3066
3067         printf("Trying to rebuild inode:%llu\n", rec->ino);
3068
3069         type_recovered = !find_file_type(rec, &filetype);
3070
3071         /*
3072          * Try to determine inode type if type not found.
3073          *
3074          * For found regular file extent, it must be FILE.
3075          * For found dir_item/index, it must be DIR.
3076          *
3077          * For undetermined one, use FILE as fallback.
3078          *
3079          * TODO:
3080          * 1. If found backref(inode_index/item is already handled) to it,
3081          *    it must be DIR.
3082          *    Need new inode-inode ref structure to allow search for that.
3083          */
3084         if (!type_recovered) {
3085                 if (rec->found_file_extent &&
3086                     find_normal_file_extent(root, rec->ino)) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_REG_FILE;
3089                 } else if (rec->found_dir_item) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_DIR;
3092                 } else if (!list_empty(&rec->orphan_extents)) {
3093                         type_recovered = 1;
3094                         filetype = BTRFS_FT_REG_FILE;
3095                 } else{
3096                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3097                                rec->ino);
3098                         type_recovered = 1;
3099                         filetype = BTRFS_FT_REG_FILE;
3100                 }
3101         }
3102
3103         ret = btrfs_new_inode(trans, root, rec->ino,
3104                               mode | btrfs_type_to_imode(filetype));
3105         if (ret < 0)
3106                 goto out;
3107
3108         /*
3109          * Here inode rebuild is done, we only rebuild the inode item,
3110          * don't repair the nlink(like move to lost+found).
3111          * That is the job of nlink repair.
3112          *
3113          * We just fill the record and return
3114          */
3115         rec->found_dir_item = 1;
3116         rec->imode = mode | btrfs_type_to_imode(filetype);
3117         rec->nlink = 0;
3118         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3119         /* Ensure the inode_nlinks repair function will be called */
3120         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3121 out:
3122         return ret;
3123 }
3124
3125 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3126                                       struct btrfs_root *root,
3127                                       struct btrfs_path *path,
3128                                       struct inode_record *rec)
3129 {
3130         struct orphan_data_extent *orphan;
3131         struct orphan_data_extent *tmp;
3132         int ret = 0;
3133
3134         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3135                 /*
3136                  * Check for conflicting file extents
3137                  *
3138                  * Here we don't know whether the extents is compressed or not,
3139                  * so we can only assume it not compressed nor data offset,
3140                  * and use its disk_len as extent length.
3141                  */
3142                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3143                                        orphan->offset, orphan->disk_len, 0);
3144                 btrfs_release_path(path);
3145                 if (ret < 0)
3146                         goto out;
3147                 if (!ret) {
3148                         fprintf(stderr,
3149                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3150                                 orphan->disk_bytenr, orphan->disk_len);
3151                         ret = btrfs_free_extent(trans,
3152                                         root->fs_info->extent_root,
3153                                         orphan->disk_bytenr, orphan->disk_len,
3154                                         0, root->objectid, orphan->objectid,
3155                                         orphan->offset);
3156                         if (ret < 0)
3157                                 goto out;
3158                 }
3159                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3160                                 orphan->offset, orphan->disk_bytenr,
3161                                 orphan->disk_len, orphan->disk_len);
3162                 if (ret < 0)
3163                         goto out;
3164
3165                 /* Update file size info */
3166                 rec->found_size += orphan->disk_len;
3167                 if (rec->found_size == rec->nbytes)
3168                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3169
3170                 /* Update the file extent hole info too */
3171                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3172                                            orphan->disk_len);
3173                 if (ret < 0)
3174                         goto out;
3175                 if (RB_EMPTY_ROOT(&rec->holes))
3176                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3177
3178                 list_del(&orphan->list);
3179                 free(orphan);
3180         }
3181         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3182 out:
3183         return ret;
3184 }
3185
3186 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3187                                         struct btrfs_root *root,
3188                                         struct btrfs_path *path,
3189                                         struct inode_record *rec)
3190 {
3191         struct rb_node *node;
3192         struct file_extent_hole *hole;
3193         int found = 0;
3194         int ret = 0;
3195
3196         node = rb_first(&rec->holes);
3197
3198         while (node) {
3199                 found = 1;
3200                 hole = rb_entry(node, struct file_extent_hole, node);
3201                 ret = btrfs_punch_hole(trans, root, rec->ino,
3202                                        hole->start, hole->len);
3203                 if (ret < 0)
3204                         goto out;
3205                 ret = del_file_extent_hole(&rec->holes, hole->start,
3206                                            hole->len);
3207                 if (ret < 0)
3208                         goto out;
3209                 if (RB_EMPTY_ROOT(&rec->holes))
3210                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3211                 node = rb_first(&rec->holes);
3212         }
3213         /* special case for a file losing all its file extent */
3214         if (!found) {
3215                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3216                                        round_up(rec->isize, root->sectorsize));
3217                 if (ret < 0)
3218                         goto out;
3219         }
3220         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3221                rec->ino, root->objectid);
3222 out:
3223         return ret;
3224 }
3225
3226 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3227 {
3228         struct btrfs_trans_handle *trans;
3229         struct btrfs_path path;
3230         int ret = 0;
3231
3232         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3233                              I_ERR_NO_ORPHAN_ITEM |
3234                              I_ERR_LINK_COUNT_WRONG |
3235                              I_ERR_NO_INODE_ITEM |
3236                              I_ERR_FILE_EXTENT_ORPHAN |
3237                              I_ERR_FILE_EXTENT_DISCOUNT|
3238                              I_ERR_FILE_NBYTES_WRONG)))
3239                 return rec->errors;
3240
3241         /*
3242          * For nlink repair, it may create a dir and add link, so
3243          * 2 for parent(256)'s dir_index and dir_item
3244          * 2 for lost+found dir's inode_item and inode_ref
3245          * 1 for the new inode_ref of the file
3246          * 2 for lost+found dir's dir_index and dir_item for the file
3247          */
3248         trans = btrfs_start_transaction(root, 7);
3249         if (IS_ERR(trans))
3250                 return PTR_ERR(trans);
3251
3252         btrfs_init_path(&path);
3253         if (rec->errors & I_ERR_NO_INODE_ITEM)
3254                 ret = repair_inode_no_item(trans, root, &path, rec);
3255         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3256                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3257         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3258                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3259         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3260                 ret = repair_inode_isize(trans, root, &path, rec);
3261         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3262                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3263         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3264                 ret = repair_inode_nlinks(trans, root, &path, rec);
3265         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3266                 ret = repair_inode_nbytes(trans, root, &path, rec);
3267         btrfs_commit_transaction(trans, root);
3268         btrfs_release_path(&path);
3269         return ret;
3270 }
3271
/*
 * Check every inode record cached in @inode_cache for the fs/subvolume tree
 * @root, emptying the cache along the way.  When the global `repair` flag is
 * set, backrefs, a missing root directory and per-inode errors are repaired
 * where possible.
 *
 * Returns:
 *   0        the root item has zero refs, or no error was counted
 *   -EAGAIN  repair_inode_backrefs() returned a positive value, or the root
 *            directory had to be recreated
 *   < 0      error from repair_inode_backrefs() or from starting the
 *            transaction used to recreate the root directory
 *   -1       at least one error was counted
 */
3272 static int check_inode_recs(struct btrfs_root *root,
3273                             struct cache_tree *inode_cache)
3274 {
3275         struct cache_extent *cache;
3276         struct ptr_node *node;
3277         struct inode_record *rec;
3278         struct inode_backref *backref;
3279         int stage = 0;
3280         int ret = 0;
3281         int err = 0;
3282         u64 error = 0;
3283         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3284
             /* A root with zero refs is not checked; leftovers only warn */
3285         if (btrfs_root_refs(&root->root_item) == 0) {
3286                 if (!cache_tree_empty(inode_cache))
3287                         fprintf(stderr, "warning line %d\n", __LINE__);
3288                 return 0;
3289         }
3290
3291         /*
3292          * We need to repair backrefs first because we could change some of the
3293          * errors in the inode recs.
3294          *
3295          * We also need to go through and delete invalid backrefs first and then
3296          * add the correct ones second.  We do this because we may get EEXIST
3297          * when adding back the correct index because we hadn't yet deleted the
3298          * invalid index.
3299          *
3300          * For example, if we were missing a dir index then the directories
3301          * isize would be wrong, so if we fixed the isize to what we thought it
3302          * would be and then fixed the backref we'd still have a invalid fs, so
3303          * we need to add back the dir index and then check to see if the isize
3304          * is still wrong.
3305          */
             /*
              * Stage 1 and 2 call repair_inode_backrefs() with its last
              * argument set (stage == 1) and clear (stage == 2).  Stage 3 is
              * entered only when err was set, and frees every cached record.
              * The inner loop body runs only when `repair` is set.
              */
3306         while (stage < 3) {
3307                 stage++;
3308                 if (stage == 3 && !err)
3309                         break;
3310
3311                 cache = search_cache_extent(inode_cache, 0);
3312                 while (repair && cache) {
3313                         node = container_of(cache, struct ptr_node, cache);
3314                         rec = node->data;
                             /* Advance first: the current node may be removed below */
3315                         cache = next_cache_extent(cache);
3316
3317                         /* Need to free everything up and rescan */
3318                         if (stage == 3) {
3319                                 remove_cache_extent(inode_cache, &node->cache);
3320                                 free(node);
3321                                 free_inode_rec(rec);
3322                                 continue;
3323                         }
3324
3325                         if (list_empty(&rec->backrefs))
3326                                 continue;
3327
3328                         ret = repair_inode_backrefs(root, rec, inode_cache,
3329                                                     stage == 1);
3330                         if (ret < 0) {
3331                                 err = ret;
                                     /* Next outer iteration becomes stage 3 (cleanup) */
3332                                 stage = 2;
3333                                 break;
                             /*
                              * NOTE(review): this is a plain `if`, not `else if`;
                              * the branch above always breaks, so the two behave
                              * the same here.
                              */
3334                         } if (ret > 0) {
3335                                 err = -EAGAIN;
3336                         }
3337                 }
3338         }
3339         if (err)
3340                 return err;
3341
             /* Look up the record of the root directory and check it */
3342         rec = get_inode_rec(inode_cache, root_dirid, 0);
3343         BUG_ON(IS_ERR(rec));
3344         if (rec) {
3345                 ret = check_root_dir(rec);
3346                 if (ret) {
3347                         fprintf(stderr, "root %llu root dir %llu error\n",
3348                                 (unsigned long long)root->root_key.objectid,
3349                                 (unsigned long long)root_dirid);
3350                         print_inode_error(root, rec);
3351                         error++;
3352                 }
3353         } else {
                     /* No record for the root dir: recreate it in repair mode */
3354                 if (repair) {
3355                         struct btrfs_trans_handle *trans;
3356
3357                         trans = btrfs_start_transaction(root, 1);
3358                         if (IS_ERR(trans)) {
3359                                 err = PTR_ERR(trans);
3360                                 return err;
3361                         }
3362
3363                         fprintf(stderr,
3364                                 "root %llu missing its root dir, recreating\n",
3365                                 (unsigned long long)root->objectid);
3366
3367                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3368                         BUG_ON(ret);
3369
3370                         btrfs_commit_transaction(trans, root);
3371                         return -EAGAIN;
3372                 }
3373
                     /* NOTE(review): reported only; `error` is not incremented here */
3374                 fprintf(stderr, "root %llu root dir %llu not found\n",
3375                         (unsigned long long)root->root_key.objectid,
3376                         (unsigned long long)root_dirid);
3377         }
3378
             /* Drain the cache: every record is removed, examined and freed */
3379         while (1) {
3380                 cache = search_cache_extent(inode_cache, 0);
3381                 if (!cache)
3382                         break;
3383                 node = container_of(cache, struct ptr_node, cache);
3384                 rec = node->data;
3385                 remove_cache_extent(inode_cache, &node->cache);
3386                 free(node);
                     /* The root dir and the orphan objectid get no per-inode check */
3387                 if (rec->ino == root_dirid ||
3388                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3389                         free_inode_rec(rec);
3390                         continue;
3391                 }
3392
                     /* Drop the "no orphan item" error if check_orphan_item() returns 0 */
3393                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3394                         ret = check_orphan_item(root, rec->ino);
3395                         if (ret == 0)
3396                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3397                         if (can_free_inode_rec(rec)) {
3398                                 free_inode_rec(rec);
3399                                 continue;
3400                         }
3401                 }
3402
3403                 if (!rec->found_inode_item)
3404                         rec->errors |= I_ERR_NO_INODE_ITEM;
3405                 if (rec->found_link != rec->nlink)
3406                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3407                 if (repair) {
3408                         ret = try_repair_inode(root, rec);
3409                         if (ret == 0 && can_free_inode_rec(rec)) {
3410                                 free_inode_rec(rec);
3411                                 continue;
3412                         }
                             /*
                              * ret is reset unconditionally, so in repair mode the
                              * test below never increments `error`; the record is
                              * still printed.
                              */
3413                         ret = 0;
3414                 }
3415
3416                 if (!(repair && ret == 0))
3417                         error++;
3418                 print_inode_error(root, rec);
                     /* Flag whatever each backref is missing and report it */
3419                 list_for_each_entry(backref, &rec->backrefs, list) {
3420                         if (!backref->found_dir_item)
3421                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3422                         if (!backref->found_dir_index)
3423                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3424                         if (!backref->found_inode_ref)
3425                                 backref->errors |= REF_ERR_NO_INODE_REF;
3426                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3427                                 " namelen %u name %s filetype %d errors %x",
3428                                 (unsigned long long)backref->dir,
3429                                 (unsigned long long)backref->index,
3430                                 backref->namelen, backref->name,
3431                                 backref->filetype, backref->errors);
3432                         print_ref_error(backref->errors);
3433                 }
3434                 free_inode_rec(rec);
3435         }
3436         return (error > 0) ? -1 : 0;
3437 }
3438
3439 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3440                                         u64 objectid)
3441 {
3442         struct cache_extent *cache;
3443         struct root_record *rec = NULL;
3444         int ret;
3445
3446         cache = lookup_cache_extent(root_cache, objectid, 1);
3447         if (cache) {
3448                 rec = container_of(cache, struct root_record, cache);
3449         } else {
3450                 rec = calloc(1, sizeof(*rec));
3451                 if (!rec)
3452                         return ERR_PTR(-ENOMEM);
3453                 rec->objectid = objectid;
3454                 INIT_LIST_HEAD(&rec->backrefs);
3455                 rec->cache.start = objectid;
3456                 rec->cache.size = 1;
3457
3458                 ret = insert_cache_extent(root_cache, &rec->cache);
3459                 if (ret)
3460                         return ERR_PTR(-EEXIST);
3461         }
3462         return rec;
3463 }
3464
3465 static struct root_backref *get_root_backref(struct root_record *rec,
3466                                              u64 ref_root, u64 dir, u64 index,
3467                                              const char *name, int namelen)
3468 {
3469         struct root_backref *backref;
3470
3471         list_for_each_entry(backref, &rec->backrefs, list) {
3472                 if (backref->ref_root != ref_root || backref->dir != dir ||
3473                     backref->namelen != namelen)
3474                         continue;
3475                 if (memcmp(name, backref->name, namelen))
3476                         continue;
3477                 return backref;
3478         }
3479
3480         backref = calloc(1, sizeof(*backref) + namelen + 1);
3481         if (!backref)
3482                 return NULL;
3483         backref->ref_root = ref_root;
3484         backref->dir = dir;
3485         backref->index = index;
3486         backref->namelen = namelen;
3487         memcpy(backref->name, name, namelen);
3488         backref->name[namelen] = '\0';
3489         list_add_tail(&backref->list, &rec->backrefs);
3490         return backref;
3491 }
3492
3493 static void free_root_record(struct cache_extent *cache)
3494 {
3495         struct root_record *rec;
3496         struct root_backref *backref;
3497
3498         rec = container_of(cache, struct root_record, cache);
3499         while (!list_empty(&rec->backrefs)) {
3500                 backref = to_root_backref(rec->backrefs.next);
3501                 list_del(&backref->list);
3502                 free(backref);
3503         }
3504
3505         free(rec);
3506 }
3507
3508 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3509
3510 static int add_root_backref(struct cache_tree *root_cache,
3511                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3512                             const char *name, int namelen,
3513                             int item_type, int errors)
3514 {
3515         struct root_record *rec;
3516         struct root_backref *backref;
3517
3518         rec = get_root_rec(root_cache, root_id);
3519         BUG_ON(IS_ERR(rec));
3520         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3521         BUG_ON(!backref);
3522
3523         backref->errors |= errors;
3524
3525         if (item_type != BTRFS_DIR_ITEM_KEY) {
3526                 if (backref->found_dir_index || backref->found_back_ref ||
3527                     backref->found_forward_ref) {
3528                         if (backref->index != index)
3529                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3530                 } else {
3531                         backref->index = index;
3532                 }
3533         }
3534
3535         if (item_type == BTRFS_DIR_ITEM_KEY) {
3536                 if (backref->found_forward_ref)
3537                         rec->found_ref++;
3538                 backref->found_dir_item = 1;
3539         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3540                 backref->found_dir_index = 1;
3541         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3542                 if (backref->found_forward_ref)
3543                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3544                 else if (backref->found_dir_item)
3545                         rec->found_ref++;
3546                 backref->found_forward_ref = 1;
3547         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3548                 if (backref->found_back_ref)
3549                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3550                 backref->found_back_ref = 1;
3551         } else {
3552                 BUG_ON(1);
3553         }
3554
3555         if (backref->found_forward_ref && backref->found_dir_item)
3556                 backref->reachable = 1;
3557         return 0;
3558 }
3559
3560 static int merge_root_recs(struct btrfs_root *root,
3561                            struct cache_tree *src_cache,
3562                            struct cache_tree *dst_cache)
3563 {
3564         struct cache_extent *cache;
3565         struct ptr_node *node;
3566         struct inode_record *rec;
3567         struct inode_backref *backref;
3568         int ret = 0;
3569
3570         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3571                 free_inode_recs_tree(src_cache);
3572                 return 0;
3573         }
3574
3575         while (1) {
3576                 cache = search_cache_extent(src_cache, 0);
3577                 if (!cache)
3578                         break;
3579                 node = container_of(cache, struct ptr_node, cache);
3580                 rec = node->data;
3581                 remove_cache_extent(src_cache, &node->cache);
3582                 free(node);
3583
3584                 ret = is_child_root(root, root->objectid, rec->ino);
3585                 if (ret < 0)
3586                         break;
3587                 else if (ret == 0)
3588                         goto skip;
3589
3590                 list_for_each_entry(backref, &rec->backrefs, list) {
3591                         BUG_ON(backref->found_inode_ref);
3592                         if (backref->found_dir_item)
3593                                 add_root_backref(dst_cache, rec->ino,
3594                                         root->root_key.objectid, backref->dir,
3595                                         backref->index, backref->name,
3596                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3597                                         backref->errors);
3598                         if (backref->found_dir_index)
3599                                 add_root_backref(dst_cache, rec->ino,
3600                                         root->root_key.objectid, backref->dir,
3601                                         backref->index, backref->name,
3602                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3603                                         backref->errors);
3604                 }
3605 skip:
3606                 free_inode_rec(rec);
3607         }
3608         if (ret < 0)
3609                 return ret;
3610         return 0;
3611 }
3612
3613 static int check_root_refs(struct btrfs_root *root,
3614                            struct cache_tree *root_cache)
3615 {
3616         struct root_record *rec;
3617         struct root_record *ref_root;
3618         struct root_backref *backref;
3619         struct cache_extent *cache;
3620         int loop = 1;
3621         int ret;
3622         int error;
3623         int errors = 0;
3624
3625         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3626         BUG_ON(IS_ERR(rec));
3627         rec->found_ref = 1;
3628
3629         /* fixme: this can not detect circular references */
3630         while (loop) {
3631                 loop = 0;
3632                 cache = search_cache_extent(root_cache, 0);
3633                 while (1) {
3634                         if (!cache)
3635                                 break;
3636                         rec = container_of(cache, struct root_record, cache);
3637                         cache = next_cache_extent(cache);
3638
3639                         if (rec->found_ref == 0)
3640                                 continue;
3641
3642                         list_for_each_entry(backref, &rec->backrefs, list) {
3643                                 if (!backref->reachable)
3644                                         continue;
3645
3646                                 ref_root = get_root_rec(root_cache,
3647                                                         backref->ref_root);
3648                                 BUG_ON(IS_ERR(ref_root));
3649                                 if (ref_root->found_ref > 0)
3650                                         continue;
3651
3652                                 backref->reachable = 0;
3653                                 rec->found_ref--;
3654                                 if (rec->found_ref == 0)
3655                                         loop = 1;
3656                         }
3657                 }
3658         }
3659
3660         cache = search_cache_extent(root_cache, 0);
3661         while (1) {
3662                 if (!cache)
3663                         break;
3664                 rec = container_of(cache, struct root_record, cache);
3665                 cache = next_cache_extent(cache);
3666
3667                 if (rec->found_ref == 0 &&
3668                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3669                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3670                         ret = check_orphan_item(root->fs_info->tree_root,
3671                                                 rec->objectid);
3672                         if (ret == 0)
3673                                 continue;
3674
3675                         /*
3676                          * If we don't have a root item then we likely just have
3677                          * a dir item in a snapshot for this root but no actual
3678                          * ref key or anything so it's meaningless.
3679                          */
3680                         if (!rec->found_root_item)
3681                                 continue;
3682                         errors++;
3683                         fprintf(stderr, "fs tree %llu not referenced\n",
3684                                 (unsigned long long)rec->objectid);
3685                 }
3686
3687                 error = 0;
3688                 if (rec->found_ref > 0 && !rec->found_root_item)
3689                         error = 1;
3690                 list_for_each_entry(backref, &rec->backrefs, list) {
3691                         if (!backref->found_dir_item)
3692                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3693                         if (!backref->found_dir_index)
3694                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3695                         if (!backref->found_back_ref)
3696                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3697                         if (!backref->found_forward_ref)
3698                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3699                         if (backref->reachable && backref->errors)
3700                                 error = 1;
3701                 }
3702                 if (!error)
3703                         continue;
3704
3705                 errors++;
3706                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3707                         (unsigned long long)rec->objectid, rec->found_ref,
3708                          rec->found_root_item ? "" : "not found");
3709
3710                 list_for_each_entry(backref, &rec->backrefs, list) {
3711                         if (!backref->reachable)
3712                                 continue;
3713                         if (!backref->errors && rec->found_root_item)
3714                                 continue;
3715                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3716                                 " index %llu namelen %u name %s errors %x\n",
3717                                 (unsigned long long)backref->ref_root,
3718                                 (unsigned long long)backref->dir,
3719                                 (unsigned long long)backref->index,
3720                                 backref->namelen, backref->name,
3721                                 backref->errors);
3722                         print_ref_error(backref->errors);
3723                 }
3724         }
3725         return errors > 0 ? 1 : 0;
3726 }
3727
3728 static int process_root_ref(struct extent_buffer *eb, int slot,
3729                             struct btrfs_key *key,
3730                             struct cache_tree *root_cache)
3731 {
3732         u64 dirid;
3733         u64 index;
3734         u32 len;
3735         u32 name_len;
3736         struct btrfs_root_ref *ref;
3737         char namebuf[BTRFS_NAME_LEN];
3738         int error;
3739
3740         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3741
3742         dirid = btrfs_root_ref_dirid(eb, ref);
3743         index = btrfs_root_ref_sequence(eb, ref);
3744         name_len = btrfs_root_ref_name_len(eb, ref);
3745
3746         if (name_len <= BTRFS_NAME_LEN) {
3747                 len = name_len;
3748                 error = 0;
3749         } else {
3750                 len = BTRFS_NAME_LEN;
3751                 error = REF_ERR_NAME_TOO_LONG;
3752         }
3753         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3754
3755         if (key->type == BTRFS_ROOT_REF_KEY) {
3756                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         } else {
3759                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3760                                  index, namebuf, len, key->type, error);
3761         }
3762         return 0;
3763 }
3764
3765 static void free_corrupt_block(struct cache_extent *cache)
3766 {
3767         struct btrfs_corrupt_block *corrupt;
3768
3769         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3770         free(corrupt);
3771 }
3772
3773 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3774
3775 /*
3776  * Repair the btree of the given root.
3777  *
3778  * The fix is to remove the node key in corrupt_blocks cache_tree.
3779  * and rebalance the tree.
3780  * After the fix, the btree should be writeable.
3781  */
3782 static int repair_btree(struct btrfs_root *root,
3783                         struct cache_tree *corrupt_blocks)
3784 {
3785         struct btrfs_trans_handle *trans;
3786         struct btrfs_path path;
3787         struct btrfs_corrupt_block *corrupt;
3788         struct cache_extent *cache;
3789         struct btrfs_key key;
3790         u64 offset;
3791         int level;
3792         int ret = 0;
3793
3794         if (cache_tree_empty(corrupt_blocks))
3795                 return 0;
3796
3797         trans = btrfs_start_transaction(root, 1);
3798         if (IS_ERR(trans)) {
3799                 ret = PTR_ERR(trans);
3800                 fprintf(stderr, "Error starting transaction: %s\n",
3801                         strerror(-ret));
3802                 return ret;
3803         }
3804         btrfs_init_path(&path);
3805         cache = first_cache_extent(corrupt_blocks);
3806         while (cache) {
3807                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3808                                        cache);
3809                 level = corrupt->level;
3810                 path.lowest_level = level;
3811                 key.objectid = corrupt->key.objectid;
3812                 key.type = corrupt->key.type;
3813                 key.offset = corrupt->key.offset;
3814
3815                 /*
3816                  * Here we don't want to do any tree balance, since it may
3817                  * cause a balance with corrupted brother leaf/node,
3818                  * so ins_len set to 0 here.
3819                  * Balance will be done after all corrupt node/leaf is deleted.
3820                  */
3821                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3822                 if (ret < 0)
3823                         goto out;
3824                 offset = btrfs_node_blockptr(path.nodes[level],
3825                                              path.slots[level]);
3826
3827                 /* Remove the ptr */
3828                 ret = btrfs_del_ptr(trans, root, &path, level,
3829                                     path.slots[level]);
3830                 if (ret < 0)
3831                         goto out;
3832                 /*
3833                  * Remove the corresponding extent
3834                  * return value is not concerned.
3835                  */
3836                 btrfs_release_path(&path);
3837                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3838                                         0, root->root_key.objectid,
3839                                         level - 1, 0);
3840                 cache = next_cache_extent(cache);
3841         }
3842
3843         /* Balance the btree using btrfs_search_slot() */
3844         cache = first_cache_extent(corrupt_blocks);
3845         while (cache) {
3846                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3847                                        cache);
3848                 memcpy(&key, &corrupt->key, sizeof(key));
3849                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3850                 if (ret < 0)
3851                         goto out;
3852                 /* return will always >0 since it won't find the item */
3853                 ret = 0;
3854                 btrfs_release_path(&path);
3855                 cache = next_cache_extent(cache);
3856         }
3857 out:
3858         btrfs_commit_transaction(trans, root);
3859         btrfs_release_path(&path);
3860         return ret;
3861 }
3862
3863 static int check_fs_root(struct btrfs_root *root,
3864                          struct cache_tree *root_cache,
3865                          struct walk_control *wc)
3866 {
3867         int ret = 0;
3868         int err = 0;
3869         int wret;
3870         int level;
3871         struct btrfs_path path;
3872         struct shared_node root_node;
3873         struct root_record *rec;
3874         struct btrfs_root_item *root_item = &root->root_item;
3875         struct cache_tree corrupt_blocks;
3876         struct orphan_data_extent *orphan;
3877         struct orphan_data_extent *tmp;
3878         enum btrfs_tree_block_status status;
3879         struct node_refs nrefs;
3880
3881         /*
3882          * Reuse the corrupt_block cache tree to record corrupted tree block
3883          *
3884          * Unlike the usage in extent tree check, here we do it in a per
3885          * fs/subvol tree base.
3886          */
3887         cache_tree_init(&corrupt_blocks);
3888         root->fs_info->corrupt_blocks = &corrupt_blocks;
3889
3890         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3891                 rec = get_root_rec(root_cache, root->root_key.objectid);
3892                 BUG_ON(IS_ERR(rec));
3893                 if (btrfs_root_refs(root_item) > 0)
3894                         rec->found_root_item = 1;
3895         }
3896
3897         btrfs_init_path(&path);
3898         memset(&root_node, 0, sizeof(root_node));
3899         cache_tree_init(&root_node.root_cache);
3900         cache_tree_init(&root_node.inode_cache);
3901         memset(&nrefs, 0, sizeof(nrefs));
3902
3903         /* Move the orphan extent record to corresponding inode_record */
3904         list_for_each_entry_safe(orphan, tmp,
3905                                  &root->orphan_data_extents, list) {
3906                 struct inode_record *inode;
3907
3908                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3909                                       1);
3910                 BUG_ON(IS_ERR(inode));
3911                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3912                 list_move(&orphan->list, &inode->orphan_extents);
3913         }
3914
3915         level = btrfs_header_level(root->node);
3916         memset(wc->nodes, 0, sizeof(wc->nodes));
3917         wc->nodes[level] = &root_node;
3918         wc->active_node = level;
3919         wc->root_level = level;
3920
3921         /* We may not have checked the root block, lets do that now */
3922         if (btrfs_is_leaf(root->node))
3923                 status = btrfs_check_leaf(root, NULL, root->node);
3924         else
3925                 status = btrfs_check_node(root, NULL, root->node);
3926         if (status != BTRFS_TREE_BLOCK_CLEAN)
3927                 return -EIO;
3928
3929         if (btrfs_root_refs(root_item) > 0 ||
3930             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3931                 path.nodes[level] = root->node;
3932                 extent_buffer_get(root->node);
3933                 path.slots[level] = 0;
3934         } else {
3935                 struct btrfs_key key;
3936                 struct btrfs_disk_key found_key;
3937
3938                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3939                 level = root_item->drop_level;
3940                 path.lowest_level = level;
3941                 if (level > btrfs_header_level(root->node) ||
3942                     level >= BTRFS_MAX_LEVEL) {
3943                         error("ignoring invalid drop level: %u", level);
3944                         goto skip_walking;
3945                 }
3946                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3947                 if (wret < 0)
3948                         goto skip_walking;
3949                 btrfs_node_key(path.nodes[level], &found_key,
3950                                 path.slots[level]);
3951                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3952                                         sizeof(found_key)));
3953         }
3954
3955         while (1) {
3956                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3957                 if (wret < 0)
3958                         ret = wret;
3959                 if (wret != 0)
3960                         break;
3961
3962                 wret = walk_up_tree(root, &path, wc, &level);
3963                 if (wret < 0)
3964                         ret = wret;
3965                 if (wret != 0)
3966                         break;
3967         }
3968 skip_walking:
3969         btrfs_release_path(&path);
3970
3971         if (!cache_tree_empty(&corrupt_blocks)) {
3972                 struct cache_extent *cache;
3973                 struct btrfs_corrupt_block *corrupt;
3974
3975                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3976                        root->root_key.objectid);
3977                 cache = first_cache_extent(&corrupt_blocks);
3978                 while (cache) {
3979                         corrupt = container_of(cache,
3980                                                struct btrfs_corrupt_block,
3981                                                cache);
3982                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3983                                cache->start, corrupt->level,
3984                                corrupt->key.objectid, corrupt->key.type,
3985                                corrupt->key.offset);
3986                         cache = next_cache_extent(cache);
3987                 }
3988                 if (repair) {
3989                         printf("Try to repair the btree for root %llu\n",
3990                                root->root_key.objectid);
3991                         ret = repair_btree(root, &corrupt_blocks);
3992                         if (ret < 0)
3993                                 fprintf(stderr, "Failed to repair btree: %s\n",
3994                                         strerror(-ret));
3995                         if (!ret)
3996                                 printf("Btree for root %llu is fixed\n",
3997                                        root->root_key.objectid);
3998                 }
3999         }
4000
4001         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4002         if (err < 0)
4003                 ret = err;
4004
4005         if (root_node.current) {
4006                 root_node.current->checked = 1;
4007                 maybe_free_inode_rec(&root_node.inode_cache,
4008                                 root_node.current);
4009         }
4010
4011         err = check_inode_recs(root, &root_node.inode_cache);
4012         if (!ret)
4013                 ret = err;
4014
4015         free_corrupt_blocks_tree(&corrupt_blocks);
4016         root->fs_info->corrupt_blocks = NULL;
4017         free_orphan_data_extents(&root->orphan_data_extents);
4018         return ret;
4019 }
4020
4021 static int fs_root_objectid(u64 objectid)
4022 {
4023         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4024             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4025                 return 1;
4026         return is_fstree(objectid);
4027 }
4028
4029 static int check_fs_roots(struct btrfs_root *root,
4030                           struct cache_tree *root_cache)
4031 {
4032         struct btrfs_path path;
4033         struct btrfs_key key;
4034         struct walk_control wc;
4035         struct extent_buffer *leaf, *tree_node;
4036         struct btrfs_root *tmp_root;
4037         struct btrfs_root *tree_root = root->fs_info->tree_root;
4038         int ret;
4039         int err = 0;
4040
4041         if (ctx.progress_enabled) {
4042                 ctx.tp = TASK_FS_ROOTS;
4043                 task_start(ctx.info);
4044         }
4045
4046         /*
4047          * Just in case we made any changes to the extent tree that weren't
4048          * reflected into the free space cache yet.
4049          */
4050         if (repair)
4051                 reset_cached_block_groups(root->fs_info);
4052         memset(&wc, 0, sizeof(wc));
4053         cache_tree_init(&wc.shared);
4054         btrfs_init_path(&path);
4055
4056 again:
4057         key.offset = 0;
4058         key.objectid = 0;
4059         key.type = BTRFS_ROOT_ITEM_KEY;
4060         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4061         if (ret < 0) {
4062                 err = 1;
4063                 goto out;
4064         }
4065         tree_node = tree_root->node;
4066         while (1) {
4067                 if (tree_node != tree_root->node) {
4068                         free_root_recs_tree(root_cache);
4069                         btrfs_release_path(&path);
4070                         goto again;
4071                 }
4072                 leaf = path.nodes[0];
4073                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4074                         ret = btrfs_next_leaf(tree_root, &path);
4075                         if (ret) {
4076                                 if (ret < 0)
4077                                         err = 1;
4078                                 break;
4079                         }
4080                         leaf = path.nodes[0];
4081                 }
4082                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4083                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4084                     fs_root_objectid(key.objectid)) {
4085                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4086                                 tmp_root = btrfs_read_fs_root_no_cache(
4087                                                 root->fs_info, &key);
4088                         } else {
4089                                 key.offset = (u64)-1;
4090                                 tmp_root = btrfs_read_fs_root(
4091                                                 root->fs_info, &key);
4092                         }
4093                         if (IS_ERR(tmp_root)) {
4094                                 err = 1;
4095                                 goto next;
4096                         }
4097                         ret = check_fs_root(tmp_root, root_cache, &wc);
4098                         if (ret == -EAGAIN) {
4099                                 free_root_recs_tree(root_cache);
4100                                 btrfs_release_path(&path);
4101                                 goto again;
4102                         }
4103                         if (ret)
4104                                 err = 1;
4105                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4106                                 btrfs_free_fs_root(tmp_root);
4107                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4108                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4109                         process_root_ref(leaf, path.slots[0], &key,
4110                                          root_cache);
4111                 }
4112 next:
4113                 path.slots[0]++;
4114         }
4115 out:
4116         btrfs_release_path(&path);
4117         if (err)
4118                 free_extent_cache_tree(&wc.shared);
4119         if (!cache_tree_empty(&wc.shared))
4120                 fprintf(stderr, "warning line %d\n", __LINE__);
4121
4122         task_stop(ctx.info);
4123
4124         return err;
4125 }
4126
4127 /*
4128  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4129  * INODE_REF/INODE_EXTREF match.
4130  *
4131  * @root:       the root of the fs/file tree
4132  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4133  * @key:        the key of the DIR_ITEM/DIR_INDEX
4134  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4135  *              distinguish root_dir between normal dir/file
4136  * @name:       the name in the INODE_REF/INODE_EXTREF
4137  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4138  * @mode:       the st_mode of INODE_ITEM
4139  *
4140  * Return 0 if no error occurred.
4141  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4142  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4143  * dir/file.
4144  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4145  * not match for normal dir/file.
4146  */
4147 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4148                          struct btrfs_key *key, u64 index, char *name,
4149                          u32 namelen, u32 mode)
4150 {
4151         struct btrfs_path path;
4152         struct extent_buffer *node;
4153         struct btrfs_dir_item *di;
4154         struct btrfs_key location;
4155         char namebuf[BTRFS_NAME_LEN] = {0};
4156         u32 total;
4157         u32 cur = 0;
4158         u32 len;
4159         u32 name_len;
4160         u32 data_len;
4161         u8 filetype;
4162         int slot;
4163         int ret;
4164
4165         btrfs_init_path(&path);
4166         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4167         if (ret < 0) {
4168                 ret = DIR_ITEM_MISSING;
4169                 goto out;
4170         }
4171
4172         /* Process root dir and goto out*/
4173         if (index == 0) {
4174                 if (ret == 0) {
4175                         ret = ROOT_DIR_ERROR;
4176                         error(
4177                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4178                                 root->objectid,
4179                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4180                                         "REF" : "EXTREF",
4181                                 ref_key->objectid, ref_key->offset,
4182                                 key->type == BTRFS_DIR_ITEM_KEY ?
4183                                         "DIR_ITEM" : "DIR_INDEX");
4184                 } else {
4185                         ret = 0;
4186                 }
4187
4188                 goto out;
4189         }
4190
4191         /* Process normal file/dir */
4192         if (ret > 0) {
4193                 ret = DIR_ITEM_MISSING;
4194                 error(
4195                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4196                         root->objectid,
4197                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4198                         ref_key->objectid, ref_key->offset,
4199                         key->type == BTRFS_DIR_ITEM_KEY ?
4200                                 "DIR_ITEM" : "DIR_INDEX",
4201                         key->objectid, key->offset, namelen, name,
4202                         imode_to_type(mode));
4203                 goto out;
4204         }
4205
4206         /* Check whether inode_id/filetype/name match */
4207         node = path.nodes[0];
4208         slot = path.slots[0];
4209         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4210         total = btrfs_item_size_nr(node, slot);
4211         while (cur < total) {
4212                 ret = DIR_ITEM_MISMATCH;
4213                 name_len = btrfs_dir_name_len(node, di);
4214                 data_len = btrfs_dir_data_len(node, di);
4215
4216                 btrfs_dir_item_key_to_cpu(node, di, &location);
4217                 if (location.objectid != ref_key->objectid ||
4218                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4219                     location.offset != 0)
4220                         goto next;
4221
4222                 filetype = btrfs_dir_type(node, di);
4223                 if (imode_to_type(mode) != filetype)
4224                         goto next;
4225
4226                 if (name_len <= BTRFS_NAME_LEN) {
4227                         len = name_len;
4228                 } else {
4229                         len = BTRFS_NAME_LEN;
4230                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4231                         root->objectid,
4232                         key->type == BTRFS_DIR_ITEM_KEY ?
4233                         "DIR_ITEM" : "DIR_INDEX",
4234                         key->objectid, key->offset, name_len);
4235                 }
4236                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4237                 if (len != namelen || strncmp(namebuf, name, len))
4238                         goto next;
4239
4240                 ret = 0;
4241                 goto out;
4242 next:
4243                 len = sizeof(*di) + name_len + data_len;
4244                 di = (struct btrfs_dir_item *)((char *)di + len);
4245                 cur += len;
4246         }
4247         if (ret == DIR_ITEM_MISMATCH)
4248                 error(
4249                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4250                         root->objectid,
4251                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4252                         ref_key->objectid, ref_key->offset,
4253                         key->type == BTRFS_DIR_ITEM_KEY ?
4254                                 "DIR_ITEM" : "DIR_INDEX",
4255                         key->objectid, key->offset, namelen, name,
4256                         imode_to_type(mode));
4257 out:
4258         btrfs_release_path(&path);
4259         return ret;
4260 }
4261
4262 /*
4263  * Traverse the given INODE_REF and call find_dir_item() to find related
4264  * DIR_ITEM/DIR_INDEX.
4265  *
4266  * @root:       the root of the fs/file tree
4267  * @ref_key:    the key of the INODE_REF
4268  * @refs:       the count of INODE_REF
4269  * @mode:       the st_mode of INODE_ITEM
4270  *
4271  * Return 0 if no error occurred.
4272  */
4273 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4274                            struct extent_buffer *node, int slot, u64 *refs,
4275                            int mode)
4276 {
4277         struct btrfs_key key;
4278         struct btrfs_inode_ref *ref;
4279         char namebuf[BTRFS_NAME_LEN] = {0};
4280         u32 total;
4281         u32 cur = 0;
4282         u32 len;
4283         u32 name_len;
4284         u64 index;
4285         int ret, err = 0;
4286
4287         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4288         total = btrfs_item_size_nr(node, slot);
4289
4290 next:
4291         /* Update inode ref count */
4292         (*refs)++;
4293
4294         index = btrfs_inode_ref_index(node, ref);
4295         name_len = btrfs_inode_ref_name_len(node, ref);
4296         if (name_len <= BTRFS_NAME_LEN) {
4297                 len = name_len;
4298         } else {
4299                 len = BTRFS_NAME_LEN;
4300                 warning("root %llu INODE_REF[%llu %llu] name too long",
4301                         root->objectid, ref_key->objectid, ref_key->offset);
4302         }
4303
4304         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4305
4306         /* Check root dir ref name */
4307         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4308                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4309                       root->objectid, ref_key->objectid, ref_key->offset,
4310                       namebuf);
4311                 err |= ROOT_DIR_ERROR;
4312         }
4313
4314         /* Find related DIR_INDEX */
4315         key.objectid = ref_key->offset;
4316         key.type = BTRFS_DIR_INDEX_KEY;
4317         key.offset = index;
4318         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4319         err |= ret;
4320
4321         /* Find related dir_item */
4322         key.objectid = ref_key->offset;
4323         key.type = BTRFS_DIR_ITEM_KEY;
4324         key.offset = btrfs_name_hash(namebuf, len);
4325         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4326         err |= ret;
4327
4328         len = sizeof(*ref) + name_len;
4329         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4330         cur += len;
4331         if (cur < total)
4332                 goto next;
4333
4334         return err;
4335 }
4336
4337 /*
4338  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4339  * DIR_ITEM/DIR_INDEX.
4340  *
4341  * @root:       the root of the fs/file tree
4342  * @ref_key:    the key of the INODE_EXTREF
4343  * @refs:       the count of INODE_EXTREF
4344  * @mode:       the st_mode of INODE_ITEM
4345  *
4346  * Return 0 if no error occurred.
4347  */
4348 static int check_inode_extref(struct btrfs_root *root,
4349                               struct btrfs_key *ref_key,
4350                               struct extent_buffer *node, int slot, u64 *refs,
4351                               int mode)
4352 {
4353         struct btrfs_key key;
4354         struct btrfs_inode_extref *extref;
4355         char namebuf[BTRFS_NAME_LEN] = {0};
4356         u32 total;
4357         u32 cur = 0;
4358         u32 len;
4359         u32 name_len;
4360         u64 index;
4361         u64 parent;
4362         int ret;
4363         int err = 0;
4364
4365         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4366         total = btrfs_item_size_nr(node, slot);
4367
4368 next:
4369         /* update inode ref count */
4370         (*refs)++;
4371         name_len = btrfs_inode_extref_name_len(node, extref);
4372         index = btrfs_inode_extref_index(node, extref);
4373         parent = btrfs_inode_extref_parent(node, extref);
4374         if (name_len <= BTRFS_NAME_LEN) {
4375                 len = name_len;
4376         } else {
4377                 len = BTRFS_NAME_LEN;
4378                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4379                         root->objectid, ref_key->objectid, ref_key->offset);
4380         }
4381         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4382
4383         /* Check root dir ref name */
4384         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4385                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4386                       root->objectid, ref_key->objectid, ref_key->offset,
4387                       namebuf);
4388                 err |= ROOT_DIR_ERROR;
4389         }
4390
4391         /* find related dir_index */
4392         key.objectid = parent;
4393         key.type = BTRFS_DIR_INDEX_KEY;
4394         key.offset = index;
4395         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4396         err |= ret;
4397
4398         /* find related dir_item */
4399         key.objectid = parent;
4400         key.type = BTRFS_DIR_ITEM_KEY;
4401         key.offset = btrfs_name_hash(namebuf, len);
4402         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4403         err |= ret;
4404
4405         len = sizeof(*extref) + name_len;
4406         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4407         cur += len;
4408
4409         if (cur < total)
4410                 goto next;
4411
4412         return err;
4413 }
4414
4415 /*
4416  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4417  * DIR_ITEM/DIR_INDEX match.
4418  *
4419  * @root:       the root of the fs/file tree
4420  * @key:        the key of the INODE_REF/INODE_EXTREF
4421  * @name:       the name in the INODE_REF/INODE_EXTREF
4422  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4423  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4424  * to (u64)-1
4425  * @ext_ref:    the EXTENDED_IREF feature
4426  *
4427  * Return 0 if no error occurred.
4428  * Return >0 for error bitmap
4429  */
4430 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4431                           char *name, int namelen, u64 index,
4432                           unsigned int ext_ref)
4433 {
4434         struct btrfs_path path;
4435         struct btrfs_inode_ref *ref;
4436         struct btrfs_inode_extref *extref;
4437         struct extent_buffer *node;
4438         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4439         u32 total;
4440         u32 cur = 0;
4441         u32 len;
4442         u32 ref_namelen;
4443         u64 ref_index;
4444         u64 parent;
4445         u64 dir_id;
4446         int slot;
4447         int ret;
4448
4449         btrfs_init_path(&path);
4450         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4451         if (ret) {
4452                 ret = INODE_REF_MISSING;
4453                 goto extref;
4454         }
4455
4456         node = path.nodes[0];
4457         slot = path.slots[0];
4458
4459         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4460         total = btrfs_item_size_nr(node, slot);
4461
4462         /* Iterate all entry of INODE_REF */
4463         while (cur < total) {
4464                 ret = INODE_REF_MISSING;
4465
4466                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4467                 ref_index = btrfs_inode_ref_index(node, ref);
4468                 if (index != (u64)-1 && index != ref_index)
4469                         goto next_ref;
4470
4471                 if (ref_namelen <= BTRFS_NAME_LEN) {
4472                         len = ref_namelen;
4473                 } else {
4474                         len = BTRFS_NAME_LEN;
4475                         warning("root %llu INODE %s[%llu %llu] name too long",
4476                                 root->objectid,
4477                                 key->type == BTRFS_INODE_REF_KEY ?
4478                                         "REF" : "EXTREF",
4479                                 key->objectid, key->offset);
4480                 }
4481                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4482                                    len);
4483
4484                 if (len != namelen || strncmp(ref_namebuf, name, len))
4485                         goto next_ref;
4486
4487                 ret = 0;
4488                 goto out;
4489 next_ref:
4490                 len = sizeof(*ref) + ref_namelen;
4491                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4492                 cur += len;
4493         }
4494
4495 extref:
4496         /* Skip if not support EXTENDED_IREF feature */
4497         if (!ext_ref)
4498                 goto out;
4499
4500         btrfs_release_path(&path);
4501         btrfs_init_path(&path);
4502
4503         dir_id = key->offset;
4504         key->type = BTRFS_INODE_EXTREF_KEY;
4505         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4506
4507         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4508         if (ret) {
4509                 ret = INODE_REF_MISSING;
4510                 goto out;
4511         }
4512
4513         node = path.nodes[0];
4514         slot = path.slots[0];
4515
4516         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4517         cur = 0;
4518         total = btrfs_item_size_nr(node, slot);
4519
4520         /* Iterate all entry of INODE_EXTREF */
4521         while (cur < total) {
4522                 ret = INODE_REF_MISSING;
4523
4524                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4525                 ref_index = btrfs_inode_extref_index(node, extref);
4526                 parent = btrfs_inode_extref_parent(node, extref);
4527                 if (index != (u64)-1 && index != ref_index)
4528                         goto next_extref;
4529
4530                 if (parent != dir_id)
4531                         goto next_extref;
4532
4533                 if (ref_namelen <= BTRFS_NAME_LEN) {
4534                         len = ref_namelen;
4535                 } else {
4536                         len = BTRFS_NAME_LEN;
4537                         warning("root %llu INODE %s[%llu %llu] name too long",
4538                                 root->objectid,
4539                                 key->type == BTRFS_INODE_REF_KEY ?
4540                                         "REF" : "EXTREF",
4541                                 key->objectid, key->offset);
4542                 }
4543                 read_extent_buffer(node, ref_namebuf,
4544                                    (unsigned long)(extref + 1), len);
4545
4546                 if (len != namelen || strncmp(ref_namebuf, name, len))
4547                         goto next_extref;
4548
4549                 ret = 0;
4550                 goto out;
4551
4552 next_extref:
4553                 len = sizeof(*extref) + ref_namelen;
4554                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4555                 cur += len;
4556
4557         }
4558 out:
4559         btrfs_release_path(&path);
4560         return ret;
4561 }
4562
4563 /*
4564  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4565  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4566  *
4567  * @root:       the root of the fs/file tree
4568  * @key:        the key of the DIR_ITEM/DIR_INDEX
4569  * @size:       the st_size of the INODE_ITEM
4570  * @ext_ref:    the EXTENDED_IREF feature
4571  *
4572  * Return 0 if no error occurred.
4573  */
4574 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4575                           struct extent_buffer *node, int slot, u64 *size,
4576                           unsigned int ext_ref)
4577 {
4578         struct btrfs_dir_item *di;
4579         struct btrfs_inode_item *ii;
4580         struct btrfs_path path;
4581         struct btrfs_key location;
4582         char namebuf[BTRFS_NAME_LEN] = {0};
4583         u32 total;
4584         u32 cur = 0;
4585         u32 len;
4586         u32 name_len;
4587         u32 data_len;
4588         u8 filetype;
4589         u32 mode;
4590         u64 index;
4591         int ret;
4592         int err = 0;
4593
4594         /*
4595          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4596          * ignore index check.
4597          */
4598         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4599
4600         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4601         total = btrfs_item_size_nr(node, slot);
4602
4603         while (cur < total) {
4604                 data_len = btrfs_dir_data_len(node, di);
4605                 if (data_len)
4606                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4607                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4608                               "DIR_ITEM" : "DIR_INDEX",
4609                               key->objectid, key->offset, data_len);
4610
4611                 name_len = btrfs_dir_name_len(node, di);
4612                 if (name_len <= BTRFS_NAME_LEN) {
4613                         len = name_len;
4614                 } else {
4615                         len = BTRFS_NAME_LEN;
4616                         warning("root %llu %s[%llu %llu] name too long",
4617                                 root->objectid,
4618                                 key->type == BTRFS_DIR_ITEM_KEY ?
4619                                 "DIR_ITEM" : "DIR_INDEX",
4620                                 key->objectid, key->offset);
4621                 }
4622                 (*size) += name_len;
4623
4624                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4625                 filetype = btrfs_dir_type(node, di);
4626
4627                 btrfs_init_path(&path);
4628                 btrfs_dir_item_key_to_cpu(node, di, &location);
4629
4630                 /* Ignore related ROOT_ITEM check */
4631                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4632                         goto next;
4633
4634                 /* Check relative INODE_ITEM(existence/filetype) */
4635                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4636                 if (ret) {
4637                         err |= INODE_ITEM_MISSING;
4638                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4639                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4640                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4641                               key->offset, location.objectid, name_len,
4642                               namebuf, filetype);
4643                         goto next;
4644                 }
4645
4646                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4647                                     struct btrfs_inode_item);
4648                 mode = btrfs_inode_mode(path.nodes[0], ii);
4649
4650                 if (imode_to_type(mode) != filetype) {
4651                         err |= INODE_ITEM_MISMATCH;
4652                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4653                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4654                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4655                               key->offset, name_len, namebuf, filetype);
4656                 }
4657
4658                 /* Check relative INODE_REF/INODE_EXTREF */
4659                 location.type = BTRFS_INODE_REF_KEY;
4660                 location.offset = key->objectid;
4661                 ret = find_inode_ref(root, &location, namebuf, len,
4662                                        index, ext_ref);
4663                 err |= ret;
4664                 if (ret & INODE_REF_MISSING)
4665                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4666                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4667                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4668                               key->offset, name_len, namebuf, filetype);
4669
4670 next:
4671                 btrfs_release_path(&path);
4672                 len = sizeof(*di) + name_len + data_len;
4673                 di = (struct btrfs_dir_item *)((char *)di + len);
4674                 cur += len;
4675
4676                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4677                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4678                               root->objectid, key->objectid, key->offset);
4679                         break;
4680                 }
4681         }
4682
4683         return err;
4684 }
4685
4686 /*
4687  * Check file extent datasum/hole, update the size of the file extents,
4688  * check and update the last offset of the file extent.
4689  *
4690  * @root:       the root of fs/file tree.
4691  * @fkey:       the key of the file extent.
4692  * @nodatasum:  INODE_NODATASUM feature.
4693  * @size:       the sum of all EXTENT_DATA items size for this inode.
4694  * @end:        the offset of the last extent.
4695  *
4696  * Return 0 if no error occurred.
4697  */
4698 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4699                              struct extent_buffer *node, int slot,
4700                              unsigned int nodatasum, u64 *size, u64 *end)
4701 {
4702         struct btrfs_file_extent_item *fi;
4703         u64 disk_bytenr;
4704         u64 disk_num_bytes;
4705         u64 extent_num_bytes;
4706         u64 found;
4707         unsigned int extent_type;
4708         unsigned int is_hole;
4709         int ret;
4710         int err = 0;
4711
4712         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4713
4714         extent_type = btrfs_file_extent_type(node, fi);
4715         /* Skip if file extent is inline */
4716         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717                 struct btrfs_item *e = btrfs_item_nr(slot);
4718                 u32 item_inline_len;
4719
4720                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722                 if (extent_num_bytes == 0 ||
4723                     extent_num_bytes != item_inline_len)
4724                         err |= FILE_EXTENT_ERROR;
4725                 *size += extent_num_bytes;
4726                 return err;
4727         }
4728
4729         /* Check extent type */
4730         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4731                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4732                 err |= FILE_EXTENT_ERROR;
4733                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4734                       root->objectid, fkey->objectid, fkey->offset);
4735                 return err;
4736         }
4737
4738         /* Check REG_EXTENT/PREALLOC_EXTENT */
4739         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4740         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4741         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4742         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4743
4744         /* Check EXTENT_DATA datasum */
4745         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4746         if (found > 0 && nodatasum) {
4747                 err |= ODD_CSUM_ITEM;
4748                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4749                       root->objectid, fkey->objectid, fkey->offset);
4750         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4751                    !is_hole &&
4752                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4753                 err |= CSUM_ITEM_MISSING;
4754                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4757                 err |= ODD_CSUM_ITEM;
4758                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4759                       root->objectid, fkey->objectid, fkey->offset);
4760         }
4761
4762         /* Check EXTENT_DATA hole */
4763         if (no_holes && is_hole) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         } else if (!no_holes && *end != fkey->offset) {
4768                 err |= FILE_EXTENT_ERROR;
4769                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4770                       root->objectid, fkey->objectid, fkey->offset);
4771         }
4772
4773         *end += extent_num_bytes;
4774         if (!is_hole)
4775                 *size += extent_num_bytes;
4776
4777         return err;
4778 }
4779
4780 /*
4781  * Check INODE_ITEM and related ITEMs (the same inode number)
4782  * 1. check link count
4783  * 2. check inode ref/extref
4784  * 3. check dir item/index
4785  *
4786  * @ext_ref:    the EXTENDED_IREF feature
4787  *
4788  * Return 0 if no error occurred.
4789  * Return >0 for error or hit the traversal is done(by error bitmap)
4790  */
4791 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4792                             unsigned int ext_ref)
4793 {
4794         struct extent_buffer *node;
4795         struct btrfs_inode_item *ii;
4796         struct btrfs_key key;
4797         u64 inode_id;
4798         u32 mode;
4799         u64 nlink;
4800         u64 nbytes;
4801         u64 isize;
4802         u64 size = 0;
4803         u64 refs = 0;
4804         u64 extent_end = 0;
4805         u64 extent_size = 0;
4806         unsigned int dir;
4807         unsigned int nodatasum;
4808         int slot;
4809         int ret;
4810         int err = 0;
4811
4812         node = path->nodes[0];
4813         slot = path->slots[0];
4814
4815         btrfs_item_key_to_cpu(node, &key, slot);
4816         inode_id = key.objectid;
4817
4818         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4819                 ret = btrfs_next_item(root, path);
4820                 if (ret > 0)
4821                         err |= LAST_ITEM;
4822                 return err;
4823         }
4824
4825         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4826         isize = btrfs_inode_size(node, ii);
4827         nbytes = btrfs_inode_nbytes(node, ii);
4828         mode = btrfs_inode_mode(node, ii);
4829         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4830         nlink = btrfs_inode_nlink(node, ii);
4831         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4832
4833         while (1) {
4834                 ret = btrfs_next_item(root, path);
4835                 if (ret < 0) {
4836                         /* out will fill 'err' rusing current statistics */
4837                         goto out;
4838                 } else if (ret > 0) {
4839                         err |= LAST_ITEM;
4840                         goto out;
4841                 }
4842
4843                 node = path->nodes[0];
4844                 slot = path->slots[0];
4845                 btrfs_item_key_to_cpu(node, &key, slot);
4846                 if (key.objectid != inode_id)
4847                         goto out;
4848
4849                 switch (key.type) {
4850                 case BTRFS_INODE_REF_KEY:
4851                         ret = check_inode_ref(root, &key, node, slot, &refs,
4852                                               mode);
4853                         err |= ret;
4854                         break;
4855                 case BTRFS_INODE_EXTREF_KEY:
4856                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4857                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4858                                         root->objectid, key.objectid,
4859                                         key.offset);
4860                         ret = check_inode_extref(root, &key, node, slot, &refs,
4861                                                  mode);
4862                         err |= ret;
4863                         break;
4864                 case BTRFS_DIR_ITEM_KEY:
4865                 case BTRFS_DIR_INDEX_KEY:
4866                         if (!dir) {
4867                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4868                                         root->objectid, inode_id,
4869                                         imode_to_type(mode), key.objectid,
4870                                         key.offset);
4871                         }
4872                         ret = check_dir_item(root, &key, node, slot, &size,
4873                                              ext_ref);
4874                         err |= ret;
4875                         break;
4876                 case BTRFS_EXTENT_DATA_KEY:
4877                         if (dir) {
4878                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4879                                         root->objectid, inode_id, key.objectid,
4880                                         key.offset);
4881                         }
4882                         ret = check_file_extent(root, &key, node, slot,
4883                                                 nodatasum, &extent_size,
4884                                                 &extent_end);
4885                         err |= ret;
4886                         break;
4887                 case BTRFS_XATTR_ITEM_KEY:
4888                         break;
4889                 default:
4890                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4891                               key.objectid, key.type, key.offset);
4892                 }
4893         }
4894
4895 out:
4896         /* verify INODE_ITEM nlink/isize/nbytes */
4897         if (dir) {
4898                 if (nlink != 1) {
4899                         err |= LINK_COUNT_ERROR;
4900                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4901                               root->objectid, inode_id, nlink);
4902                 }
4903
4904                 /*
4905                  * Just a warning, as dir inode nbytes is just an
4906                  * instructive value.
4907                  */
4908                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4909                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4910                                 root->objectid, inode_id, root->nodesize);
4911                 }
4912
4913                 if (isize != size) {
4914                         err |= ISIZE_ERROR;
4915                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4916                               root->objectid, inode_id, isize, size);
4917                 }
4918         } else {
4919                 if (nlink != refs) {
4920                         err |= LINK_COUNT_ERROR;
4921                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4922                               root->objectid, inode_id, nlink, refs);
4923                 } else if (!nlink) {
4924                         err |= ORPHAN_ITEM;
4925                 }
4926
4927                 if (!nbytes && !no_holes && extent_end < isize) {
4928                         err |= NBYTES_ERROR;
4929                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4930                               root->objectid, inode_id, isize);
4931                 }
4932
4933                 if (nbytes != extent_size) {
4934                         err |= NBYTES_ERROR;
4935                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4936                               root->objectid, inode_id, nbytes, extent_size);
4937                 }
4938         }
4939
4940         return err;
4941 }
4942
4943 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4944 {
4945         struct btrfs_path path;
4946         struct btrfs_key key;
4947         int err = 0;
4948         int ret;
4949
4950         btrfs_init_path(&path);
4951         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4952         key.type = BTRFS_INODE_ITEM_KEY;
4953         key.offset = 0;
4954
4955         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4956         if (ret < 0)
4957                 goto out;
4958         if (ret > 0) {
4959                 ret = 0;
4960                 err |= INODE_ITEM_MISSING;
4961         }
4962
4963         err |= check_inode_item(root, &path, ext_ref);
4964         err &= ~LAST_ITEM;
4965         if (err && !ret)
4966                 ret = -EIO;
4967 out:
4968         btrfs_release_path(&path);
4969         return ret;
4970 }
4971
4972 /*
4973  * Iterate all item on the tree and call check_inode_item() to check.
4974  *
4975  * @root:       the root of the tree to be checked.
4976  * @ext_ref:    the EXTENDED_IREF feature
4977  *
4978  * Return 0 if no error found.
4979  * Return <0 for error.
4980  */
4981 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4982 {
4983         struct btrfs_path path;
4984         struct node_refs nrefs;
4985         struct btrfs_root_item *root_item = &root->root_item;
4986         int ret, wret;
4987         int level;
4988
4989         /*
4990          * We need to manually check the first inode item(256)
4991          * As the following traversal function will only start from
4992          * the first inode item in the leaf, if inode item(256) is missing
4993          * we will just skip it forever.
4994          */
4995         ret = check_fs_first_inode(root, ext_ref);
4996         if (ret < 0)
4997                 return ret;
4998
4999         memset(&nrefs, 0, sizeof(nrefs));
5000         level = btrfs_header_level(root->node);
5001         btrfs_init_path(&path);
5002
5003         if (btrfs_root_refs(root_item) > 0 ||
5004             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5005                 path.nodes[level] = root->node;
5006                 path.slots[level] = 0;
5007                 extent_buffer_get(root->node);
5008         } else {
5009                 struct btrfs_key key;
5010
5011                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5012                 level = root_item->drop_level;
5013                 path.lowest_level = level;
5014                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5015                 if (ret < 0)
5016                         goto out;
5017                 ret = 0;
5018         }
5019
5020         while (1) {
5021                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5022                 if (wret < 0)
5023                         ret = wret;
5024                 if (wret != 0)
5025                         break;
5026
5027                 wret = walk_up_tree_v2(root, &path, &level);
5028                 if (wret < 0)
5029                         ret = wret;
5030                 if (wret != 0)
5031                         break;
5032         }
5033
5034 out:
5035         btrfs_release_path(&path);
5036         return ret;
5037 }
5038
5039 /*
5040  * Find the relative ref for root_ref and root_backref.
5041  *
5042  * @root:       the root of the root tree.
5043  * @ref_key:    the key of the root ref.
5044  *
5045  * Return 0 if no error occurred.
5046  */
5047 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5048                           struct extent_buffer *node, int slot)
5049 {
5050         struct btrfs_path path;
5051         struct btrfs_key key;
5052         struct btrfs_root_ref *ref;
5053         struct btrfs_root_ref *backref;
5054         char ref_name[BTRFS_NAME_LEN] = {0};
5055         char backref_name[BTRFS_NAME_LEN] = {0};
5056         u64 ref_dirid;
5057         u64 ref_seq;
5058         u32 ref_namelen;
5059         u64 backref_dirid;
5060         u64 backref_seq;
5061         u32 backref_namelen;
5062         u32 len;
5063         int ret;
5064         int err = 0;
5065
5066         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5067         ref_dirid = btrfs_root_ref_dirid(node, ref);
5068         ref_seq = btrfs_root_ref_sequence(node, ref);
5069         ref_namelen = btrfs_root_ref_name_len(node, ref);
5070
5071         if (ref_namelen <= BTRFS_NAME_LEN) {
5072                 len = ref_namelen;
5073         } else {
5074                 len = BTRFS_NAME_LEN;
5075                 warning("%s[%llu %llu] ref_name too long",
5076                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5077                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5078                         ref_key->offset);
5079         }
5080         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5081
5082         /* Find relative root_ref */
5083         key.objectid = ref_key->offset;
5084         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5085         key.offset = ref_key->objectid;
5086
5087         btrfs_init_path(&path);
5088         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5089         if (ret) {
5090                 err |= ROOT_REF_MISSING;
5091                 error("%s[%llu %llu] couldn't find relative ref",
5092                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5093                       "ROOT_REF" : "ROOT_BACKREF",
5094                       ref_key->objectid, ref_key->offset);
5095                 goto out;
5096         }
5097
5098         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5099                                  struct btrfs_root_ref);
5100         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5101         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5102         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5103
5104         if (backref_namelen <= BTRFS_NAME_LEN) {
5105                 len = backref_namelen;
5106         } else {
5107                 len = BTRFS_NAME_LEN;
5108                 warning("%s[%llu %llu] ref_name too long",
5109                         key.type == BTRFS_ROOT_REF_KEY ?
5110                         "ROOT_REF" : "ROOT_BACKREF",
5111                         key.objectid, key.offset);
5112         }
5113         read_extent_buffer(path.nodes[0], backref_name,
5114                            (unsigned long)(backref + 1), len);
5115
5116         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5117             ref_namelen != backref_namelen ||
5118             strncmp(ref_name, backref_name, len)) {
5119                 err |= ROOT_REF_MISMATCH;
5120                 error("%s[%llu %llu] mismatch relative ref",
5121                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5122                       "ROOT_REF" : "ROOT_BACKREF",
5123                       ref_key->objectid, ref_key->offset);
5124         }
5125 out:
5126         btrfs_release_path(&path);
5127         return err;
5128 }
5129
5130 /*
5131  * Check all fs/file tree in low_memory mode.
5132  *
5133  * 1. for fs tree root item, call check_fs_root_v2()
5134  * 2. for fs tree root ref/backref, call check_root_ref()
5135  *
5136  * Return 0 if no error occurred.
5137  */
5138 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5139 {
5140         struct btrfs_root *tree_root = fs_info->tree_root;
5141         struct btrfs_root *cur_root = NULL;
5142         struct btrfs_path path;
5143         struct btrfs_key key;
5144         struct extent_buffer *node;
5145         unsigned int ext_ref;
5146         int slot;
5147         int ret;
5148         int err = 0;
5149
5150         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5151
5152         btrfs_init_path(&path);
5153         key.objectid = BTRFS_FS_TREE_OBJECTID;
5154         key.offset = 0;
5155         key.type = BTRFS_ROOT_ITEM_KEY;
5156
5157         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5158         if (ret < 0) {
5159                 err = ret;
5160                 goto out;
5161         } else if (ret > 0) {
5162                 err = -ENOENT;
5163                 goto out;
5164         }
5165
5166         while (1) {
5167                 node = path.nodes[0];
5168                 slot = path.slots[0];
5169                 btrfs_item_key_to_cpu(node, &key, slot);
5170                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5171                         goto out;
5172                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5173                     fs_root_objectid(key.objectid)) {
5174                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5175                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5176                                                                        &key);
5177                         } else {
5178                                 key.offset = (u64)-1;
5179                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5180                         }
5181
5182                         if (IS_ERR(cur_root)) {
5183                                 error("Fail to read fs/subvol tree: %lld",
5184                                       key.objectid);
5185                                 err = -EIO;
5186                                 goto next;
5187                         }
5188
5189                         ret = check_fs_root_v2(cur_root, ext_ref);
5190                         err |= ret;
5191
5192                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5193                                 btrfs_free_fs_root(cur_root);
5194                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5195                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5196                         ret = check_root_ref(tree_root, &key, node, slot);
5197                         err |= ret;
5198                 }
5199 next:
5200                 ret = btrfs_next_item(tree_root, &path);
5201                 if (ret > 0)
5202                         goto out;
5203                 if (ret < 0) {
5204                         err = ret;
5205                         goto out;
5206                 }
5207         }
5208
5209 out:
5210         btrfs_release_path(&path);
5211         return err;
5212 }
5213
5214 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5215 {
5216         struct list_head *cur = rec->backrefs.next;
5217         struct extent_backref *back;
5218         struct tree_backref *tback;
5219         struct data_backref *dback;
5220         u64 found = 0;
5221         int err = 0;
5222
5223         while(cur != &rec->backrefs) {
5224                 back = to_extent_backref(cur);
5225                 cur = cur->next;
5226                 if (!back->found_extent_tree) {
5227                         err = 1;
5228                         if (!print_errs)
5229                                 goto out;
5230                         if (back->is_data) {
5231                                 dback = to_data_backref(back);
5232                                 fprintf(stderr, "Backref %llu %s %llu"
5233                                         " owner %llu offset %llu num_refs %lu"
5234                                         " not found in extent tree\n",
5235                                         (unsigned long long)rec->start,
5236                                         back->full_backref ?
5237                                         "parent" : "root",
5238                                         back->full_backref ?
5239                                         (unsigned long long)dback->parent:
5240                                         (unsigned long long)dback->root,
5241                                         (unsigned long long)dback->owner,
5242                                         (unsigned long long)dback->offset,
5243                                         (unsigned long)dback->num_refs);
5244                         } else {
5245                                 tback = to_tree_backref(back);
5246                                 fprintf(stderr, "Backref %llu parent %llu"
5247                                         " root %llu not found in extent tree\n",
5248                                         (unsigned long long)rec->start,
5249                                         (unsigned long long)tback->parent,
5250                                         (unsigned long long)tback->root);
5251                         }
5252                 }
5253                 if (!back->is_data && !back->found_ref) {
5254                         err = 1;
5255                         if (!print_errs)
5256                                 goto out;
5257                         tback = to_tree_backref(back);
5258                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5259                                 (unsigned long long)rec->start,
5260                                 back->full_backref ? "parent" : "root",
5261                                 back->full_backref ?
5262                                 (unsigned long long)tback->parent :
5263                                 (unsigned long long)tback->root, back);
5264                 }
5265                 if (back->is_data) {
5266                         dback = to_data_backref(back);
5267                         if (dback->found_ref != dback->num_refs) {
5268                                 err = 1;
5269                                 if (!print_errs)
5270                                         goto out;
5271                                 fprintf(stderr, "Incorrect local backref count"
5272                                         " on %llu %s %llu owner %llu"
5273                                         " offset %llu found %u wanted %u back %p\n",
5274                                         (unsigned long long)rec->start,
5275                                         back->full_backref ?
5276                                         "parent" : "root",
5277                                         back->full_backref ?
5278                                         (unsigned long long)dback->parent:
5279                                         (unsigned long long)dback->root,
5280                                         (unsigned long long)dback->owner,
5281                                         (unsigned long long)dback->offset,
5282                                         dback->found_ref, dback->num_refs, back);
5283                         }
5284                         if (dback->disk_bytenr != rec->start) {
5285                                 err = 1;
5286                                 if (!print_errs)
5287                                         goto out;
5288                                 fprintf(stderr, "Backref disk bytenr does not"
5289                                         " match extent record, bytenr=%llu, "
5290                                         "ref bytenr=%llu\n",
5291                                         (unsigned long long)rec->start,
5292                                         (unsigned long long)dback->disk_bytenr);
5293                         }
5294
5295                         if (dback->bytes != rec->nr) {
5296                                 err = 1;
5297                                 if (!print_errs)
5298                                         goto out;
5299                                 fprintf(stderr, "Backref bytes do not match "
5300                                         "extent backref, bytenr=%llu, ref "
5301                                         "bytes=%llu, backref bytes=%llu\n",
5302                                         (unsigned long long)rec->start,
5303                                         (unsigned long long)rec->nr,
5304                                         (unsigned long long)dback->bytes);
5305                         }
5306                 }
5307                 if (!back->is_data) {
5308                         found += 1;
5309                 } else {
5310                         dback = to_data_backref(back);
5311                         found += dback->found_ref;
5312                 }
5313         }
5314         if (found != rec->refs) {
5315                 err = 1;
5316                 if (!print_errs)
5317                         goto out;
5318                 fprintf(stderr, "Incorrect global backref count "
5319                         "on %llu found %llu wanted %llu\n",
5320                         (unsigned long long)rec->start,
5321                         (unsigned long long)found,
5322                         (unsigned long long)rec->refs);
5323         }
5324 out:
5325         return err;
5326 }
5327
5328 static int free_all_extent_backrefs(struct extent_record *rec)
5329 {
5330         struct extent_backref *back;
5331         struct list_head *cur;
5332         while (!list_empty(&rec->backrefs)) {
5333                 cur = rec->backrefs.next;
5334                 back = to_extent_backref(cur);
5335                 list_del(cur);
5336                 free(back);
5337         }
5338         return 0;
5339 }
5340
5341 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5342                                      struct cache_tree *extent_cache)
5343 {
5344         struct cache_extent *cache;
5345         struct extent_record *rec;
5346
5347         while (1) {
5348                 cache = first_cache_extent(extent_cache);
5349                 if (!cache)
5350                         break;
5351                 rec = container_of(cache, struct extent_record, cache);
5352                 remove_cache_extent(extent_cache, cache);
5353                 free_all_extent_backrefs(rec);
5354                 free(rec);
5355         }
5356 }
5357
5358 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5359                                  struct extent_record *rec)
5360 {
5361         if (rec->content_checked && rec->owner_ref_checked &&
5362             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5363             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5364             !rec->bad_full_backref && !rec->crossing_stripes &&
5365             !rec->wrong_chunk_type) {
5366                 remove_cache_extent(extent_cache, &rec->cache);
5367                 free_all_extent_backrefs(rec);
5368                 list_del_init(&rec->list);
5369                 free(rec);
5370         }
5371         return 0;
5372 }
5373
5374 static int check_owner_ref(struct btrfs_root *root,
5375                             struct extent_record *rec,
5376                             struct extent_buffer *buf)
5377 {
5378         struct extent_backref *node;
5379         struct tree_backref *back;
5380         struct btrfs_root *ref_root;
5381         struct btrfs_key key;
5382         struct btrfs_path path;
5383         struct extent_buffer *parent;
5384         int level;
5385         int found = 0;
5386         int ret;
5387
5388         list_for_each_entry(node, &rec->backrefs, list) {
5389                 if (node->is_data)
5390                         continue;
5391                 if (!node->found_ref)
5392                         continue;
5393                 if (node->full_backref)
5394                         continue;
5395                 back = to_tree_backref(node);
5396                 if (btrfs_header_owner(buf) == back->root)
5397                         return 0;
5398         }
5399         BUG_ON(rec->is_root);
5400
5401         /* try to find the block by search corresponding fs tree */
5402         key.objectid = btrfs_header_owner(buf);
5403         key.type = BTRFS_ROOT_ITEM_KEY;
5404         key.offset = (u64)-1;
5405
5406         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5407         if (IS_ERR(ref_root))
5408                 return 1;
5409
5410         level = btrfs_header_level(buf);
5411         if (level == 0)
5412                 btrfs_item_key_to_cpu(buf, &key, 0);
5413         else
5414                 btrfs_node_key_to_cpu(buf, &key, 0);
5415
5416         btrfs_init_path(&path);
5417         path.lowest_level = level + 1;
5418         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5419         if (ret < 0)
5420                 return 0;
5421
5422         parent = path.nodes[level + 1];
5423         if (parent && buf->start == btrfs_node_blockptr(parent,
5424                                                         path.slots[level + 1]))
5425                 found = 1;
5426
5427         btrfs_release_path(&path);
5428         return found ? 0 : 1;
5429 }
5430
5431 static int is_extent_tree_record(struct extent_record *rec)
5432 {
5433         struct list_head *cur = rec->backrefs.next;
5434         struct extent_backref *node;
5435         struct tree_backref *back;
5436         int is_extent = 0;
5437
5438         while(cur != &rec->backrefs) {
5439                 node = to_extent_backref(cur);
5440                 cur = cur->next;
5441                 if (node->is_data)
5442                         return 0;
5443                 back = to_tree_backref(node);
5444                 if (node->full_backref)
5445                         return 0;
5446                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5447                         is_extent = 1;
5448         }
5449         return is_extent;
5450 }
5451
5452
5453 static int record_bad_block_io(struct btrfs_fs_info *info,
5454                                struct cache_tree *extent_cache,
5455                                u64 start, u64 len)
5456 {
5457         struct extent_record *rec;
5458         struct cache_extent *cache;
5459         struct btrfs_key key;
5460
5461         cache = lookup_cache_extent(extent_cache, start, len);
5462         if (!cache)
5463                 return 0;
5464
5465         rec = container_of(cache, struct extent_record, cache);
5466         if (!is_extent_tree_record(rec))
5467                 return 0;
5468
5469         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5470         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5471 }
5472
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are exchanged and,
 * when @slot is 0, the new first key is pushed up via btrfs_fixup_low_keys().
 * For a leaf the item data, the item offsets/sizes and the item keys are each
 * exchanged in turn.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		/* Node: read both key pointers, then write them back crossed. */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			struct btrfs_disk_key key;
			/* The first key of the node changed; fix up the levels above. */
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		/* Leaf: capture keys, offsets and sizes of both items first. */
		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): item1_data was allocated with item1_size bytes
		 * but item2_size bytes are taken from it here (and vice versa
		 * on the next line).  Presumably this is only safe when both
		 * items have the same size -- confirm, since a larger item2
		 * would read past the end of item1_data.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/*
		 * NOTE(review): data, offsets/sizes and keys are all exchanged;
		 * verify that the combined effect is the intended one.
		 */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		/* Rewrite the keys through @path, one slot at a time. */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
5540
5541 static int fix_key_order(struct btrfs_trans_handle *trans,
5542                          struct btrfs_root *root,
5543                          struct btrfs_path *path)
5544 {
5545         struct extent_buffer *buf;
5546         struct btrfs_key k1, k2;
5547         int i;
5548         int level = path->lowest_level;
5549         int ret = -EIO;
5550
5551         buf = path->nodes[level];
5552         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5553                 if (level) {
5554                         btrfs_node_key_to_cpu(buf, &k1, i);
5555                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5556                 } else {
5557                         btrfs_item_key_to_cpu(buf, &k1, i);
5558                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5559                 }
5560                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5561                         continue;
5562                 ret = swap_values(root, path, buf, i);
5563                 if (ret)
5564                         break;
5565                 btrfs_mark_buffer_dirty(buf);
5566                 i = 0;
5567         }
5568         return ret;
5569 }
5570
5571 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5572                              struct btrfs_root *root,
5573                              struct btrfs_path *path,
5574                              struct extent_buffer *buf, int slot)
5575 {
5576         struct btrfs_key key;
5577         int nritems = btrfs_header_nritems(buf);
5578
5579         btrfs_item_key_to_cpu(buf, &key, slot);
5580
5581         /* These are all the keys we can deal with missing. */
5582         if (key.type != BTRFS_DIR_INDEX_KEY &&
5583             key.type != BTRFS_EXTENT_ITEM_KEY &&
5584             key.type != BTRFS_METADATA_ITEM_KEY &&
5585             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5586             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5587                 return -1;
5588
5589         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5590                (unsigned long long)key.objectid, key.type,
5591                (unsigned long long)key.offset, slot, buf->start);
5592         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5593                               btrfs_item_nr_offset(slot + 1),
5594                               sizeof(struct btrfs_item) *
5595                               (nritems - slot - 1));
5596         btrfs_set_header_nritems(buf, nritems - 1);
5597         if (slot == 0) {
5598                 struct btrfs_disk_key disk_key;
5599
5600                 btrfs_item_key(buf, &disk_key, 0);
5601                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5602         }
5603         btrfs_mark_buffer_dirty(buf);
5604         return 0;
5605 }
5606
5607 static int fix_item_offset(struct btrfs_trans_handle *trans,
5608                            struct btrfs_root *root,
5609                            struct btrfs_path *path)
5610 {
5611         struct extent_buffer *buf;
5612         int i;
5613         int ret = 0;
5614
5615         /* We should only get this for leaves */
5616         BUG_ON(path->lowest_level);
5617         buf = path->nodes[0];
5618 again:
5619         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5620                 unsigned int shift = 0, offset;
5621
5622                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5623                     BTRFS_LEAF_DATA_SIZE(root)) {
5624                         if (btrfs_item_end_nr(buf, i) >
5625                             BTRFS_LEAF_DATA_SIZE(root)) {
5626                                 ret = delete_bogus_item(trans, root, path,
5627                                                         buf, i);
5628                                 if (!ret)
5629                                         goto again;
5630                                 fprintf(stderr, "item is off the end of the "
5631                                         "leaf, can't fix\n");
5632                                 ret = -EIO;
5633                                 break;
5634                         }
5635                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5636                                 btrfs_item_end_nr(buf, i);
5637                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5638                            btrfs_item_offset_nr(buf, i - 1)) {
5639                         if (btrfs_item_end_nr(buf, i) >
5640                             btrfs_item_offset_nr(buf, i - 1)) {
5641                                 ret = delete_bogus_item(trans, root, path,
5642                                                         buf, i);
5643                                 if (!ret)
5644                                         goto again;
5645                                 fprintf(stderr, "items overlap, can't fix\n");
5646                                 ret = -EIO;
5647                                 break;
5648                         }
5649                         shift = btrfs_item_offset_nr(buf, i - 1) -
5650                                 btrfs_item_end_nr(buf, i);
5651                 }
5652                 if (!shift)
5653                         continue;
5654
5655                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5656                        i, shift, (unsigned long long)buf->start);
5657                 offset = btrfs_item_offset_nr(buf, i);
5658                 memmove_extent_buffer(buf,
5659                                       btrfs_leaf_data(buf) + offset + shift,
5660                                       btrfs_leaf_data(buf) + offset,
5661                                       btrfs_item_size_nr(buf, i));
5662                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5663                                       offset + shift);
5664                 btrfs_mark_buffer_dirty(buf);
5665         }
5666
5667         /*
5668          * We may have moved things, in which case we want to exit so we don't
5669          * write those changes out.  Once we have proper abort functionality in
5670          * progs this can be changed to something nicer.
5671          */
5672         BUG_ON(ret);
5673         return ret;
5674 }
5675
5676 /*
5677  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5678  * then just return -EIO.
5679  */
5680 static int try_to_fix_bad_block(struct btrfs_root *root,
5681                                 struct extent_buffer *buf,
5682                                 enum btrfs_tree_block_status status)
5683 {
5684         struct btrfs_trans_handle *trans;
5685         struct ulist *roots;
5686         struct ulist_node *node;
5687         struct btrfs_root *search_root;
5688         struct btrfs_path path;
5689         struct ulist_iterator iter;
5690         struct btrfs_key root_key, key;
5691         int ret;
5692
5693         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5694             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5695                 return -EIO;
5696
5697         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5698         if (ret)
5699                 return -EIO;
5700
5701         btrfs_init_path(&path);
5702         ULIST_ITER_INIT(&iter);
5703         while ((node = ulist_next(roots, &iter))) {
5704                 root_key.objectid = node->val;
5705                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5706                 root_key.offset = (u64)-1;
5707
5708                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5709                 if (IS_ERR(root)) {
5710                         ret = -EIO;
5711                         break;
5712                 }
5713
5714
5715                 trans = btrfs_start_transaction(search_root, 0);
5716                 if (IS_ERR(trans)) {
5717                         ret = PTR_ERR(trans);
5718                         break;
5719                 }
5720
5721                 path.lowest_level = btrfs_header_level(buf);
5722                 path.skip_check_block = 1;
5723                 if (path.lowest_level)
5724                         btrfs_node_key_to_cpu(buf, &key, 0);
5725                 else
5726                         btrfs_item_key_to_cpu(buf, &key, 0);
5727                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5728                 if (ret) {
5729                         ret = -EIO;
5730                         btrfs_commit_transaction(trans, search_root);
5731                         break;
5732                 }
5733                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5734                         ret = fix_key_order(trans, search_root, &path);
5735                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5736                         ret = fix_item_offset(trans, search_root, &path);
5737                 if (ret) {
5738                         btrfs_commit_transaction(trans, search_root);
5739                         break;
5740                 }
5741                 btrfs_release_path(&path);
5742                 btrfs_commit_transaction(trans, search_root);
5743         }
5744         ulist_free(roots);
5745         btrfs_release_path(&path);
5746         return ret;
5747 }
5748
5749 static int check_block(struct btrfs_root *root,
5750                        struct cache_tree *extent_cache,
5751                        struct extent_buffer *buf, u64 flags)
5752 {
5753         struct extent_record *rec;
5754         struct cache_extent *cache;
5755         struct btrfs_key key;
5756         enum btrfs_tree_block_status status;
5757         int ret = 0;
5758         int level;
5759
5760         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5761         if (!cache)
5762                 return 1;
5763         rec = container_of(cache, struct extent_record, cache);
5764         rec->generation = btrfs_header_generation(buf);
5765
5766         level = btrfs_header_level(buf);
5767         if (btrfs_header_nritems(buf) > 0) {
5768
5769                 if (level == 0)
5770                         btrfs_item_key_to_cpu(buf, &key, 0);
5771                 else
5772                         btrfs_node_key_to_cpu(buf, &key, 0);
5773
5774                 rec->info_objectid = key.objectid;
5775         }
5776         rec->info_level = level;
5777
5778         if (btrfs_is_leaf(buf))
5779                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5780         else
5781                 status = btrfs_check_node(root, &rec->parent_key, buf);
5782
5783         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5784                 if (repair)
5785                         status = try_to_fix_bad_block(root, buf, status);
5786                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5787                         ret = -EIO;
5788                         fprintf(stderr, "bad block %llu\n",
5789                                 (unsigned long long)buf->start);
5790                 } else {
5791                         /*
5792                          * Signal to callers we need to start the scan over
5793                          * again since we'll have cowed blocks.
5794                          */
5795                         ret = -EAGAIN;
5796                 }
5797         } else {
5798                 rec->content_checked = 1;
5799                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5800                         rec->owner_ref_checked = 1;
5801                 else {
5802                         ret = check_owner_ref(root, rec, buf);
5803                         if (!ret)
5804                                 rec->owner_ref_checked = 1;
5805                 }
5806         }
5807         if (!ret)
5808                 maybe_free_extent_rec(extent_cache, rec);
5809         return ret;
5810 }
5811
5812 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5813                                                 u64 parent, u64 root)
5814 {
5815         struct list_head *cur = rec->backrefs.next;
5816         struct extent_backref *node;
5817         struct tree_backref *back;
5818
5819         while(cur != &rec->backrefs) {
5820                 node = to_extent_backref(cur);
5821                 cur = cur->next;
5822                 if (node->is_data)
5823                         continue;
5824                 back = to_tree_backref(node);
5825                 if (parent > 0) {
5826                         if (!node->full_backref)
5827                                 continue;
5828                         if (parent == back->parent)
5829                                 return back;
5830                 } else {
5831                         if (node->full_backref)
5832                                 continue;
5833                         if (back->root == root)
5834                                 return back;
5835                 }
5836         }
5837         return NULL;
5838 }
5839
5840 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5841                                                 u64 parent, u64 root)
5842 {
5843         struct tree_backref *ref = malloc(sizeof(*ref));
5844
5845         if (!ref)
5846                 return NULL;
5847         memset(&ref->node, 0, sizeof(ref->node));
5848         if (parent > 0) {
5849                 ref->parent = parent;
5850                 ref->node.full_backref = 1;
5851         } else {
5852                 ref->root = root;
5853                 ref->node.full_backref = 0;
5854         }
5855         list_add_tail(&ref->node.list, &rec->backrefs);
5856
5857         return ref;
5858 }
5859
5860 static struct data_backref *find_data_backref(struct extent_record *rec,
5861                                                 u64 parent, u64 root,
5862                                                 u64 owner, u64 offset,
5863                                                 int found_ref,
5864                                                 u64 disk_bytenr, u64 bytes)
5865 {
5866         struct list_head *cur = rec->backrefs.next;
5867         struct extent_backref *node;
5868         struct data_backref *back;
5869
5870         while(cur != &rec->backrefs) {
5871                 node = to_extent_backref(cur);
5872                 cur = cur->next;
5873                 if (!node->is_data)
5874                         continue;
5875                 back = to_data_backref(node);
5876                 if (parent > 0) {
5877                         if (!node->full_backref)
5878                                 continue;
5879                         if (parent == back->parent)
5880                                 return back;
5881                 } else {
5882                         if (node->full_backref)
5883                                 continue;
5884                         if (back->root == root && back->owner == owner &&
5885                             back->offset == offset) {
5886                                 if (found_ref && node->found_ref &&
5887                                     (back->bytes != bytes ||
5888                                     back->disk_bytenr != disk_bytenr))
5889                                         continue;
5890                                 return back;
5891                         }
5892                 }
5893         }
5894         return NULL;
5895 }
5896
5897 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5898                                                 u64 parent, u64 root,
5899                                                 u64 owner, u64 offset,
5900                                                 u64 max_size)
5901 {
5902         struct data_backref *ref = malloc(sizeof(*ref));
5903
5904         if (!ref)
5905                 return NULL;
5906         memset(&ref->node, 0, sizeof(ref->node));
5907         ref->node.is_data = 1;
5908
5909         if (parent > 0) {
5910                 ref->parent = parent;
5911                 ref->owner = 0;
5912                 ref->offset = 0;
5913                 ref->node.full_backref = 1;
5914         } else {
5915                 ref->root = root;
5916                 ref->owner = owner;
5917                 ref->offset = offset;
5918                 ref->node.full_backref = 0;
5919         }
5920         ref->bytes = max_size;
5921         ref->found_ref = 0;
5922         ref->num_refs = 0;
5923         list_add_tail(&ref->node.list, &rec->backrefs);
5924         if (max_size > rec->max_size)
5925                 rec->max_size = max_size;
5926         return ref;
5927 }
5928
5929 /* Check if the type of extent matches with its chunk */
5930 static void check_extent_type(struct extent_record *rec)
5931 {
5932         struct btrfs_block_group_cache *bg_cache;
5933
5934         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5935         if (!bg_cache)
5936                 return;
5937
5938         /* data extent, check chunk directly*/
5939         if (!rec->metadata) {
5940                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5941                         rec->wrong_chunk_type = 1;
5942                 return;
5943         }
5944
5945         /* metadata extent, check the obvious case first */
5946         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5947                                  BTRFS_BLOCK_GROUP_METADATA))) {
5948                 rec->wrong_chunk_type = 1;
5949                 return;
5950         }
5951
5952         /*
5953          * Check SYSTEM extent, as it's also marked as metadata, we can only
5954          * make sure it's a SYSTEM extent by its backref
5955          */
5956         if (!list_empty(&rec->backrefs)) {
5957                 struct extent_backref *node;
5958                 struct tree_backref *tback;
5959                 u64 bg_type;
5960
5961                 node = to_extent_backref(rec->backrefs.next);
5962                 if (node->is_data) {
5963                         /* tree block shouldn't have data backref */
5964                         rec->wrong_chunk_type = 1;
5965                         return;
5966                 }
5967                 tback = container_of(node, struct tree_backref, node);
5968
5969                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5970                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5971                 else
5972                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5973                 if (!(bg_cache->flags & bg_type))
5974                         rec->wrong_chunk_type = 1;
5975         }
5976 }
5977
5978 /*
5979  * Allocate a new extent record, fill default values from @tmpl and insert int
5980  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5981  * the cache, otherwise it fails.
5982  */
5983 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5984                 struct extent_record *tmpl)
5985 {
5986         struct extent_record *rec;
5987         int ret = 0;
5988
5989         rec = malloc(sizeof(*rec));
5990         if (!rec)
5991                 return -ENOMEM;
5992         rec->start = tmpl->start;
5993         rec->max_size = tmpl->max_size;
5994         rec->nr = max(tmpl->nr, tmpl->max_size);
5995         rec->found_rec = tmpl->found_rec;
5996         rec->content_checked = tmpl->content_checked;
5997         rec->owner_ref_checked = tmpl->owner_ref_checked;
5998         rec->num_duplicates = 0;
5999         rec->metadata = tmpl->metadata;
6000         rec->flag_block_full_backref = FLAG_UNSET;
6001         rec->bad_full_backref = 0;
6002         rec->crossing_stripes = 0;
6003         rec->wrong_chunk_type = 0;
6004         rec->is_root = tmpl->is_root;
6005         rec->refs = tmpl->refs;
6006         rec->extent_item_refs = tmpl->extent_item_refs;
6007         rec->parent_generation = tmpl->parent_generation;
6008         INIT_LIST_HEAD(&rec->backrefs);
6009         INIT_LIST_HEAD(&rec->dups);
6010         INIT_LIST_HEAD(&rec->list);
6011         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6012         rec->cache.start = tmpl->start;
6013         rec->cache.size = tmpl->nr;
6014         ret = insert_cache_extent(extent_cache, &rec->cache);
6015         if (ret) {
6016                 free(rec);
6017                 return ret;
6018         }
6019         bytes_used += rec->nr;
6020
6021         if (tmpl->metadata)
6022                 rec->crossing_stripes = check_crossing_stripes(global_info,
6023                                 rec->start, global_info->tree_root->nodesize);
6024         check_extent_type(rec);
6025         return ret;
6026 }
6027
6028 /*
6029  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6030  * some are hints:
6031  * - refs              - if found, increase refs
6032  * - is_root           - if found, set
6033  * - content_checked   - if found, set
6034  * - owner_ref_checked - if found, set
6035  *
6036  * If not found, create a new one, initialize and insert.
6037  */
6038 static int add_extent_rec(struct cache_tree *extent_cache,
6039                 struct extent_record *tmpl)
6040 {
6041         struct extent_record *rec;
6042         struct cache_extent *cache;
6043         int ret = 0;
6044         int dup = 0;
6045
6046         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6047         if (cache) {
6048                 rec = container_of(cache, struct extent_record, cache);
6049                 if (tmpl->refs)
6050                         rec->refs++;
6051                 if (rec->nr == 1)
6052                         rec->nr = max(tmpl->nr, tmpl->max_size);
6053
6054                 /*
6055                  * We need to make sure to reset nr to whatever the extent
6056                  * record says was the real size, this way we can compare it to
6057                  * the backrefs.
6058                  */
6059                 if (tmpl->found_rec) {
6060                         if (tmpl->start != rec->start || rec->found_rec) {
6061                                 struct extent_record *tmp;
6062
6063                                 dup = 1;
6064                                 if (list_empty(&rec->list))
6065                                         list_add_tail(&rec->list,
6066                                                       &duplicate_extents);
6067
6068                                 /*
6069                                  * We have to do this song and dance in case we
6070                                  * find an extent record that falls inside of
6071                                  * our current extent record but does not have
6072                                  * the same objectid.
6073                                  */
6074                                 tmp = malloc(sizeof(*tmp));
6075                                 if (!tmp)
6076                                         return -ENOMEM;
6077                                 tmp->start = tmpl->start;
6078                                 tmp->max_size = tmpl->max_size;
6079                                 tmp->nr = tmpl->nr;
6080                                 tmp->found_rec = 1;
6081                                 tmp->metadata = tmpl->metadata;
6082                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6083                                 INIT_LIST_HEAD(&tmp->list);
6084                                 list_add_tail(&tmp->list, &rec->dups);
6085                                 rec->num_duplicates++;
6086                         } else {
6087                                 rec->nr = tmpl->nr;
6088                                 rec->found_rec = 1;
6089                         }
6090                 }
6091
6092                 if (tmpl->extent_item_refs && !dup) {
6093                         if (rec->extent_item_refs) {
6094                                 fprintf(stderr, "block %llu rec "
6095                                         "extent_item_refs %llu, passed %llu\n",
6096                                         (unsigned long long)tmpl->start,
6097                                         (unsigned long long)
6098                                                         rec->extent_item_refs,
6099                                         (unsigned long long)tmpl->extent_item_refs);
6100                         }
6101                         rec->extent_item_refs = tmpl->extent_item_refs;
6102                 }
6103                 if (tmpl->is_root)
6104                         rec->is_root = 1;
6105                 if (tmpl->content_checked)
6106                         rec->content_checked = 1;
6107                 if (tmpl->owner_ref_checked)
6108                         rec->owner_ref_checked = 1;
6109                 memcpy(&rec->parent_key, &tmpl->parent_key,
6110                                 sizeof(tmpl->parent_key));
6111                 if (tmpl->parent_generation)
6112                         rec->parent_generation = tmpl->parent_generation;
6113                 if (rec->max_size < tmpl->max_size)
6114                         rec->max_size = tmpl->max_size;
6115
6116                 /*
6117                  * A metadata extent can't cross stripe_len boundary, otherwise
6118                  * kernel scrub won't be able to handle it.
6119                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6120                  * it.
6121                  */
6122                 if (tmpl->metadata)
6123                         rec->crossing_stripes = check_crossing_stripes(
6124                                         global_info, rec->start,
6125                                         global_info->tree_root->nodesize);
6126                 check_extent_type(rec);
6127                 maybe_free_extent_rec(extent_cache, rec);
6128                 return ret;
6129         }
6130
6131         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6132
6133         return ret;
6134 }
6135
6136 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6137                             u64 parent, u64 root, int found_ref)
6138 {
6139         struct extent_record *rec;
6140         struct tree_backref *back;
6141         struct cache_extent *cache;
6142         int ret;
6143
6144         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6145         if (!cache) {
6146                 struct extent_record tmpl;
6147
6148                 memset(&tmpl, 0, sizeof(tmpl));
6149                 tmpl.start = bytenr;
6150                 tmpl.nr = 1;
6151                 tmpl.metadata = 1;
6152
6153                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6154                 if (ret)
6155                         return ret;
6156
6157                 /* really a bug in cache_extent implement now */
6158                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6159                 if (!cache)
6160                         return -ENOENT;
6161         }
6162
6163         rec = container_of(cache, struct extent_record, cache);
6164         if (rec->start != bytenr) {
6165                 /*
6166                  * Several cause, from unaligned bytenr to over lapping extents
6167                  */
6168                 return -EEXIST;
6169         }
6170
6171         back = find_tree_backref(rec, parent, root);
6172         if (!back) {
6173                 back = alloc_tree_backref(rec, parent, root);
6174                 if (!back)
6175                         return -ENOMEM;
6176         }
6177
6178         if (found_ref) {
6179                 if (back->node.found_ref) {
6180                         fprintf(stderr, "Extent back ref already exists "
6181                                 "for %llu parent %llu root %llu \n",
6182                                 (unsigned long long)bytenr,
6183                                 (unsigned long long)parent,
6184                                 (unsigned long long)root);
6185                 }
6186                 back->node.found_ref = 1;
6187         } else {
6188                 if (back->node.found_extent_tree) {
6189                         fprintf(stderr, "Extent back ref already exists "
6190                                 "for %llu parent %llu root %llu \n",
6191                                 (unsigned long long)bytenr,
6192                                 (unsigned long long)parent,
6193                                 (unsigned long long)root);
6194                 }
6195                 back->node.found_extent_tree = 1;
6196         }
6197         check_extent_type(rec);
6198         maybe_free_extent_rec(extent_cache, rec);
6199         return 0;
6200 }
6201
6202 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6203                             u64 parent, u64 root, u64 owner, u64 offset,
6204                             u32 num_refs, int found_ref, u64 max_size)
6205 {
6206         struct extent_record *rec;
6207         struct data_backref *back;
6208         struct cache_extent *cache;
6209         int ret;
6210
6211         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6212         if (!cache) {
6213                 struct extent_record tmpl;
6214
6215                 memset(&tmpl, 0, sizeof(tmpl));
6216                 tmpl.start = bytenr;
6217                 tmpl.nr = 1;
6218                 tmpl.max_size = max_size;
6219
6220                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6221                 if (ret)
6222                         return ret;
6223
6224                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6225                 if (!cache)
6226                         abort();
6227         }
6228
6229         rec = container_of(cache, struct extent_record, cache);
6230         if (rec->max_size < max_size)
6231                 rec->max_size = max_size;
6232
6233         /*
6234          * If found_ref is set then max_size is the real size and must match the
6235          * existing refs.  So if we have already found a ref then we need to
6236          * make sure that this ref matches the existing one, otherwise we need
6237          * to add a new backref so we can notice that the backrefs don't match
6238          * and we need to figure out who is telling the truth.  This is to
6239          * account for that awful fsync bug I introduced where we'd end up with
6240          * a btrfs_file_extent_item that would have its length include multiple
6241          * prealloc extents or point inside of a prealloc extent.
6242          */
6243         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6244                                  bytenr, max_size);
6245         if (!back) {
6246                 back = alloc_data_backref(rec, parent, root, owner, offset,
6247                                           max_size);
6248                 BUG_ON(!back);
6249         }
6250
6251         if (found_ref) {
6252                 BUG_ON(num_refs != 1);
6253                 if (back->node.found_ref)
6254                         BUG_ON(back->bytes != max_size);
6255                 back->node.found_ref = 1;
6256                 back->found_ref += 1;
6257                 back->bytes = max_size;
6258                 back->disk_bytenr = bytenr;
6259                 rec->refs += 1;
6260                 rec->content_checked = 1;
6261                 rec->owner_ref_checked = 1;
6262         } else {
6263                 if (back->node.found_extent_tree) {
6264                         fprintf(stderr, "Extent back ref already exists "
6265                                 "for %llu parent %llu root %llu "
6266                                 "owner %llu offset %llu num_refs %lu\n",
6267                                 (unsigned long long)bytenr,
6268                                 (unsigned long long)parent,
6269                                 (unsigned long long)root,
6270                                 (unsigned long long)owner,
6271                                 (unsigned long long)offset,
6272                                 (unsigned long)num_refs);
6273                 }
6274                 back->num_refs = num_refs;
6275                 back->node.found_extent_tree = 1;
6276         }
6277         maybe_free_extent_rec(extent_cache, rec);
6278         return 0;
6279 }
6280
6281 static int add_pending(struct cache_tree *pending,
6282                        struct cache_tree *seen, u64 bytenr, u32 size)
6283 {
6284         int ret;
6285         ret = add_cache_extent(seen, bytenr, size);
6286         if (ret)
6287                 return ret;
6288         add_cache_extent(pending, bytenr, size);
6289         return 0;
6290 }
6291
6292 static int pick_next_pending(struct cache_tree *pending,
6293                         struct cache_tree *reada,
6294                         struct cache_tree *nodes,
6295                         u64 last, struct block_info *bits, int bits_nr,
6296                         int *reada_bits)
6297 {
6298         unsigned long node_start = last;
6299         struct cache_extent *cache;
6300         int ret;
6301
6302         cache = search_cache_extent(reada, 0);
6303         if (cache) {
6304                 bits[0].start = cache->start;
6305                 bits[0].size = cache->size;
6306                 *reada_bits = 1;
6307                 return 1;
6308         }
6309         *reada_bits = 0;
6310         if (node_start > 32768)
6311                 node_start -= 32768;
6312
6313         cache = search_cache_extent(nodes, node_start);
6314         if (!cache)
6315                 cache = search_cache_extent(nodes, 0);
6316
6317         if (!cache) {
6318                  cache = search_cache_extent(pending, 0);
6319                  if (!cache)
6320                          return 0;
6321                  ret = 0;
6322                  do {
6323                          bits[ret].start = cache->start;
6324                          bits[ret].size = cache->size;
6325                          cache = next_cache_extent(cache);
6326                          ret++;
6327                  } while (cache && ret < bits_nr);
6328                  return ret;
6329         }
6330
6331         ret = 0;
6332         do {
6333                 bits[ret].start = cache->start;
6334                 bits[ret].size = cache->size;
6335                 cache = next_cache_extent(cache);
6336                 ret++;
6337         } while (cache && ret < bits_nr);
6338
6339         if (bits_nr - ret > 8) {
6340                 u64 lookup = bits[0].start + bits[0].size;
6341                 struct cache_extent *next;
6342                 next = search_cache_extent(pending, lookup);
6343                 while(next) {
6344                         if (next->start - lookup > 32768)
6345                                 break;
6346                         bits[ret].start = next->start;
6347                         bits[ret].size = next->size;
6348                         lookup = next->start + next->size;
6349                         ret++;
6350                         if (ret == bits_nr)
6351                                 break;
6352                         next = next_cache_extent(next);
6353                         if (!next)
6354                                 break;
6355                 }
6356         }
6357         return ret;
6358 }
6359
6360 static void free_chunk_record(struct cache_extent *cache)
6361 {
6362         struct chunk_record *rec;
6363
6364         rec = container_of(cache, struct chunk_record, cache);
6365         list_del_init(&rec->list);
6366         list_del_init(&rec->dextents);
6367         free(rec);
6368 }
6369
6370 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6371 {
6372         cache_tree_free_extents(chunk_cache, free_chunk_record);
6373 }
6374
6375 static void free_device_record(struct rb_node *node)
6376 {
6377         struct device_record *rec;
6378
6379         rec = container_of(node, struct device_record, node);
6380         free(rec);
6381 }
6382
6383 FREE_RB_BASED_TREE(device_cache, free_device_record);
6384
6385 int insert_block_group_record(struct block_group_tree *tree,
6386                               struct block_group_record *bg_rec)
6387 {
6388         int ret;
6389
6390         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6391         if (ret)
6392                 return ret;
6393
6394         list_add_tail(&bg_rec->list, &tree->block_groups);
6395         return 0;
6396 }
6397
6398 static void free_block_group_record(struct cache_extent *cache)
6399 {
6400         struct block_group_record *rec;
6401
6402         rec = container_of(cache, struct block_group_record, cache);
6403         list_del_init(&rec->list);
6404         free(rec);
6405 }
6406
6407 void free_block_group_tree(struct block_group_tree *tree)
6408 {
6409         cache_tree_free_extents(&tree->tree, free_block_group_record);
6410 }
6411
6412 int insert_device_extent_record(struct device_extent_tree *tree,
6413                                 struct device_extent_record *de_rec)
6414 {
6415         int ret;
6416
6417         /*
6418          * Device extent is a bit different from the other extents, because
6419          * the extents which belong to the different devices may have the
6420          * same start and size, so we need use the special extent cache
6421          * search/insert functions.
6422          */
6423         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6424         if (ret)
6425                 return ret;
6426
6427         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6428         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6429         return 0;
6430 }
6431
6432 static void free_device_extent_record(struct cache_extent *cache)
6433 {
6434         struct device_extent_record *rec;
6435
6436         rec = container_of(cache, struct device_extent_record, cache);
6437         if (!list_empty(&rec->chunk_list))
6438                 list_del_init(&rec->chunk_list);
6439         if (!list_empty(&rec->device_list))
6440                 list_del_init(&rec->device_list);
6441         free(rec);
6442 }
6443
6444 void free_device_extent_tree(struct device_extent_tree *tree)
6445 {
6446         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6447 }
6448
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found in @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * In both cases key.objectid is the extent bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *v0_ref;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        v0_ref = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, v0_ref) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, v0_ref),
                                0, 0);
}
#endif
6469
6470 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6471                                             struct btrfs_key *key,
6472                                             int slot)
6473 {
6474         struct btrfs_chunk *ptr;
6475         struct chunk_record *rec;
6476         int num_stripes, i;
6477
6478         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6479         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6480
6481         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6482         if (!rec) {
6483                 fprintf(stderr, "memory allocation failed\n");
6484                 exit(-1);
6485         }
6486
6487         INIT_LIST_HEAD(&rec->list);
6488         INIT_LIST_HEAD(&rec->dextents);
6489         rec->bg_rec = NULL;
6490
6491         rec->cache.start = key->offset;
6492         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6493
6494         rec->generation = btrfs_header_generation(leaf);
6495
6496         rec->objectid = key->objectid;
6497         rec->type = key->type;
6498         rec->offset = key->offset;
6499
6500         rec->length = rec->cache.size;
6501         rec->owner = btrfs_chunk_owner(leaf, ptr);
6502         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6503         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6504         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6505         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6506         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6507         rec->num_stripes = num_stripes;
6508         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6509
6510         for (i = 0; i < rec->num_stripes; ++i) {
6511                 rec->stripes[i].devid =
6512                         btrfs_stripe_devid_nr(leaf, ptr, i);
6513                 rec->stripes[i].offset =
6514                         btrfs_stripe_offset_nr(leaf, ptr, i);
6515                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6516                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6517                                 BTRFS_UUID_SIZE);
6518         }
6519
6520         return rec;
6521 }
6522
6523 static int process_chunk_item(struct cache_tree *chunk_cache,
6524                               struct btrfs_key *key, struct extent_buffer *eb,
6525                               int slot)
6526 {
6527         struct chunk_record *rec;
6528         struct btrfs_chunk *chunk;
6529         int ret = 0;
6530
6531         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6532         /*
6533          * Do extra check for this chunk item,
6534          *
6535          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6536          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6537          * and owner<->key_type check.
6538          */
6539         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6540                                       key->offset);
6541         if (ret < 0) {
6542                 error("chunk(%llu, %llu) is not valid, ignore it",
6543                       key->offset, btrfs_chunk_length(eb, chunk));
6544                 return 0;
6545         }
6546         rec = btrfs_new_chunk_record(eb, key, slot);
6547         ret = insert_cache_extent(chunk_cache, &rec->cache);
6548         if (ret) {
6549                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6550                         rec->offset, rec->length);
6551                 free(rec);
6552         }
6553
6554         return ret;
6555 }
6556
6557 static int process_device_item(struct rb_root *dev_cache,
6558                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6559 {
6560         struct btrfs_dev_item *ptr;
6561         struct device_record *rec;
6562         int ret = 0;
6563
6564         ptr = btrfs_item_ptr(eb,
6565                 slot, struct btrfs_dev_item);
6566
6567         rec = malloc(sizeof(*rec));
6568         if (!rec) {
6569                 fprintf(stderr, "memory allocation failed\n");
6570                 return -ENOMEM;
6571         }
6572
6573         rec->devid = key->offset;
6574         rec->generation = btrfs_header_generation(eb);
6575
6576         rec->objectid = key->objectid;
6577         rec->type = key->type;
6578         rec->offset = key->offset;
6579
6580         rec->devid = btrfs_device_id(eb, ptr);
6581         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6582         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6583
6584         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6585         if (ret) {
6586                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6587                 free(rec);
6588         }
6589
6590         return ret;
6591 }
6592
6593 struct block_group_record *
6594 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6595                              int slot)
6596 {
6597         struct btrfs_block_group_item *ptr;
6598         struct block_group_record *rec;
6599
6600         rec = calloc(1, sizeof(*rec));
6601         if (!rec) {
6602                 fprintf(stderr, "memory allocation failed\n");
6603                 exit(-1);
6604         }
6605
6606         rec->cache.start = key->objectid;
6607         rec->cache.size = key->offset;
6608
6609         rec->generation = btrfs_header_generation(leaf);
6610
6611         rec->objectid = key->objectid;
6612         rec->type = key->type;
6613         rec->offset = key->offset;
6614
6615         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6616         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6617
6618         INIT_LIST_HEAD(&rec->list);
6619
6620         return rec;
6621 }
6622
6623 static int process_block_group_item(struct block_group_tree *block_group_cache,
6624                                     struct btrfs_key *key,
6625                                     struct extent_buffer *eb, int slot)
6626 {
6627         struct block_group_record *rec;
6628         int ret = 0;
6629
6630         rec = btrfs_new_block_group_record(eb, key, slot);
6631         ret = insert_block_group_record(block_group_cache, rec);
6632         if (ret) {
6633                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6634                         rec->objectid, rec->offset);
6635                 free(rec);
6636         }
6637
6638         return ret;
6639 }
6640
6641 struct device_extent_record *
6642 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6643                                struct btrfs_key *key, int slot)
6644 {
6645         struct device_extent_record *rec;
6646         struct btrfs_dev_extent *ptr;
6647
6648         rec = calloc(1, sizeof(*rec));
6649         if (!rec) {
6650                 fprintf(stderr, "memory allocation failed\n");
6651                 exit(-1);
6652         }
6653
6654         rec->cache.objectid = key->objectid;
6655         rec->cache.start = key->offset;
6656
6657         rec->generation = btrfs_header_generation(leaf);
6658
6659         rec->objectid = key->objectid;
6660         rec->type = key->type;
6661         rec->offset = key->offset;
6662
6663         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6664         rec->chunk_objecteid =
6665                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6666         rec->chunk_offset =
6667                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6668         rec->length = btrfs_dev_extent_length(leaf, ptr);
6669         rec->cache.size = rec->length;
6670
6671         INIT_LIST_HEAD(&rec->chunk_list);
6672         INIT_LIST_HEAD(&rec->device_list);
6673
6674         return rec;
6675 }
6676
6677 static int
6678 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6679                            struct btrfs_key *key, struct extent_buffer *eb,
6680                            int slot)
6681 {
6682         struct device_extent_record *rec;
6683         int ret;
6684
6685         rec = btrfs_new_device_extent_record(eb, key, slot);
6686         ret = insert_device_extent_record(dev_extent_cache, rec);
6687         if (ret) {
6688                 fprintf(stderr,
6689                         "Device extent[%llu, %llu, %llu] existed.\n",
6690                         rec->objectid, rec->offset, rec->length);
6691                 free(rec);
6692         }
6693
6694         return ret;
6695 }
6696
6697 static int process_extent_item(struct btrfs_root *root,
6698                                struct cache_tree *extent_cache,
6699                                struct extent_buffer *eb, int slot)
6700 {
6701         struct btrfs_extent_item *ei;
6702         struct btrfs_extent_inline_ref *iref;
6703         struct btrfs_extent_data_ref *dref;
6704         struct btrfs_shared_data_ref *sref;
6705         struct btrfs_key key;
6706         struct extent_record tmpl;
6707         unsigned long end;
6708         unsigned long ptr;
6709         int ret;
6710         int type;
6711         u32 item_size = btrfs_item_size_nr(eb, slot);
6712         u64 refs = 0;
6713         u64 offset;
6714         u64 num_bytes;
6715         int metadata = 0;
6716
6717         btrfs_item_key_to_cpu(eb, &key, slot);
6718
6719         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6720                 metadata = 1;
6721                 num_bytes = root->nodesize;
6722         } else {
6723                 num_bytes = key.offset;
6724         }
6725
6726         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6727                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6728                       key.objectid, root->sectorsize);
6729                 return -EIO;
6730         }
6731         if (item_size < sizeof(*ei)) {
6732 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6733                 struct btrfs_extent_item_v0 *ei0;
6734                 BUG_ON(item_size != sizeof(*ei0));
6735                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6736                 refs = btrfs_extent_refs_v0(eb, ei0);
6737 #else
6738                 BUG();
6739 #endif
6740                 memset(&tmpl, 0, sizeof(tmpl));
6741                 tmpl.start = key.objectid;
6742                 tmpl.nr = num_bytes;
6743                 tmpl.extent_item_refs = refs;
6744                 tmpl.metadata = metadata;
6745                 tmpl.found_rec = 1;
6746                 tmpl.max_size = num_bytes;
6747
6748                 return add_extent_rec(extent_cache, &tmpl);
6749         }
6750
6751         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6752         refs = btrfs_extent_refs(eb, ei);
6753         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6754                 metadata = 1;
6755         else
6756                 metadata = 0;
6757         if (metadata && num_bytes != root->nodesize) {
6758                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6759                       num_bytes, root->nodesize);
6760                 return -EIO;
6761         }
6762         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6763                 error("ignore invalid data extent, length %llu is not aligned to %u",
6764                       num_bytes, root->sectorsize);
6765                 return -EIO;
6766         }
6767
6768         memset(&tmpl, 0, sizeof(tmpl));
6769         tmpl.start = key.objectid;
6770         tmpl.nr = num_bytes;
6771         tmpl.extent_item_refs = refs;
6772         tmpl.metadata = metadata;
6773         tmpl.found_rec = 1;
6774         tmpl.max_size = num_bytes;
6775         add_extent_rec(extent_cache, &tmpl);
6776
6777         ptr = (unsigned long)(ei + 1);
6778         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6779             key.type == BTRFS_EXTENT_ITEM_KEY)
6780                 ptr += sizeof(struct btrfs_tree_block_info);
6781
6782         end = (unsigned long)ei + item_size;
6783         while (ptr < end) {
6784                 iref = (struct btrfs_extent_inline_ref *)ptr;
6785                 type = btrfs_extent_inline_ref_type(eb, iref);
6786                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6787                 switch (type) {
6788                 case BTRFS_TREE_BLOCK_REF_KEY:
6789                         ret = add_tree_backref(extent_cache, key.objectid,
6790                                         0, offset, 0);
6791                         if (ret < 0)
6792                                 error("add_tree_backref failed: %s",
6793                                       strerror(-ret));
6794                         break;
6795                 case BTRFS_SHARED_BLOCK_REF_KEY:
6796                         ret = add_tree_backref(extent_cache, key.objectid,
6797                                         offset, 0, 0);
6798                         if (ret < 0)
6799                                 error("add_tree_backref failed: %s",
6800                                       strerror(-ret));
6801                         break;
6802                 case BTRFS_EXTENT_DATA_REF_KEY:
6803                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6804                         add_data_backref(extent_cache, key.objectid, 0,
6805                                         btrfs_extent_data_ref_root(eb, dref),
6806                                         btrfs_extent_data_ref_objectid(eb,
6807                                                                        dref),
6808                                         btrfs_extent_data_ref_offset(eb, dref),
6809                                         btrfs_extent_data_ref_count(eb, dref),
6810                                         0, num_bytes);
6811                         break;
6812                 case BTRFS_SHARED_DATA_REF_KEY:
6813                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6814                         add_data_backref(extent_cache, key.objectid, offset,
6815                                         0, 0, 0,
6816                                         btrfs_shared_data_ref_count(eb, sref),
6817                                         0, num_bytes);
6818                         break;
6819                 default:
6820                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6821                                 key.objectid, key.type, num_bytes);
6822                         goto out;
6823                 }
6824                 ptr += btrfs_extent_inline_ref_size(type);
6825         }
6826         WARN_ON(ptr > end);
6827 out:
6828         return 0;
6829 }
6830
6831 static int check_cache_range(struct btrfs_root *root,
6832                              struct btrfs_block_group_cache *cache,
6833                              u64 offset, u64 bytes)
6834 {
6835         struct btrfs_free_space *entry;
6836         u64 *logical;
6837         u64 bytenr;
6838         int stripe_len;
6839         int i, nr, ret;
6840
6841         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6842                 bytenr = btrfs_sb_offset(i);
6843                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6844                                        cache->key.objectid, bytenr, 0,
6845                                        &logical, &nr, &stripe_len);
6846                 if (ret)
6847                         return ret;
6848
6849                 while (nr--) {
6850                         if (logical[nr] + stripe_len <= offset)
6851                                 continue;
6852                         if (offset + bytes <= logical[nr])
6853                                 continue;
6854                         if (logical[nr] == offset) {
6855                                 if (stripe_len >= bytes) {
6856                                         free(logical);
6857                                         return 0;
6858                                 }
6859                                 bytes -= stripe_len;
6860                                 offset += stripe_len;
6861                         } else if (logical[nr] < offset) {
6862                                 if (logical[nr] + stripe_len >=
6863                                     offset + bytes) {
6864                                         free(logical);
6865                                         return 0;
6866                                 }
6867                                 bytes = (offset + bytes) -
6868                                         (logical[nr] + stripe_len);
6869                                 offset = logical[nr] + stripe_len;
6870                         } else {
6871                                 /*
6872                                  * Could be tricky, the super may land in the
6873                                  * middle of the area we're checking.  First
6874                                  * check the easiest case, it's at the end.
6875                                  */
6876                                 if (logical[nr] + stripe_len >=
6877                                     bytes + offset) {
6878                                         bytes = logical[nr] - offset;
6879                                         continue;
6880                                 }
6881
6882                                 /* Check the left side */
6883                                 ret = check_cache_range(root, cache,
6884                                                         offset,
6885                                                         logical[nr] - offset);
6886                                 if (ret) {
6887                                         free(logical);
6888                                         return ret;
6889                                 }
6890
6891                                 /* Now we continue with the right side */
6892                                 bytes = (offset + bytes) -
6893                                         (logical[nr] + stripe_len);
6894                                 offset = logical[nr] + stripe_len;
6895                         }
6896                 }
6897
6898                 free(logical);
6899         }
6900
6901         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6902         if (!entry) {
6903                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6904                         offset, offset+bytes);
6905                 return -EINVAL;
6906         }
6907
6908         if (entry->offset != offset) {
6909                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6910                         entry->offset);
6911                 return -EINVAL;
6912         }
6913
6914         if (entry->bytes != bytes) {
6915                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6916                         bytes, entry->bytes, offset);
6917                 return -EINVAL;
6918         }
6919
6920         unlink_free_space(cache->free_space_ctl, entry);
6921         free(entry);
6922         return 0;
6923 }
6924
6925 static int verify_space_cache(struct btrfs_root *root,
6926                               struct btrfs_block_group_cache *cache)
6927 {
6928         struct btrfs_path path;
6929         struct extent_buffer *leaf;
6930         struct btrfs_key key;
6931         u64 last;
6932         int ret = 0;
6933
6934         root = root->fs_info->extent_root;
6935
6936         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6937
6938         btrfs_init_path(&path);
6939         key.objectid = last;
6940         key.offset = 0;
6941         key.type = BTRFS_EXTENT_ITEM_KEY;
6942         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6943         if (ret < 0)
6944                 goto out;
6945         ret = 0;
6946         while (1) {
6947                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6948                         ret = btrfs_next_leaf(root, &path);
6949                         if (ret < 0)
6950                                 goto out;
6951                         if (ret > 0) {
6952                                 ret = 0;
6953                                 break;
6954                         }
6955                 }
6956                 leaf = path.nodes[0];
6957                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6958                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6959                         break;
6960                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6961                     key.type != BTRFS_METADATA_ITEM_KEY) {
6962                         path.slots[0]++;
6963                         continue;
6964                 }
6965
6966                 if (last == key.objectid) {
6967                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6968                                 last = key.objectid + key.offset;
6969                         else
6970                                 last = key.objectid + root->nodesize;
6971                         path.slots[0]++;
6972                         continue;
6973                 }
6974
6975                 ret = check_cache_range(root, cache, last,
6976                                         key.objectid - last);
6977                 if (ret)
6978                         break;
6979                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6980                         last = key.objectid + key.offset;
6981                 else
6982                         last = key.objectid + root->nodesize;
6983                 path.slots[0]++;
6984         }
6985
6986         if (last < cache->key.objectid + cache->key.offset)
6987                 ret = check_cache_range(root, cache, last,
6988                                         cache->key.objectid +
6989                                         cache->key.offset - last);
6990
6991 out:
6992         btrfs_release_path(&path);
6993
6994         if (!ret &&
6995             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6996                 fprintf(stderr, "There are still entries left in the space "
6997                         "cache\n");
6998                 ret = -EINVAL;
6999         }
7000
7001         return ret;
7002 }
7003
7004 static int check_space_cache(struct btrfs_root *root)
7005 {
7006         struct btrfs_block_group_cache *cache;
7007         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7008         int ret;
7009         int error = 0;
7010
7011         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7012             btrfs_super_generation(root->fs_info->super_copy) !=
7013             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7014                 printf("cache and super generation don't match, space cache "
7015                        "will be invalidated\n");
7016                 return 0;
7017         }
7018
7019         if (ctx.progress_enabled) {
7020                 ctx.tp = TASK_FREE_SPACE;
7021                 task_start(ctx.info);
7022         }
7023
7024         while (1) {
7025                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7026                 if (!cache)
7027                         break;
7028
7029                 start = cache->key.objectid + cache->key.offset;
7030                 if (!cache->free_space_ctl) {
7031                         if (btrfs_init_free_space_ctl(cache,
7032                                                       root->sectorsize)) {
7033                                 ret = -ENOMEM;
7034                                 break;
7035                         }
7036                 } else {
7037                         btrfs_remove_free_space_cache(cache);
7038                 }
7039
7040                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7041                         ret = exclude_super_stripes(root, cache);
7042                         if (ret) {
7043                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7044                                         strerror(-ret));
7045                                 error++;
7046                                 continue;
7047                         }
7048                         ret = load_free_space_tree(root->fs_info, cache);
7049                         free_excluded_extents(root, cache);
7050                         if (ret < 0) {
7051                                 fprintf(stderr, "could not load free space tree: %s\n",
7052                                         strerror(-ret));
7053                                 error++;
7054                                 continue;
7055                         }
7056                         error += ret;
7057                 } else {
7058                         ret = load_free_space_cache(root->fs_info, cache);
7059                         if (!ret)
7060                                 continue;
7061                 }
7062
7063                 ret = verify_space_cache(root, cache);
7064                 if (ret) {
7065                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7066                                 cache->key.objectid);
7067                         error++;
7068                 }
7069         }
7070
7071         task_stop(ctx.info);
7072
7073         return error ? -EINVAL : 0;
7074 }
7075
7076 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7077                         u64 num_bytes, unsigned long leaf_offset,
7078                         struct extent_buffer *eb) {
7079
7080         u64 offset = 0;
7081         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7082         char *data;
7083         unsigned long csum_offset;
7084         u32 csum;
7085         u32 csum_expected;
7086         u64 read_len;
7087         u64 data_checked = 0;
7088         u64 tmp;
7089         int ret = 0;
7090         int mirror;
7091         int num_copies;
7092
7093         if (num_bytes % root->sectorsize)
7094                 return -EINVAL;
7095
7096         data = malloc(num_bytes);
7097         if (!data)
7098                 return -ENOMEM;
7099
7100         while (offset < num_bytes) {
7101                 mirror = 0;
7102 again:
7103                 read_len = num_bytes - offset;
7104                 /* read as much space once a time */
7105                 ret = read_extent_data(root, data + offset,
7106                                 bytenr + offset, &read_len, mirror);
7107                 if (ret)
7108                         goto out;
7109                 data_checked = 0;
7110                 /* verify every 4k data's checksum */
7111                 while (data_checked < read_len) {
7112                         csum = ~(u32)0;
7113                         tmp = offset + data_checked;
7114
7115                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
7116                                                csum, root->sectorsize);
7117                         btrfs_csum_final(csum, (u8 *)&csum);
7118
7119                         csum_offset = leaf_offset +
7120                                  tmp / root->sectorsize * csum_size;
7121                         read_extent_buffer(eb, (char *)&csum_expected,
7122                                            csum_offset, csum_size);
7123                         /* try another mirror */
7124                         if (csum != csum_expected) {
7125                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7126                                                 mirror, bytenr + tmp,
7127                                                 csum, csum_expected);
7128                                 num_copies = btrfs_num_copies(
7129                                                 &root->fs_info->mapping_tree,
7130                                                 bytenr, num_bytes);
7131                                 if (mirror < num_copies - 1) {
7132                                         mirror += 1;
7133                                         goto again;
7134                                 }
7135                         }
7136                         data_checked += root->sectorsize;
7137                 }
7138                 offset += read_len;
7139         }
7140 out:
7141         free(data);
7142         return ret;
7143 }
7144
7145 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7146                                u64 num_bytes)
7147 {
7148         struct btrfs_path path;
7149         struct extent_buffer *leaf;
7150         struct btrfs_key key;
7151         int ret;
7152
7153         btrfs_init_path(&path);
7154         key.objectid = bytenr;
7155         key.type = BTRFS_EXTENT_ITEM_KEY;
7156         key.offset = (u64)-1;
7157
7158 again:
7159         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7160                                 0, 0);
7161         if (ret < 0) {
7162                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7163                 btrfs_release_path(&path);
7164                 return ret;
7165         } else if (ret) {
7166                 if (path.slots[0] > 0) {
7167                         path.slots[0]--;
7168                 } else {
7169                         ret = btrfs_prev_leaf(root, &path);
7170                         if (ret < 0) {
7171                                 goto out;
7172                         } else if (ret > 0) {
7173                                 ret = 0;
7174                                 goto out;
7175                         }
7176                 }
7177         }
7178
7179         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7180
7181         /*
7182          * Block group items come before extent items if they have the same
7183          * bytenr, so walk back one more just in case.  Dear future traveller,
7184          * first congrats on mastering time travel.  Now if it's not too much
7185          * trouble could you go back to 2006 and tell Chris to make the
7186          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7187          * EXTENT_ITEM_KEY please?
7188          */
7189         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7190                 if (path.slots[0] > 0) {
7191                         path.slots[0]--;
7192                 } else {
7193                         ret = btrfs_prev_leaf(root, &path);
7194                         if (ret < 0) {
7195                                 goto out;
7196                         } else if (ret > 0) {
7197                                 ret = 0;
7198                                 goto out;
7199                         }
7200                 }
7201                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7202         }
7203
7204         while (num_bytes) {
7205                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7206                         ret = btrfs_next_leaf(root, &path);
7207                         if (ret < 0) {
7208                                 fprintf(stderr, "Error going to next leaf "
7209                                         "%d\n", ret);
7210                                 btrfs_release_path(&path);
7211                                 return ret;
7212                         } else if (ret) {
7213                                 break;
7214                         }
7215                 }
7216                 leaf = path.nodes[0];
7217                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7218                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7219                         path.slots[0]++;
7220                         continue;
7221                 }
7222                 if (key.objectid + key.offset < bytenr) {
7223                         path.slots[0]++;
7224                         continue;
7225                 }
7226                 if (key.objectid > bytenr + num_bytes)
7227                         break;
7228
7229                 if (key.objectid == bytenr) {
7230                         if (key.offset >= num_bytes) {
7231                                 num_bytes = 0;
7232                                 break;
7233                         }
7234                         num_bytes -= key.offset;
7235                         bytenr += key.offset;
7236                 } else if (key.objectid < bytenr) {
7237                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7238                                 num_bytes = 0;
7239                                 break;
7240                         }
7241                         num_bytes = (bytenr + num_bytes) -
7242                                 (key.objectid + key.offset);
7243                         bytenr = key.objectid + key.offset;
7244                 } else {
7245                         if (key.objectid + key.offset < bytenr + num_bytes) {
7246                                 u64 new_start = key.objectid + key.offset;
7247                                 u64 new_bytes = bytenr + num_bytes - new_start;
7248
7249                                 /*
7250                                  * Weird case, the extent is in the middle of
7251                                  * our range, we'll have to search one side
7252                                  * and then the other.  Not sure if this happens
7253                                  * in real life, but no harm in coding it up
7254                                  * anyway just in case.
7255                                  */
7256                                 btrfs_release_path(&path);
7257                                 ret = check_extent_exists(root, new_start,
7258                                                           new_bytes);
7259                                 if (ret) {
7260                                         fprintf(stderr, "Right section didn't "
7261                                                 "have a record\n");
7262                                         break;
7263                                 }
7264                                 num_bytes = key.objectid - bytenr;
7265                                 goto again;
7266                         }
7267                         num_bytes = key.objectid - bytenr;
7268                 }
7269                 path.slots[0]++;
7270         }
7271         ret = 0;
7272
7273 out:
7274         if (num_bytes && !ret) {
7275                 fprintf(stderr, "There are no extents for csum range "
7276                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7277                 ret = 1;
7278         }
7279
7280         btrfs_release_path(&path);
7281         return ret;
7282 }
7283
7284 static int check_csums(struct btrfs_root *root)
7285 {
7286         struct btrfs_path path;
7287         struct extent_buffer *leaf;
7288         struct btrfs_key key;
7289         u64 offset = 0, num_bytes = 0;
7290         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7291         int errors = 0;
7292         int ret;
7293         u64 data_len;
7294         unsigned long leaf_offset;
7295
7296         root = root->fs_info->csum_root;
7297         if (!extent_buffer_uptodate(root->node)) {
7298                 fprintf(stderr, "No valid csum tree found\n");
7299                 return -ENOENT;
7300         }
7301
7302         btrfs_init_path(&path);
7303         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7304         key.type = BTRFS_EXTENT_CSUM_KEY;
7305         key.offset = 0;
7306         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7307         if (ret < 0) {
7308                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7309                 btrfs_release_path(&path);
7310                 return ret;
7311         }
7312
7313         if (ret > 0 && path.slots[0])
7314                 path.slots[0]--;
7315         ret = 0;
7316
7317         while (1) {
7318                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7319                         ret = btrfs_next_leaf(root, &path);
7320                         if (ret < 0) {
7321                                 fprintf(stderr, "Error going to next leaf "
7322                                         "%d\n", ret);
7323                                 break;
7324                         }
7325                         if (ret)
7326                                 break;
7327                 }
7328                 leaf = path.nodes[0];
7329
7330                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7331                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7332                         path.slots[0]++;
7333                         continue;
7334                 }
7335
7336                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7337                               csum_size) * root->sectorsize;
7338                 if (!check_data_csum)
7339                         goto skip_csum_check;
7340                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7341                 ret = check_extent_csums(root, key.offset, data_len,
7342                                          leaf_offset, leaf);
7343                 if (ret)
7344                         break;
7345 skip_csum_check:
7346                 if (!num_bytes) {
7347                         offset = key.offset;
7348                 } else if (key.offset != offset + num_bytes) {
7349                         ret = check_extent_exists(root, offset, num_bytes);
7350                         if (ret) {
7351                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7352                                         "there is no extent record\n",
7353                                         offset, offset+num_bytes);
7354                                 errors++;
7355                         }
7356                         offset = key.offset;
7357                         num_bytes = 0;
7358                 }
7359                 num_bytes += data_len;
7360                 path.slots[0]++;
7361         }
7362
7363         btrfs_release_path(&path);
7364         return errors;
7365 }
7366
7367 static int is_dropped_key(struct btrfs_key *key,
7368                           struct btrfs_key *drop_key) {
7369         if (key->objectid < drop_key->objectid)
7370                 return 1;
7371         else if (key->objectid == drop_key->objectid) {
7372                 if (key->type < drop_key->type)
7373                         return 1;
7374                 else if (key->type == drop_key->type) {
7375                         if (key->offset < drop_key->offset)
7376                                 return 1;
7377                 }
7378         }
7379         return 0;
7380 }
7381
7382 /*
7383  * Here are the rules for FULL_BACKREF.
7384  *
7385  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7386  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7387  *      FULL_BACKREF set.
7388  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7389  *    if it happened after the relocation occurred since we'll have dropped the
7390  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7391  *    have no real way to know for sure.
7392  *
7393  * We process the blocks one root at a time, and we start from the lowest root
7394  * objectid and go to the highest.  So we can just lookup the owner backref for
7395  * the record and if we don't find it then we know it doesn't exist and we have
7396  * a FULL BACKREF.
7397  *
7398  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7399  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7400  * be set or not and then we can check later once we've gathered all the refs.
7401  */
7402 static int calc_extent_flag(struct btrfs_root *root,
7403                            struct cache_tree *extent_cache,
7404                            struct extent_buffer *buf,
7405                            struct root_item_record *ri,
7406                            u64 *flags)
7407 {
7408         struct extent_record *rec;
7409         struct cache_extent *cache;
7410         struct tree_backref *tback;
7411         u64 owner = 0;
7412
7413         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7414         /* we have added this extent before */
7415         if (!cache)
7416                 return -ENOENT;
7417
7418         rec = container_of(cache, struct extent_record, cache);
7419
7420         /*
7421          * Except file/reloc tree, we can not have
7422          * FULL BACKREF MODE
7423          */
7424         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7425                 goto normal;
7426         /*
7427          * root node
7428          */
7429         if (buf->start == ri->bytenr)
7430                 goto normal;
7431
7432         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7433                 goto full_backref;
7434
7435         owner = btrfs_header_owner(buf);
7436         if (owner == ri->objectid)
7437                 goto normal;
7438
7439         tback = find_tree_backref(rec, 0, owner);
7440         if (!tback)
7441                 goto full_backref;
7442 normal:
7443         *flags = 0;
7444         if (rec->flag_block_full_backref != FLAG_UNSET &&
7445             rec->flag_block_full_backref != 0)
7446                 rec->bad_full_backref = 1;
7447         return 0;
7448 full_backref:
7449         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7450         if (rec->flag_block_full_backref != FLAG_UNSET &&
7451             rec->flag_block_full_backref != 1)
7452                 rec->bad_full_backref = 1;
7453         return 0;
7454 }
7455
7456 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7457 {
7458         fprintf(stderr, "Invalid key type(");
7459         print_key_type(stderr, 0, key_type);
7460         fprintf(stderr, ") found in root(");
7461         print_objectid(stderr, rootid, 0);
7462         fprintf(stderr, ")\n");
7463 }
7464
7465 /*
7466  * Check if the key is valid with its extent buffer.
7467  *
7468  * This is a early check in case invalid key exists in a extent buffer
7469  * This is not comprehensive yet, but should prevent wrong key/item passed
7470  * further
7471  */
7472 static int check_type_with_root(u64 rootid, u8 key_type)
7473 {
7474         switch (key_type) {
7475         /* Only valid in chunk tree */
7476         case BTRFS_DEV_ITEM_KEY:
7477         case BTRFS_CHUNK_ITEM_KEY:
7478                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7479                         goto err;
7480                 break;
7481         /* valid in csum and log tree */
7482         case BTRFS_CSUM_TREE_OBJECTID:
7483                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7484                       is_fstree(rootid)))
7485                         goto err;
7486                 break;
7487         case BTRFS_EXTENT_ITEM_KEY:
7488         case BTRFS_METADATA_ITEM_KEY:
7489         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7490                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7491                         goto err;
7492                 break;
7493         case BTRFS_ROOT_ITEM_KEY:
7494                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7495                         goto err;
7496                 break;
7497         case BTRFS_DEV_EXTENT_KEY:
7498                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7499                         goto err;
7500                 break;
7501         }
7502         return 0;
7503 err:
7504         report_mismatch_key_root(key_type, rootid);
7505         return -EINVAL;
7506 }
7507
7508 static int run_next_block(struct btrfs_root *root,
7509                           struct block_info *bits,
7510                           int bits_nr,
7511                           u64 *last,
7512                           struct cache_tree *pending,
7513                           struct cache_tree *seen,
7514                           struct cache_tree *reada,
7515                           struct cache_tree *nodes,
7516                           struct cache_tree *extent_cache,
7517                           struct cache_tree *chunk_cache,
7518                           struct rb_root *dev_cache,
7519                           struct block_group_tree *block_group_cache,
7520                           struct device_extent_tree *dev_extent_cache,
7521                           struct root_item_record *ri)
7522 {
7523         struct extent_buffer *buf;
7524         struct extent_record *rec = NULL;
7525         u64 bytenr;
7526         u32 size;
7527         u64 parent;
7528         u64 owner;
7529         u64 flags;
7530         u64 ptr;
7531         u64 gen = 0;
7532         int ret = 0;
7533         int i;
7534         int nritems;
7535         struct btrfs_key key;
7536         struct cache_extent *cache;
7537         int reada_bits;
7538
7539         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7540                                     bits_nr, &reada_bits);
7541         if (nritems == 0)
7542                 return 1;
7543
7544         if (!reada_bits) {
7545                 for(i = 0; i < nritems; i++) {
7546                         ret = add_cache_extent(reada, bits[i].start,
7547                                                bits[i].size);
7548                         if (ret == -EEXIST)
7549                                 continue;
7550
7551                         /* fixme, get the parent transid */
7552                         readahead_tree_block(root, bits[i].start,
7553                                              bits[i].size, 0);
7554                 }
7555         }
7556         *last = bits[0].start;
7557         bytenr = bits[0].start;
7558         size = bits[0].size;
7559
7560         cache = lookup_cache_extent(pending, bytenr, size);
7561         if (cache) {
7562                 remove_cache_extent(pending, cache);
7563                 free(cache);
7564         }
7565         cache = lookup_cache_extent(reada, bytenr, size);
7566         if (cache) {
7567                 remove_cache_extent(reada, cache);
7568                 free(cache);
7569         }
7570         cache = lookup_cache_extent(nodes, bytenr, size);
7571         if (cache) {
7572                 remove_cache_extent(nodes, cache);
7573                 free(cache);
7574         }
7575         cache = lookup_cache_extent(extent_cache, bytenr, size);
7576         if (cache) {
7577                 rec = container_of(cache, struct extent_record, cache);
7578                 gen = rec->parent_generation;
7579         }
7580
7581         /* fixme, get the real parent transid */
7582         buf = read_tree_block(root, bytenr, size, gen);
7583         if (!extent_buffer_uptodate(buf)) {
7584                 record_bad_block_io(root->fs_info,
7585                                     extent_cache, bytenr, size);
7586                 goto out;
7587         }
7588
7589         nritems = btrfs_header_nritems(buf);
7590
7591         flags = 0;
7592         if (!init_extent_tree) {
7593                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7594                                        btrfs_header_level(buf), 1, NULL,
7595                                        &flags);
7596                 if (ret < 0) {
7597                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7598                         if (ret < 0) {
7599                                 fprintf(stderr, "Couldn't calc extent flags\n");
7600                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7601                         }
7602                 }
7603         } else {
7604                 flags = 0;
7605                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7606                 if (ret < 0) {
7607                         fprintf(stderr, "Couldn't calc extent flags\n");
7608                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7609                 }
7610         }
7611
7612         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7613                 if (ri != NULL &&
7614                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7615                     ri->objectid == btrfs_header_owner(buf)) {
7616                         /*
7617                          * Ok we got to this block from it's original owner and
7618                          * we have FULL_BACKREF set.  Relocation can leave
7619                          * converted blocks over so this is altogether possible,
7620                          * however it's not possible if the generation > the
7621                          * last snapshot, so check for this case.
7622                          */
7623                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7624                             btrfs_header_generation(buf) > ri->last_snapshot) {
7625                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7626                                 rec->bad_full_backref = 1;
7627                         }
7628                 }
7629         } else {
7630                 if (ri != NULL &&
7631                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7632                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7633                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7634                         rec->bad_full_backref = 1;
7635                 }
7636         }
7637
7638         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7639                 rec->flag_block_full_backref = 1;
7640                 parent = bytenr;
7641                 owner = 0;
7642         } else {
7643                 rec->flag_block_full_backref = 0;
7644                 parent = 0;
7645                 owner = btrfs_header_owner(buf);
7646         }
7647
7648         ret = check_block(root, extent_cache, buf, flags);
7649         if (ret)
7650                 goto out;
7651
7652         if (btrfs_is_leaf(buf)) {
7653                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7654                 for (i = 0; i < nritems; i++) {
7655                         struct btrfs_file_extent_item *fi;
7656                         btrfs_item_key_to_cpu(buf, &key, i);
7657                         /*
7658                          * Check key type against the leaf owner.
7659                          * Could filter quite a lot of early error if
7660                          * owner is correct
7661                          */
7662                         if (check_type_with_root(btrfs_header_owner(buf),
7663                                                  key.type)) {
7664                                 fprintf(stderr, "ignoring invalid key\n");
7665                                 continue;
7666                         }
7667                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7668                                 process_extent_item(root, extent_cache, buf,
7669                                                     i);
7670                                 continue;
7671                         }
7672                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7673                                 process_extent_item(root, extent_cache, buf,
7674                                                     i);
7675                                 continue;
7676                         }
7677                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7678                                 total_csum_bytes +=
7679                                         btrfs_item_size_nr(buf, i);
7680                                 continue;
7681                         }
7682                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7683                                 process_chunk_item(chunk_cache, &key, buf, i);
7684                                 continue;
7685                         }
7686                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7687                                 process_device_item(dev_cache, &key, buf, i);
7688                                 continue;
7689                         }
7690                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7691                                 process_block_group_item(block_group_cache,
7692                                         &key, buf, i);
7693                                 continue;
7694                         }
7695                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7696                                 process_device_extent_item(dev_extent_cache,
7697                                         &key, buf, i);
7698                                 continue;
7699
7700                         }
7701                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7702 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7703                                 process_extent_ref_v0(extent_cache, buf, i);
7704 #else
7705                                 BUG();
7706 #endif
7707                                 continue;
7708                         }
7709
7710                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7711                                 ret = add_tree_backref(extent_cache,
7712                                                 key.objectid, 0, key.offset, 0);
7713                                 if (ret < 0)
7714                                         error("add_tree_backref failed: %s",
7715                                               strerror(-ret));
7716                                 continue;
7717                         }
7718                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7719                                 ret = add_tree_backref(extent_cache,
7720                                                 key.objectid, key.offset, 0, 0);
7721                                 if (ret < 0)
7722                                         error("add_tree_backref failed: %s",
7723                                               strerror(-ret));
7724                                 continue;
7725                         }
7726                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7727                                 struct btrfs_extent_data_ref *ref;
7728                                 ref = btrfs_item_ptr(buf, i,
7729                                                 struct btrfs_extent_data_ref);
7730                                 add_data_backref(extent_cache,
7731                                         key.objectid, 0,
7732                                         btrfs_extent_data_ref_root(buf, ref),
7733                                         btrfs_extent_data_ref_objectid(buf,
7734                                                                        ref),
7735                                         btrfs_extent_data_ref_offset(buf, ref),
7736                                         btrfs_extent_data_ref_count(buf, ref),
7737                                         0, root->sectorsize);
7738                                 continue;
7739                         }
7740                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7741                                 struct btrfs_shared_data_ref *ref;
7742                                 ref = btrfs_item_ptr(buf, i,
7743                                                 struct btrfs_shared_data_ref);
7744                                 add_data_backref(extent_cache,
7745                                         key.objectid, key.offset, 0, 0, 0,
7746                                         btrfs_shared_data_ref_count(buf, ref),
7747                                         0, root->sectorsize);
7748                                 continue;
7749                         }
7750                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7751                                 struct bad_item *bad;
7752
7753                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7754                                         continue;
7755                                 if (!owner)
7756                                         continue;
7757                                 bad = malloc(sizeof(struct bad_item));
7758                                 if (!bad)
7759                                         continue;
7760                                 INIT_LIST_HEAD(&bad->list);
7761                                 memcpy(&bad->key, &key,
7762                                        sizeof(struct btrfs_key));
7763                                 bad->root_id = owner;
7764                                 list_add_tail(&bad->list, &delete_items);
7765                                 continue;
7766                         }
7767                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7768                                 continue;
7769                         fi = btrfs_item_ptr(buf, i,
7770                                             struct btrfs_file_extent_item);
7771                         if (btrfs_file_extent_type(buf, fi) ==
7772                             BTRFS_FILE_EXTENT_INLINE)
7773                                 continue;
7774                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7775                                 continue;
7776
7777                         data_bytes_allocated +=
7778                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7779                         if (data_bytes_allocated < root->sectorsize) {
7780                                 abort();
7781                         }
7782                         data_bytes_referenced +=
7783                                 btrfs_file_extent_num_bytes(buf, fi);
7784                         add_data_backref(extent_cache,
7785                                 btrfs_file_extent_disk_bytenr(buf, fi),
7786                                 parent, owner, key.objectid, key.offset -
7787                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7788                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7789                 }
7790         } else {
7791                 int level;
7792                 struct btrfs_key first_key;
7793
7794                 first_key.objectid = 0;
7795
7796                 if (nritems > 0)
7797                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7798                 level = btrfs_header_level(buf);
7799                 for (i = 0; i < nritems; i++) {
7800                         struct extent_record tmpl;
7801
7802                         ptr = btrfs_node_blockptr(buf, i);
7803                         size = root->nodesize;
7804                         btrfs_node_key_to_cpu(buf, &key, i);
7805                         if (ri != NULL) {
7806                                 if ((level == ri->drop_level)
7807                                     && is_dropped_key(&key, &ri->drop_key)) {
7808                                         continue;
7809                                 }
7810                         }
7811
7812                         memset(&tmpl, 0, sizeof(tmpl));
7813                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7814                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7815                         tmpl.start = ptr;
7816                         tmpl.nr = size;
7817                         tmpl.refs = 1;
7818                         tmpl.metadata = 1;
7819                         tmpl.max_size = size;
7820                         ret = add_extent_rec(extent_cache, &tmpl);
7821                         if (ret < 0)
7822                                 goto out;
7823
7824                         ret = add_tree_backref(extent_cache, ptr, parent,
7825                                         owner, 1);
7826                         if (ret < 0) {
7827                                 error("add_tree_backref failed: %s",
7828                                       strerror(-ret));
7829                                 continue;
7830                         }
7831
7832                         if (level > 1) {
7833                                 add_pending(nodes, seen, ptr, size);
7834                         } else {
7835                                 add_pending(pending, seen, ptr, size);
7836                         }
7837                 }
7838                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7839                                       nritems) * sizeof(struct btrfs_key_ptr);
7840         }
7841         total_btree_bytes += buf->len;
7842         if (fs_root_objectid(btrfs_header_owner(buf)))
7843                 total_fs_tree_bytes += buf->len;
7844         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7845                 total_extent_tree_bytes += buf->len;
7846         if (!found_old_backref &&
7847             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7848             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7849             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7850                 found_old_backref = 1;
7851 out:
7852         free_extent_buffer(buf);
7853         return ret;
7854 }
7855
7856 static int add_root_to_pending(struct extent_buffer *buf,
7857                                struct cache_tree *extent_cache,
7858                                struct cache_tree *pending,
7859                                struct cache_tree *seen,
7860                                struct cache_tree *nodes,
7861                                u64 objectid)
7862 {
7863         struct extent_record tmpl;
7864         int ret;
7865
7866         if (btrfs_header_level(buf) > 0)
7867                 add_pending(nodes, seen, buf->start, buf->len);
7868         else
7869                 add_pending(pending, seen, buf->start, buf->len);
7870
7871         memset(&tmpl, 0, sizeof(tmpl));
7872         tmpl.start = buf->start;
7873         tmpl.nr = buf->len;
7874         tmpl.is_root = 1;
7875         tmpl.refs = 1;
7876         tmpl.metadata = 1;
7877         tmpl.max_size = buf->len;
7878         add_extent_rec(extent_cache, &tmpl);
7879
7880         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7881             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7882                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7883                                 0, 1);
7884         else
7885                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7886                                 1);
7887         return ret;
7888 }
7889
7890 /* as we fix the tree, we might be deleting blocks that
7891  * we're tracking for repair.  This hook makes sure we
7892  * remove any backrefs for blocks as we are fixing them.
7893  */
7894 static int free_extent_hook(struct btrfs_trans_handle *trans,
7895                             struct btrfs_root *root,
7896                             u64 bytenr, u64 num_bytes, u64 parent,
7897                             u64 root_objectid, u64 owner, u64 offset,
7898                             int refs_to_drop)
7899 {
7900         struct extent_record *rec;
7901         struct cache_extent *cache;
7902         int is_data;
7903         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7904
7905         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7906         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7907         if (!cache)
7908                 return 0;
7909
7910         rec = container_of(cache, struct extent_record, cache);
7911         if (is_data) {
7912                 struct data_backref *back;
7913                 back = find_data_backref(rec, parent, root_objectid, owner,
7914                                          offset, 1, bytenr, num_bytes);
7915                 if (!back)
7916                         goto out;
7917                 if (back->node.found_ref) {
7918                         back->found_ref -= refs_to_drop;
7919                         if (rec->refs)
7920                                 rec->refs -= refs_to_drop;
7921                 }
7922                 if (back->node.found_extent_tree) {
7923                         back->num_refs -= refs_to_drop;
7924                         if (rec->extent_item_refs)
7925                                 rec->extent_item_refs -= refs_to_drop;
7926                 }
7927                 if (back->found_ref == 0)
7928                         back->node.found_ref = 0;
7929                 if (back->num_refs == 0)
7930                         back->node.found_extent_tree = 0;
7931
7932                 if (!back->node.found_extent_tree && back->node.found_ref) {
7933                         list_del(&back->node.list);
7934                         free(back);
7935                 }
7936         } else {
7937                 struct tree_backref *back;
7938                 back = find_tree_backref(rec, parent, root_objectid);
7939                 if (!back)
7940                         goto out;
7941                 if (back->node.found_ref) {
7942                         if (rec->refs)
7943                                 rec->refs--;
7944                         back->node.found_ref = 0;
7945                 }
7946                 if (back->node.found_extent_tree) {
7947                         if (rec->extent_item_refs)
7948                                 rec->extent_item_refs--;
7949                         back->node.found_extent_tree = 0;
7950                 }
7951                 if (!back->node.found_extent_tree && back->node.found_ref) {
7952                         list_del(&back->node.list);
7953                         free(back);
7954                 }
7955         }
7956         maybe_free_extent_rec(extent_cache, rec);
7957 out:
7958         return 0;
7959 }
7960
7961 static int delete_extent_records(struct btrfs_trans_handle *trans,
7962                                  struct btrfs_root *root,
7963                                  struct btrfs_path *path,
7964                                  u64 bytenr, u64 new_len)
7965 {
7966         struct btrfs_key key;
7967         struct btrfs_key found_key;
7968         struct extent_buffer *leaf;
7969         int ret;
7970         int slot;
7971
7972
7973         key.objectid = bytenr;
7974         key.type = (u8)-1;
7975         key.offset = (u64)-1;
7976
7977         while(1) {
7978                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7979                                         &key, path, 0, 1);
7980                 if (ret < 0)
7981                         break;
7982
7983                 if (ret > 0) {
7984                         ret = 0;
7985                         if (path->slots[0] == 0)
7986                                 break;
7987                         path->slots[0]--;
7988                 }
7989                 ret = 0;
7990
7991                 leaf = path->nodes[0];
7992                 slot = path->slots[0];
7993
7994                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7995                 if (found_key.objectid != bytenr)
7996                         break;
7997
7998                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7999                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8000                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8001                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8002                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8003                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8004                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8005                         btrfs_release_path(path);
8006                         if (found_key.type == 0) {
8007                                 if (found_key.offset == 0)
8008                                         break;
8009                                 key.offset = found_key.offset - 1;
8010                                 key.type = found_key.type;
8011                         }
8012                         key.type = found_key.type - 1;
8013                         key.offset = (u64)-1;
8014                         continue;
8015                 }
8016
8017                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8018                         found_key.objectid, found_key.type, found_key.offset);
8019
8020                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8021                 if (ret)
8022                         break;
8023                 btrfs_release_path(path);
8024
8025                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8026                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8027                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8028                                 found_key.offset : root->nodesize;
8029
8030                         ret = btrfs_update_block_group(trans, root, bytenr,
8031                                                        bytes, 0, 0);
8032                         if (ret)
8033                                 break;
8034                 }
8035         }
8036
8037         btrfs_release_path(path);
8038         return ret;
8039 }
8040
8041 /*
8042  * for a single backref, this will allocate a new extent
8043  * and add the backref to it.
8044  */
8045 static int record_extent(struct btrfs_trans_handle *trans,
8046                          struct btrfs_fs_info *info,
8047                          struct btrfs_path *path,
8048                          struct extent_record *rec,
8049                          struct extent_backref *back,
8050                          int allocated, u64 flags)
8051 {
8052         int ret = 0;
8053         struct btrfs_root *extent_root = info->extent_root;
8054         struct extent_buffer *leaf;
8055         struct btrfs_key ins_key;
8056         struct btrfs_extent_item *ei;
8057         struct data_backref *dback;
8058         struct btrfs_tree_block_info *bi;
8059
8060         if (!back->is_data)
8061                 rec->max_size = max_t(u64, rec->max_size,
8062                                     info->extent_root->nodesize);
8063
8064         if (!allocated) {
8065                 u32 item_size = sizeof(*ei);
8066
8067                 if (!back->is_data)
8068                         item_size += sizeof(*bi);
8069
8070                 ins_key.objectid = rec->start;
8071                 ins_key.offset = rec->max_size;
8072                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8073
8074                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8075                                         &ins_key, item_size);
8076                 if (ret)
8077                         goto fail;
8078
8079                 leaf = path->nodes[0];
8080                 ei = btrfs_item_ptr(leaf, path->slots[0],
8081                                     struct btrfs_extent_item);
8082
8083                 btrfs_set_extent_refs(leaf, ei, 0);
8084                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8085
8086                 if (back->is_data) {
8087                         btrfs_set_extent_flags(leaf, ei,
8088                                                BTRFS_EXTENT_FLAG_DATA);
8089                 } else {
8090                         struct btrfs_disk_key copy_key;;
8091
8092                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8093                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8094                                              sizeof(*bi));
8095
8096                         btrfs_set_disk_key_objectid(&copy_key,
8097                                                     rec->info_objectid);
8098                         btrfs_set_disk_key_type(&copy_key, 0);
8099                         btrfs_set_disk_key_offset(&copy_key, 0);
8100
8101                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8102                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8103
8104                         btrfs_set_extent_flags(leaf, ei,
8105                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8106                 }
8107
8108                 btrfs_mark_buffer_dirty(leaf);
8109                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8110                                                rec->max_size, 1, 0);
8111                 if (ret)
8112                         goto fail;
8113                 btrfs_release_path(path);
8114         }
8115
8116         if (back->is_data) {
8117                 u64 parent;
8118                 int i;
8119
8120                 dback = to_data_backref(back);
8121                 if (back->full_backref)
8122                         parent = dback->parent;
8123                 else
8124                         parent = 0;
8125
8126                 for (i = 0; i < dback->found_ref; i++) {
8127                         /* if parent != 0, we're doing a full backref
8128                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8129                          * just makes the backref allocator create a data
8130                          * backref
8131                          */
8132                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8133                                                    rec->start, rec->max_size,
8134                                                    parent,
8135                                                    dback->root,
8136                                                    parent ?
8137                                                    BTRFS_FIRST_FREE_OBJECTID :
8138                                                    dback->owner,
8139                                                    dback->offset);
8140                         if (ret)
8141                                 break;
8142                 }
8143                 fprintf(stderr, "adding new data backref"
8144                                 " on %llu %s %llu owner %llu"
8145                                 " offset %llu found %d\n",
8146                                 (unsigned long long)rec->start,
8147                                 back->full_backref ?
8148                                 "parent" : "root",
8149                                 back->full_backref ?
8150                                 (unsigned long long)parent :
8151                                 (unsigned long long)dback->root,
8152                                 (unsigned long long)dback->owner,
8153                                 (unsigned long long)dback->offset,
8154                                 dback->found_ref);
8155         } else {
8156                 u64 parent;
8157                 struct tree_backref *tback;
8158
8159                 tback = to_tree_backref(back);
8160                 if (back->full_backref)
8161                         parent = tback->parent;
8162                 else
8163                         parent = 0;
8164
8165                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8166                                            rec->start, rec->max_size,
8167                                            parent, tback->root, 0, 0);
8168                 fprintf(stderr, "adding new tree backref on "
8169                         "start %llu len %llu parent %llu root %llu\n",
8170                         rec->start, rec->max_size, parent, tback->root);
8171         }
8172 fail:
8173         btrfs_release_path(path);
8174         return ret;
8175 }
8176
8177 static struct extent_entry *find_entry(struct list_head *entries,
8178                                        u64 bytenr, u64 bytes)
8179 {
8180         struct extent_entry *entry = NULL;
8181
8182         list_for_each_entry(entry, entries, list) {
8183                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8184                         return entry;
8185         }
8186
8187         return NULL;
8188 }
8189
8190 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8191 {
8192         struct extent_entry *entry, *best = NULL, *prev = NULL;
8193
8194         list_for_each_entry(entry, entries, list) {
8195                 /*
8196                  * If there are as many broken entries as entries then we know
8197                  * not to trust this particular entry.
8198                  */
8199                 if (entry->broken == entry->count)
8200                         continue;
8201
8202                 /*
8203                  * Special case, when there are only two entries and 'best' is
8204                  * the first one
8205                  */
8206                 if (!prev) {
8207                         best = entry;
8208                         prev = entry;
8209                         continue;
8210                 }
8211
8212                 /*
8213                  * If our current entry == best then we can't be sure our best
8214                  * is really the best, so we need to keep searching.
8215                  */
8216                 if (best && best->count == entry->count) {
8217                         prev = entry;
8218                         best = NULL;
8219                         continue;
8220                 }
8221
8222                 /* Prev == entry, not good enough, have to keep searching */
8223                 if (!prev->broken && prev->count == entry->count)
8224                         continue;
8225
8226                 if (!best)
8227                         best = (prev->count > entry->count) ? prev : entry;
8228                 else if (best->count < entry->count)
8229                         best = entry;
8230                 prev = entry;
8231         }
8232
8233         return best;
8234 }
8235
8236 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8237                       struct data_backref *dback, struct extent_entry *entry)
8238 {
8239         struct btrfs_trans_handle *trans;
8240         struct btrfs_root *root;
8241         struct btrfs_file_extent_item *fi;
8242         struct extent_buffer *leaf;
8243         struct btrfs_key key;
8244         u64 bytenr, bytes;
8245         int ret, err;
8246
8247         key.objectid = dback->root;
8248         key.type = BTRFS_ROOT_ITEM_KEY;
8249         key.offset = (u64)-1;
8250         root = btrfs_read_fs_root(info, &key);
8251         if (IS_ERR(root)) {
8252                 fprintf(stderr, "Couldn't find root for our ref\n");
8253                 return -EINVAL;
8254         }
8255
8256         /*
8257          * The backref points to the original offset of the extent if it was
8258          * split, so we need to search down to the offset we have and then walk
8259          * forward until we find the backref we're looking for.
8260          */
8261         key.objectid = dback->owner;
8262         key.type = BTRFS_EXTENT_DATA_KEY;
8263         key.offset = dback->offset;
8264         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8265         if (ret < 0) {
8266                 fprintf(stderr, "Error looking up ref %d\n", ret);
8267                 return ret;
8268         }
8269
8270         while (1) {
8271                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8272                         ret = btrfs_next_leaf(root, path);
8273                         if (ret) {
8274                                 fprintf(stderr, "Couldn't find our ref, next\n");
8275                                 return -EINVAL;
8276                         }
8277                 }
8278                 leaf = path->nodes[0];
8279                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8280                 if (key.objectid != dback->owner ||
8281                     key.type != BTRFS_EXTENT_DATA_KEY) {
8282                         fprintf(stderr, "Couldn't find our ref, search\n");
8283                         return -EINVAL;
8284                 }
8285                 fi = btrfs_item_ptr(leaf, path->slots[0],
8286                                     struct btrfs_file_extent_item);
8287                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8288                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8289
8290                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8291                         break;
8292                 path->slots[0]++;
8293         }
8294
8295         btrfs_release_path(path);
8296
8297         trans = btrfs_start_transaction(root, 1);
8298         if (IS_ERR(trans))
8299                 return PTR_ERR(trans);
8300
8301         /*
8302          * Ok we have the key of the file extent we want to fix, now we can cow
8303          * down to the thing and fix it.
8304          */
8305         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8306         if (ret < 0) {
8307                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8308                         key.objectid, key.type, key.offset, ret);
8309                 goto out;
8310         }
8311         if (ret > 0) {
8312                 fprintf(stderr, "Well that's odd, we just found this key "
8313                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8314                         key.offset);
8315                 ret = -EINVAL;
8316                 goto out;
8317         }
8318         leaf = path->nodes[0];
8319         fi = btrfs_item_ptr(leaf, path->slots[0],
8320                             struct btrfs_file_extent_item);
8321
8322         if (btrfs_file_extent_compression(leaf, fi) &&
8323             dback->disk_bytenr != entry->bytenr) {
8324                 fprintf(stderr, "Ref doesn't match the record start and is "
8325                         "compressed, please take a btrfs-image of this file "
8326                         "system and send it to a btrfs developer so they can "
8327                         "complete this functionality for bytenr %Lu\n",
8328                         dback->disk_bytenr);
8329                 ret = -EINVAL;
8330                 goto out;
8331         }
8332
8333         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8334                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8335         } else if (dback->disk_bytenr > entry->bytenr) {
8336                 u64 off_diff, offset;
8337
8338                 off_diff = dback->disk_bytenr - entry->bytenr;
8339                 offset = btrfs_file_extent_offset(leaf, fi);
8340                 if (dback->disk_bytenr + offset +
8341                     btrfs_file_extent_num_bytes(leaf, fi) >
8342                     entry->bytenr + entry->bytes) {
8343                         fprintf(stderr, "Ref is past the entry end, please "
8344                                 "take a btrfs-image of this file system and "
8345                                 "send it to a btrfs developer, ref %Lu\n",
8346                                 dback->disk_bytenr);
8347                         ret = -EINVAL;
8348                         goto out;
8349                 }
8350                 offset += off_diff;
8351                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8352                 btrfs_set_file_extent_offset(leaf, fi, offset);
8353         } else if (dback->disk_bytenr < entry->bytenr) {
8354                 u64 offset;
8355
8356                 offset = btrfs_file_extent_offset(leaf, fi);
8357                 if (dback->disk_bytenr + offset < entry->bytenr) {
8358                         fprintf(stderr, "Ref is before the entry start, please"
8359                                 " take a btrfs-image of this file system and "
8360                                 "send it to a btrfs developer, ref %Lu\n",
8361                                 dback->disk_bytenr);
8362                         ret = -EINVAL;
8363                         goto out;
8364                 }
8365
8366                 offset += dback->disk_bytenr;
8367                 offset -= entry->bytenr;
8368                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8369                 btrfs_set_file_extent_offset(leaf, fi, offset);
8370         }
8371
8372         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8373
8374         /*
8375          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8376          * only do this if we aren't using compression, otherwise it's a
8377          * trickier case.
8378          */
8379         if (!btrfs_file_extent_compression(leaf, fi))
8380                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8381         else
8382                 printf("ram bytes may be wrong?\n");
8383         btrfs_mark_buffer_dirty(leaf);
8384 out:
8385         err = btrfs_commit_transaction(trans, root);
8386         btrfs_release_path(path);
8387         return ret ? ret : err;
8388 }
8389
8390 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8391                            struct extent_record *rec)
8392 {
8393         struct extent_backref *back;
8394         struct data_backref *dback;
8395         struct extent_entry *entry, *best = NULL;
8396         LIST_HEAD(entries);
8397         int nr_entries = 0;
8398         int broken_entries = 0;
8399         int ret = 0;
8400         short mismatch = 0;
8401
8402         /*
8403          * Metadata is easy and the backrefs should always agree on bytenr and
8404          * size, if not we've got bigger issues.
8405          */
8406         if (rec->metadata)
8407                 return 0;
8408
8409         list_for_each_entry(back, &rec->backrefs, list) {
8410                 if (back->full_backref || !back->is_data)
8411                         continue;
8412
8413                 dback = to_data_backref(back);
8414
8415                 /*
8416                  * We only pay attention to backrefs that we found a real
8417                  * backref for.
8418                  */
8419                 if (dback->found_ref == 0)
8420                         continue;
8421
8422                 /*
8423                  * For now we only catch when the bytes don't match, not the
8424                  * bytenr.  We can easily do this at the same time, but I want
8425                  * to have a fs image to test on before we just add repair
8426                  * functionality willy-nilly so we know we won't screw up the
8427                  * repair.
8428                  */
8429
8430                 entry = find_entry(&entries, dback->disk_bytenr,
8431                                    dback->bytes);
8432                 if (!entry) {
8433                         entry = malloc(sizeof(struct extent_entry));
8434                         if (!entry) {
8435                                 ret = -ENOMEM;
8436                                 goto out;
8437                         }
8438                         memset(entry, 0, sizeof(*entry));
8439                         entry->bytenr = dback->disk_bytenr;
8440                         entry->bytes = dback->bytes;
8441                         list_add_tail(&entry->list, &entries);
8442                         nr_entries++;
8443                 }
8444
8445                 /*
8446                  * If we only have on entry we may think the entries agree when
8447                  * in reality they don't so we have to do some extra checking.
8448                  */
8449                 if (dback->disk_bytenr != rec->start ||
8450                     dback->bytes != rec->nr || back->broken)
8451                         mismatch = 1;
8452
8453                 if (back->broken) {
8454                         entry->broken++;
8455                         broken_entries++;
8456                 }
8457
8458                 entry->count++;
8459         }
8460
8461         /* Yay all the backrefs agree, carry on good sir */
8462         if (nr_entries <= 1 && !mismatch)
8463                 goto out;
8464
8465         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8466                 "%Lu\n", rec->start);
8467
8468         /*
8469          * First we want to see if the backrefs can agree amongst themselves who
8470          * is right, so figure out which one of the entries has the highest
8471          * count.
8472          */
8473         best = find_most_right_entry(&entries);
8474
8475         /*
8476          * Ok so we may have an even split between what the backrefs think, so
8477          * this is where we use the extent ref to see what it thinks.
8478          */
8479         if (!best) {
8480                 entry = find_entry(&entries, rec->start, rec->nr);
8481                 if (!entry && (!broken_entries || !rec->found_rec)) {
8482                         fprintf(stderr, "Backrefs don't agree with each other "
8483                                 "and extent record doesn't agree with anybody,"
8484                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8485                                 rec->start, rec->nr);
8486                         ret = -EINVAL;
8487                         goto out;
8488                 } else if (!entry) {
8489                         /*
8490                          * Ok our backrefs were broken, we'll assume this is the
8491                          * correct value and add an entry for this range.
8492                          */
8493                         entry = malloc(sizeof(struct extent_entry));
8494                         if (!entry) {
8495                                 ret = -ENOMEM;
8496                                 goto out;
8497                         }
8498                         memset(entry, 0, sizeof(*entry));
8499                         entry->bytenr = rec->start;
8500                         entry->bytes = rec->nr;
8501                         list_add_tail(&entry->list, &entries);
8502                         nr_entries++;
8503                 }
8504                 entry->count++;
8505                 best = find_most_right_entry(&entries);
8506                 if (!best) {
8507                         fprintf(stderr, "Backrefs and extent record evenly "
8508                                 "split on who is right, this is going to "
8509                                 "require user input to fix bytenr %Lu bytes "
8510                                 "%Lu\n", rec->start, rec->nr);
8511                         ret = -EINVAL;
8512                         goto out;
8513                 }
8514         }
8515
8516         /*
8517          * I don't think this can happen currently as we'll abort() if we catch
8518          * this case higher up, but in case somebody removes that we still can't
8519          * deal with it properly here yet, so just bail out of that's the case.
8520          */
8521         if (best->bytenr != rec->start) {
8522                 fprintf(stderr, "Extent start and backref starts don't match, "
8523                         "please use btrfs-image on this file system and send "
8524                         "it to a btrfs developer so they can make fsck fix "
8525                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8526                         rec->start, rec->nr);
8527                 ret = -EINVAL;
8528                 goto out;
8529         }
8530
8531         /*
8532          * Ok great we all agreed on an extent record, let's go find the real
8533          * references and fix up the ones that don't match.
8534          */
8535         list_for_each_entry(back, &rec->backrefs, list) {
8536                 if (back->full_backref || !back->is_data)
8537                         continue;
8538
8539                 dback = to_data_backref(back);
8540
8541                 /*
8542                  * Still ignoring backrefs that don't have a real ref attached
8543                  * to them.
8544                  */
8545                 if (dback->found_ref == 0)
8546                         continue;
8547
8548                 if (dback->bytes == best->bytes &&
8549                     dback->disk_bytenr == best->bytenr)
8550                         continue;
8551
8552                 ret = repair_ref(info, path, dback, best);
8553                 if (ret)
8554                         goto out;
8555         }
8556
8557         /*
8558          * Ok we messed with the actual refs, which means we need to drop our
8559          * entire cache and go back and rescan.  I know this is a huge pain and
8560          * adds a lot of extra work, but it's the only way to be safe.  Once all
8561          * the backrefs agree we may not need to do anything to the extent
8562          * record itself.
8563          */
8564         ret = -EAGAIN;
8565 out:
8566         while (!list_empty(&entries)) {
8567                 entry = list_entry(entries.next, struct extent_entry, list);
8568                 list_del_init(&entry->list);
8569                 free(entry);
8570         }
8571         return ret;
8572 }
8573
8574 static int process_duplicates(struct btrfs_root *root,
8575                               struct cache_tree *extent_cache,
8576                               struct extent_record *rec)
8577 {
8578         struct extent_record *good, *tmp;
8579         struct cache_extent *cache;
8580         int ret;
8581
8582         /*
8583          * If we found a extent record for this extent then return, or if we
8584          * have more than one duplicate we are likely going to need to delete
8585          * something.
8586          */
8587         if (rec->found_rec || rec->num_duplicates > 1)
8588                 return 0;
8589
8590         /* Shouldn't happen but just in case */
8591         BUG_ON(!rec->num_duplicates);
8592
8593         /*
8594          * So this happens if we end up with a backref that doesn't match the
8595          * actual extent entry.  So either the backref is bad or the extent
8596          * entry is bad.  Either way we want to have the extent_record actually
8597          * reflect what we found in the extent_tree, so we need to take the
8598          * duplicate out and use that as the extent_record since the only way we
8599          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8600          */
8601         remove_cache_extent(extent_cache, &rec->cache);
8602
8603         good = to_extent_record(rec->dups.next);
8604         list_del_init(&good->list);
8605         INIT_LIST_HEAD(&good->backrefs);
8606         INIT_LIST_HEAD(&good->dups);
8607         good->cache.start = good->start;
8608         good->cache.size = good->nr;
8609         good->content_checked = 0;
8610         good->owner_ref_checked = 0;
8611         good->num_duplicates = 0;
8612         good->refs = rec->refs;
8613         list_splice_init(&rec->backrefs, &good->backrefs);
8614         while (1) {
8615                 cache = lookup_cache_extent(extent_cache, good->start,
8616                                             good->nr);
8617                 if (!cache)
8618                         break;
8619                 tmp = container_of(cache, struct extent_record, cache);
8620
8621                 /*
8622                  * If we find another overlapping extent and it's found_rec is
8623                  * set then it's a duplicate and we need to try and delete
8624                  * something.
8625                  */
8626                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8627                         if (list_empty(&good->list))
8628                                 list_add_tail(&good->list,
8629                                               &duplicate_extents);
8630                         good->num_duplicates += tmp->num_duplicates + 1;
8631                         list_splice_init(&tmp->dups, &good->dups);
8632                         list_del_init(&tmp->list);
8633                         list_add_tail(&tmp->list, &good->dups);
8634                         remove_cache_extent(extent_cache, &tmp->cache);
8635                         continue;
8636                 }
8637
8638                 /*
8639                  * Ok we have another non extent item backed extent rec, so lets
8640                  * just add it to this extent and carry on like we did above.
8641                  */
8642                 good->refs += tmp->refs;
8643                 list_splice_init(&tmp->backrefs, &good->backrefs);
8644                 remove_cache_extent(extent_cache, &tmp->cache);
8645                 free(tmp);
8646         }
8647         ret = insert_cache_extent(extent_cache, &good->cache);
8648         BUG_ON(ret);
8649         free(rec);
8650         return good->num_duplicates ? 0 : 1;
8651 }
8652
8653 static int delete_duplicate_records(struct btrfs_root *root,
8654                                     struct extent_record *rec)
8655 {
8656         struct btrfs_trans_handle *trans;
8657         LIST_HEAD(delete_list);
8658         struct btrfs_path path;
8659         struct extent_record *tmp, *good, *n;
8660         int nr_del = 0;
8661         int ret = 0, err;
8662         struct btrfs_key key;
8663
8664         btrfs_init_path(&path);
8665
8666         good = rec;
8667         /* Find the record that covers all of the duplicates. */
8668         list_for_each_entry(tmp, &rec->dups, list) {
8669                 if (good->start < tmp->start)
8670                         continue;
8671                 if (good->nr > tmp->nr)
8672                         continue;
8673
8674                 if (tmp->start + tmp->nr < good->start + good->nr) {
8675                         fprintf(stderr, "Ok we have overlapping extents that "
8676                                 "aren't completely covered by each other, this "
8677                                 "is going to require more careful thought.  "
8678                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8679                                 tmp->start, tmp->nr, good->start, good->nr);
8680                         abort();
8681                 }
8682                 good = tmp;
8683         }
8684
8685         if (good != rec)
8686                 list_add_tail(&rec->list, &delete_list);
8687
8688         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8689                 if (tmp == good)
8690                         continue;
8691                 list_move_tail(&tmp->list, &delete_list);
8692         }
8693
8694         root = root->fs_info->extent_root;
8695         trans = btrfs_start_transaction(root, 1);
8696         if (IS_ERR(trans)) {
8697                 ret = PTR_ERR(trans);
8698                 goto out;
8699         }
8700
8701         list_for_each_entry(tmp, &delete_list, list) {
8702                 if (tmp->found_rec == 0)
8703                         continue;
8704                 key.objectid = tmp->start;
8705                 key.type = BTRFS_EXTENT_ITEM_KEY;
8706                 key.offset = tmp->nr;
8707
8708                 /* Shouldn't happen but just in case */
8709                 if (tmp->metadata) {
8710                         fprintf(stderr, "Well this shouldn't happen, extent "
8711                                 "record overlaps but is metadata? "
8712                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8713                         abort();
8714                 }
8715
8716                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8717                 if (ret) {
8718                         if (ret > 0)
8719                                 ret = -EINVAL;
8720                         break;
8721                 }
8722                 ret = btrfs_del_item(trans, root, &path);
8723                 if (ret)
8724                         break;
8725                 btrfs_release_path(&path);
8726                 nr_del++;
8727         }
8728         err = btrfs_commit_transaction(trans, root);
8729         if (err && !ret)
8730                 ret = err;
8731 out:
8732         while (!list_empty(&delete_list)) {
8733                 tmp = to_extent_record(delete_list.next);
8734                 list_del_init(&tmp->list);
8735                 if (tmp == rec)
8736                         continue;
8737                 free(tmp);
8738         }
8739
8740         while (!list_empty(&rec->dups)) {
8741                 tmp = to_extent_record(rec->dups.next);
8742                 list_del_init(&tmp->list);
8743                 free(tmp);
8744         }
8745
8746         btrfs_release_path(&path);
8747
8748         if (!ret && !nr_del)
8749                 rec->num_duplicates = 0;
8750
8751         return ret ? ret : nr_del;
8752 }
8753
8754 static int find_possible_backrefs(struct btrfs_fs_info *info,
8755                                   struct btrfs_path *path,
8756                                   struct cache_tree *extent_cache,
8757                                   struct extent_record *rec)
8758 {
8759         struct btrfs_root *root;
8760         struct extent_backref *back;
8761         struct data_backref *dback;
8762         struct cache_extent *cache;
8763         struct btrfs_file_extent_item *fi;
8764         struct btrfs_key key;
8765         u64 bytenr, bytes;
8766         int ret;
8767
8768         list_for_each_entry(back, &rec->backrefs, list) {
8769                 /* Don't care about full backrefs (poor unloved backrefs) */
8770                 if (back->full_backref || !back->is_data)
8771                         continue;
8772
8773                 dback = to_data_backref(back);
8774
8775                 /* We found this one, we don't need to do a lookup */
8776                 if (dback->found_ref)
8777                         continue;
8778
8779                 key.objectid = dback->root;
8780                 key.type = BTRFS_ROOT_ITEM_KEY;
8781                 key.offset = (u64)-1;
8782
8783                 root = btrfs_read_fs_root(info, &key);
8784
8785                 /* No root, definitely a bad ref, skip */
8786                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8787                         continue;
8788                 /* Other err, exit */
8789                 if (IS_ERR(root))
8790                         return PTR_ERR(root);
8791
8792                 key.objectid = dback->owner;
8793                 key.type = BTRFS_EXTENT_DATA_KEY;
8794                 key.offset = dback->offset;
8795                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8796                 if (ret) {
8797                         btrfs_release_path(path);
8798                         if (ret < 0)
8799                                 return ret;
8800                         /* Didn't find it, we can carry on */
8801                         ret = 0;
8802                         continue;
8803                 }
8804
8805                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8806                                     struct btrfs_file_extent_item);
8807                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8808                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8809                 btrfs_release_path(path);
8810                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8811                 if (cache) {
8812                         struct extent_record *tmp;
8813                         tmp = container_of(cache, struct extent_record, cache);
8814
8815                         /*
8816                          * If we found an extent record for the bytenr for this
8817                          * particular backref then we can't add it to our
8818                          * current extent record.  We only want to add backrefs
8819                          * that don't have a corresponding extent item in the
8820                          * extent tree since they likely belong to this record
8821                          * and we need to fix it if it doesn't match bytenrs.
8822                          */
8823                         if  (tmp->found_rec)
8824                                 continue;
8825                 }
8826
8827                 dback->found_ref += 1;
8828                 dback->disk_bytenr = bytenr;
8829                 dback->bytes = bytes;
8830
8831                 /*
8832                  * Set this so the verify backref code knows not to trust the
8833                  * values in this backref.
8834                  */
8835                 back->broken = 1;
8836         }
8837
8838         return 0;
8839 }
8840
8841 /*
8842  * Record orphan data ref into corresponding root.
8843  *
8844  * Return 0 if the extent item contains data ref and recorded.
8845  * Return 1 if the extent item contains no useful data ref
8846  *   On that case, it may contains only shared_dataref or metadata backref
8847  *   or the file extent exists(this should be handled by the extent bytenr
8848  *   recovery routine)
8849  * Return <0 if something goes wrong.
8850  */
8851 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8852                                       struct extent_record *rec)
8853 {
8854         struct btrfs_key key;
8855         struct btrfs_root *dest_root;
8856         struct extent_backref *back;
8857         struct data_backref *dback;
8858         struct orphan_data_extent *orphan;
8859         struct btrfs_path path;
8860         int recorded_data_ref = 0;
8861         int ret = 0;
8862
8863         if (rec->metadata)
8864                 return 1;
8865         btrfs_init_path(&path);
8866         list_for_each_entry(back, &rec->backrefs, list) {
8867                 if (back->full_backref || !back->is_data ||
8868                     !back->found_extent_tree)
8869                         continue;
8870                 dback = to_data_backref(back);
8871                 if (dback->found_ref)
8872                         continue;
8873                 key.objectid = dback->root;
8874                 key.type = BTRFS_ROOT_ITEM_KEY;
8875                 key.offset = (u64)-1;
8876
8877                 dest_root = btrfs_read_fs_root(fs_info, &key);
8878
8879                 /* For non-exist root we just skip it */
8880                 if (IS_ERR(dest_root) || !dest_root)
8881                         continue;
8882
8883                 key.objectid = dback->owner;
8884                 key.type = BTRFS_EXTENT_DATA_KEY;
8885                 key.offset = dback->offset;
8886
8887                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8888                 btrfs_release_path(&path);
8889                 /*
8890                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8891                  * we need to record it for inode/file extent rebuild.
8892                  * For ret > 0, we record it only for file extent rebuild.
8893                  * For ret == 0, the file extent exists but only bytenr
8894                  * mismatch, let the original bytenr fix routine to handle,
8895                  * don't record it.
8896                  */
8897                 if (ret == 0)
8898                         continue;
8899                 ret = 0;
8900                 orphan = malloc(sizeof(*orphan));
8901                 if (!orphan) {
8902                         ret = -ENOMEM;
8903                         goto out;
8904                 }
8905                 INIT_LIST_HEAD(&orphan->list);
8906                 orphan->root = dback->root;
8907                 orphan->objectid = dback->owner;
8908                 orphan->offset = dback->offset;
8909                 orphan->disk_bytenr = rec->cache.start;
8910                 orphan->disk_len = rec->cache.size;
8911                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8912                 recorded_data_ref = 1;
8913         }
8914 out:
8915         btrfs_release_path(&path);
8916         if (!ret)
8917                 return !recorded_data_ref;
8918         else
8919                 return ret;
8920 }
8921
8922 /*
8923  * when an incorrect extent item is found, this will delete
8924  * all of the existing entries for it and recreate them
8925  * based on what the tree scan found.
8926  */
8927 static int fixup_extent_refs(struct btrfs_fs_info *info,
8928                              struct cache_tree *extent_cache,
8929                              struct extent_record *rec)
8930 {
8931         struct btrfs_trans_handle *trans = NULL;
8932         int ret;
8933         struct btrfs_path path;
8934         struct list_head *cur = rec->backrefs.next;
8935         struct cache_extent *cache;
8936         struct extent_backref *back;
8937         int allocated = 0;
8938         u64 flags = 0;
8939
8940         if (rec->flag_block_full_backref)
8941                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8942
8943         btrfs_init_path(&path);
8944         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8945                 /*
8946                  * Sometimes the backrefs themselves are so broken they don't
8947                  * get attached to any meaningful rec, so first go back and
8948                  * check any of our backrefs that we couldn't find and throw
8949                  * them into the list if we find the backref so that
8950                  * verify_backrefs can figure out what to do.
8951                  */
8952                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8953                 if (ret < 0)
8954                         goto out;
8955         }
8956
8957         /* step one, make sure all of the backrefs agree */
8958         ret = verify_backrefs(info, &path, rec);
8959         if (ret < 0)
8960                 goto out;
8961
8962         trans = btrfs_start_transaction(info->extent_root, 1);
8963         if (IS_ERR(trans)) {
8964                 ret = PTR_ERR(trans);
8965                 goto out;
8966         }
8967
8968         /* step two, delete all the existing records */
8969         ret = delete_extent_records(trans, info->extent_root, &path,
8970                                     rec->start, rec->max_size);
8971
8972         if (ret < 0)
8973                 goto out;
8974
8975         /* was this block corrupt?  If so, don't add references to it */
8976         cache = lookup_cache_extent(info->corrupt_blocks,
8977                                     rec->start, rec->max_size);
8978         if (cache) {
8979                 ret = 0;
8980                 goto out;
8981         }
8982
8983         /* step three, recreate all the refs we did find */
8984         while(cur != &rec->backrefs) {
8985                 back = to_extent_backref(cur);
8986                 cur = cur->next;
8987
8988                 /*
8989                  * if we didn't find any references, don't create a
8990                  * new extent record
8991                  */
8992                 if (!back->found_ref)
8993                         continue;
8994
8995                 rec->bad_full_backref = 0;
8996                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8997                 allocated = 1;
8998
8999                 if (ret)
9000                         goto out;
9001         }
9002 out:
9003         if (trans) {
9004                 int err = btrfs_commit_transaction(trans, info->extent_root);
9005                 if (!ret)
9006                         ret = err;
9007         }
9008
9009         if (!ret)
9010                 fprintf(stderr, "Repaired extent references for %llu\n",
9011                                 (unsigned long long)rec->start);
9012
9013         btrfs_release_path(&path);
9014         return ret;
9015 }
9016
9017 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9018                               struct extent_record *rec)
9019 {
9020         struct btrfs_trans_handle *trans;
9021         struct btrfs_root *root = fs_info->extent_root;
9022         struct btrfs_path path;
9023         struct btrfs_extent_item *ei;
9024         struct btrfs_key key;
9025         u64 flags;
9026         int ret = 0;
9027
9028         key.objectid = rec->start;
9029         if (rec->metadata) {
9030                 key.type = BTRFS_METADATA_ITEM_KEY;
9031                 key.offset = rec->info_level;
9032         } else {
9033                 key.type = BTRFS_EXTENT_ITEM_KEY;
9034                 key.offset = rec->max_size;
9035         }
9036
9037         trans = btrfs_start_transaction(root, 0);
9038         if (IS_ERR(trans))
9039                 return PTR_ERR(trans);
9040
9041         btrfs_init_path(&path);
9042         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9043         if (ret < 0) {
9044                 btrfs_release_path(&path);
9045                 btrfs_commit_transaction(trans, root);
9046                 return ret;
9047         } else if (ret) {
9048                 fprintf(stderr, "Didn't find extent for %llu\n",
9049                         (unsigned long long)rec->start);
9050                 btrfs_release_path(&path);
9051                 btrfs_commit_transaction(trans, root);
9052                 return -ENOENT;
9053         }
9054
9055         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9056                             struct btrfs_extent_item);
9057         flags = btrfs_extent_flags(path.nodes[0], ei);
9058         if (rec->flag_block_full_backref) {
9059                 fprintf(stderr, "setting full backref on %llu\n",
9060                         (unsigned long long)key.objectid);
9061                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9062         } else {
9063                 fprintf(stderr, "clearing full backref on %llu\n",
9064                         (unsigned long long)key.objectid);
9065                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9066         }
9067         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9068         btrfs_mark_buffer_dirty(path.nodes[0]);
9069         btrfs_release_path(&path);
9070         ret = btrfs_commit_transaction(trans, root);
9071         if (!ret)
9072                 fprintf(stderr, "Repaired extent flags for %llu\n",
9073                                 (unsigned long long)rec->start);
9074
9075         return ret;
9076 }
9077
9078 /* right now we only prune from the extent allocation tree */
9079 static int prune_one_block(struct btrfs_trans_handle *trans,
9080                            struct btrfs_fs_info *info,
9081                            struct btrfs_corrupt_block *corrupt)
9082 {
9083         int ret;
9084         struct btrfs_path path;
9085         struct extent_buffer *eb;
9086         u64 found;
9087         int slot;
9088         int nritems;
9089         int level = corrupt->level + 1;
9090
9091         btrfs_init_path(&path);
9092 again:
9093         /* we want to stop at the parent to our busted block */
9094         path.lowest_level = level;
9095
9096         ret = btrfs_search_slot(trans, info->extent_root,
9097                                 &corrupt->key, &path, -1, 1);
9098
9099         if (ret < 0)
9100                 goto out;
9101
9102         eb = path.nodes[level];
9103         if (!eb) {
9104                 ret = -ENOENT;
9105                 goto out;
9106         }
9107
9108         /*
9109          * hopefully the search gave us the block we want to prune,
9110          * lets try that first
9111          */
9112         slot = path.slots[level];
9113         found =  btrfs_node_blockptr(eb, slot);
9114         if (found == corrupt->cache.start)
9115                 goto del_ptr;
9116
9117         nritems = btrfs_header_nritems(eb);
9118
9119         /* the search failed, lets scan this node and hope we find it */
9120         for (slot = 0; slot < nritems; slot++) {
9121                 found =  btrfs_node_blockptr(eb, slot);
9122                 if (found == corrupt->cache.start)
9123                         goto del_ptr;
9124         }
9125         /*
9126          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9127          * to this block
9128          */
9129         if (eb == info->extent_root->node) {
9130                 ret = -ENOENT;
9131                 goto out;
9132         } else {
9133                 level++;
9134                 btrfs_release_path(&path);
9135                 goto again;
9136         }
9137
9138 del_ptr:
9139         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9140         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
9141
9142 out:
9143         btrfs_release_path(&path);
9144         return ret;
9145 }
9146
9147 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9148 {
9149         struct btrfs_trans_handle *trans = NULL;
9150         struct cache_extent *cache;
9151         struct btrfs_corrupt_block *corrupt;
9152
9153         while (1) {
9154                 cache = search_cache_extent(info->corrupt_blocks, 0);
9155                 if (!cache)
9156                         break;
9157                 if (!trans) {
9158                         trans = btrfs_start_transaction(info->extent_root, 1);
9159                         if (IS_ERR(trans))
9160                                 return PTR_ERR(trans);
9161                 }
9162                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9163                 prune_one_block(trans, info, corrupt);
9164                 remove_cache_extent(info->corrupt_blocks, cache);
9165         }
9166         if (trans)
9167                 return btrfs_commit_transaction(trans, info->extent_root);
9168         return 0;
9169 }
9170
9171 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9172 {
9173         struct btrfs_block_group_cache *cache;
9174         u64 start, end;
9175         int ret;
9176
9177         while (1) {
9178                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9179                                             &start, &end, EXTENT_DIRTY);
9180                 if (ret)
9181                         break;
9182                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9183                                    GFP_NOFS);
9184         }
9185
9186         start = 0;
9187         while (1) {
9188                 cache = btrfs_lookup_first_block_group(fs_info, start);
9189                 if (!cache)
9190                         break;
9191                 if (cache->cached)
9192                         cache->cached = 0;
9193                 start = cache->key.objectid + cache->key.offset;
9194         }
9195 }
9196
9197 static int check_extent_refs(struct btrfs_root *root,
9198                              struct cache_tree *extent_cache)
9199 {
9200         struct extent_record *rec;
9201         struct cache_extent *cache;
9202         int ret = 0;
9203         int had_dups = 0;
9204
9205         if (repair) {
9206                 /*
9207                  * if we're doing a repair, we have to make sure
9208                  * we don't allocate from the problem extents.
9209                  * In the worst case, this will be all the
9210                  * extents in the FS
9211                  */
9212                 cache = search_cache_extent(extent_cache, 0);
9213                 while(cache) {
9214                         rec = container_of(cache, struct extent_record, cache);
9215                         set_extent_dirty(root->fs_info->excluded_extents,
9216                                          rec->start,
9217                                          rec->start + rec->max_size - 1,
9218                                          GFP_NOFS);
9219                         cache = next_cache_extent(cache);
9220                 }
9221
9222                 /* pin down all the corrupted blocks too */
9223                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9224                 while(cache) {
9225                         set_extent_dirty(root->fs_info->excluded_extents,
9226                                          cache->start,
9227                                          cache->start + cache->size - 1,
9228                                          GFP_NOFS);
9229                         cache = next_cache_extent(cache);
9230                 }
9231                 prune_corrupt_blocks(root->fs_info);
9232                 reset_cached_block_groups(root->fs_info);
9233         }
9234
9235         reset_cached_block_groups(root->fs_info);
9236
9237         /*
9238          * We need to delete any duplicate entries we find first otherwise we
9239          * could mess up the extent tree when we have backrefs that actually
9240          * belong to a different extent item and not the weird duplicate one.
9241          */
9242         while (repair && !list_empty(&duplicate_extents)) {
9243                 rec = to_extent_record(duplicate_extents.next);
9244                 list_del_init(&rec->list);
9245
9246                 /* Sometimes we can find a backref before we find an actual
9247                  * extent, so we need to process it a little bit to see if there
9248                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9249                  * if this is a backref screwup.  If we need to delete stuff
9250                  * process_duplicates() will return 0, otherwise it will return
9251                  * 1 and we
9252                  */
9253                 if (process_duplicates(root, extent_cache, rec))
9254                         continue;
9255                 ret = delete_duplicate_records(root, rec);
9256                 if (ret < 0)
9257                         return ret;
9258                 /*
9259                  * delete_duplicate_records will return the number of entries
9260                  * deleted, so if it's greater than 0 then we know we actually
9261                  * did something and we need to remove.
9262                  */
9263                 if (ret)
9264                         had_dups = 1;
9265         }
9266
9267         if (had_dups)
9268                 return -EAGAIN;
9269
9270         while(1) {
9271                 int cur_err = 0;
9272                 int fix = 0;
9273
9274                 cache = search_cache_extent(extent_cache, 0);
9275                 if (!cache)
9276                         break;
9277                 rec = container_of(cache, struct extent_record, cache);
9278                 if (rec->num_duplicates) {
9279                         fprintf(stderr, "extent item %llu has multiple extent "
9280                                 "items\n", (unsigned long long)rec->start);
9281                         cur_err = 1;
9282                 }
9283
9284                 if (rec->refs != rec->extent_item_refs) {
9285                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9286                                 (unsigned long long)rec->start,
9287                                 (unsigned long long)rec->nr);
9288                         fprintf(stderr, "extent item %llu, found %llu\n",
9289                                 (unsigned long long)rec->extent_item_refs,
9290                                 (unsigned long long)rec->refs);
9291                         ret = record_orphan_data_extents(root->fs_info, rec);
9292                         if (ret < 0)
9293                                 goto repair_abort;
9294                         fix = ret;
9295                         cur_err = 1;
9296                 }
9297                 if (all_backpointers_checked(rec, 1)) {
9298                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9299                                 (unsigned long long)rec->start,
9300                                 (unsigned long long)rec->nr);
9301                         fix = 1;
9302                         cur_err = 1;
9303                 }
9304                 if (!rec->owner_ref_checked) {
9305                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9306                                 (unsigned long long)rec->start,
9307                                 (unsigned long long)rec->nr);
9308                         fix = 1;
9309                         cur_err = 1;
9310                 }
9311
9312                 if (repair && fix) {
9313                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9314                         if (ret)
9315                                 goto repair_abort;
9316                 }
9317
9318
9319                 if (rec->bad_full_backref) {
9320                         fprintf(stderr, "bad full backref, on [%llu]\n",
9321                                 (unsigned long long)rec->start);
9322                         if (repair) {
9323                                 ret = fixup_extent_flags(root->fs_info, rec);
9324                                 if (ret)
9325                                         goto repair_abort;
9326                                 fix = 1;
9327                         }
9328                         cur_err = 1;
9329                 }
9330                 /*
9331                  * Although it's not a extent ref's problem, we reuse this
9332                  * routine for error reporting.
9333                  * No repair function yet.
9334                  */
9335                 if (rec->crossing_stripes) {
9336                         fprintf(stderr,
9337                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9338                                 rec->start, rec->start + rec->max_size);
9339                         cur_err = 1;
9340                 }
9341
9342                 if (rec->wrong_chunk_type) {
9343                         fprintf(stderr,
9344                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9345                                 rec->start, rec->start + rec->max_size);
9346                         cur_err = 1;
9347                 }
9348
9349                 remove_cache_extent(extent_cache, cache);
9350                 free_all_extent_backrefs(rec);
9351                 if (!init_extent_tree && repair && (!cur_err || fix))
9352                         clear_extent_dirty(root->fs_info->excluded_extents,
9353                                            rec->start,
9354                                            rec->start + rec->max_size - 1,
9355                                            GFP_NOFS);
9356                 free(rec);
9357         }
9358 repair_abort:
9359         if (repair) {
9360                 if (ret && ret != -EAGAIN) {
9361                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9362                         exit(1);
9363                 } else if (!ret) {
9364                         struct btrfs_trans_handle *trans;
9365
9366                         root = root->fs_info->extent_root;
9367                         trans = btrfs_start_transaction(root, 1);
9368                         if (IS_ERR(trans)) {
9369                                 ret = PTR_ERR(trans);
9370                                 goto repair_abort;
9371                         }
9372
9373                         btrfs_fix_block_accounting(trans, root);
9374                         ret = btrfs_commit_transaction(trans, root);
9375                         if (ret)
9376                                 goto repair_abort;
9377                 }
9378                 return ret;
9379         }
9380         return 0;
9381 }
9382
9383 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9384 {
9385         u64 stripe_size;
9386
9387         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9388                 stripe_size = length;
9389                 stripe_size /= num_stripes;
9390         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9391                 stripe_size = length * 2;
9392                 stripe_size /= num_stripes;
9393         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9394                 stripe_size = length;
9395                 stripe_size /= (num_stripes - 1);
9396         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9397                 stripe_size = length;
9398                 stripe_size /= (num_stripes - 2);
9399         } else {
9400                 stripe_size = length;
9401         }
9402         return stripe_size;
9403 }
9404
9405 /*
9406  * Check the chunk with its block group/dev list ref:
9407  * Return 0 if all refs seems valid.
9408  * Return 1 if part of refs seems valid, need later check for rebuild ref
9409  * like missing block group and needs to search extent tree to rebuild them.
9410  * Return -1 if essential refs are missing and unable to rebuild.
9411  */
9412 static int check_chunk_refs(struct chunk_record *chunk_rec,
9413                             struct block_group_tree *block_group_cache,
9414                             struct device_extent_tree *dev_extent_cache,
9415                             int silent)
9416 {
9417         struct cache_extent *block_group_item;
9418         struct block_group_record *block_group_rec;
9419         struct cache_extent *dev_extent_item;
9420         struct device_extent_record *dev_extent_rec;
9421         u64 devid;
9422         u64 offset;
9423         u64 length;
9424         int metadump_v2 = 0;
9425         int i;
9426         int ret = 0;
9427
9428         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9429                                                chunk_rec->offset,
9430                                                chunk_rec->length);
9431         if (block_group_item) {
9432                 block_group_rec = container_of(block_group_item,
9433                                                struct block_group_record,
9434                                                cache);
9435                 if (chunk_rec->length != block_group_rec->offset ||
9436                     chunk_rec->offset != block_group_rec->objectid ||
9437                     (!metadump_v2 &&
9438                      chunk_rec->type_flags != block_group_rec->flags)) {
9439                         if (!silent)
9440                                 fprintf(stderr,
9441                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9442                                         chunk_rec->objectid,
9443                                         chunk_rec->type,
9444                                         chunk_rec->offset,
9445                                         chunk_rec->length,
9446                                         chunk_rec->offset,
9447                                         chunk_rec->type_flags,
9448                                         block_group_rec->objectid,
9449                                         block_group_rec->type,
9450                                         block_group_rec->offset,
9451                                         block_group_rec->offset,
9452                                         block_group_rec->objectid,
9453                                         block_group_rec->flags);
9454                         ret = -1;
9455                 } else {
9456                         list_del_init(&block_group_rec->list);
9457                         chunk_rec->bg_rec = block_group_rec;
9458                 }
9459         } else {
9460                 if (!silent)
9461                         fprintf(stderr,
9462                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9463                                 chunk_rec->objectid,
9464                                 chunk_rec->type,
9465                                 chunk_rec->offset,
9466                                 chunk_rec->length,
9467                                 chunk_rec->offset,
9468                                 chunk_rec->type_flags);
9469                 ret = 1;
9470         }
9471
9472         if (metadump_v2)
9473                 return ret;
9474
9475         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9476                                     chunk_rec->num_stripes);
9477         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9478                 devid = chunk_rec->stripes[i].devid;
9479                 offset = chunk_rec->stripes[i].offset;
9480                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9481                                                        devid, offset, length);
9482                 if (dev_extent_item) {
9483                         dev_extent_rec = container_of(dev_extent_item,
9484                                                 struct device_extent_record,
9485                                                 cache);
9486                         if (dev_extent_rec->objectid != devid ||
9487                             dev_extent_rec->offset != offset ||
9488                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9489                             dev_extent_rec->length != length) {
9490                                 if (!silent)
9491                                         fprintf(stderr,
9492                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9493                                                 chunk_rec->objectid,
9494                                                 chunk_rec->type,
9495                                                 chunk_rec->offset,
9496                                                 chunk_rec->stripes[i].devid,
9497                                                 chunk_rec->stripes[i].offset,
9498                                                 dev_extent_rec->objectid,
9499                                                 dev_extent_rec->offset,
9500                                                 dev_extent_rec->length);
9501                                 ret = -1;
9502                         } else {
9503                                 list_move(&dev_extent_rec->chunk_list,
9504                                           &chunk_rec->dextents);
9505                         }
9506                 } else {
9507                         if (!silent)
9508                                 fprintf(stderr,
9509                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9510                                         chunk_rec->objectid,
9511                                         chunk_rec->type,
9512                                         chunk_rec->offset,
9513                                         chunk_rec->stripes[i].devid,
9514                                         chunk_rec->stripes[i].offset);
9515                         ret = -1;
9516                 }
9517         }
9518         return ret;
9519 }
9520
9521 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9522 int check_chunks(struct cache_tree *chunk_cache,
9523                  struct block_group_tree *block_group_cache,
9524                  struct device_extent_tree *dev_extent_cache,
9525                  struct list_head *good, struct list_head *bad,
9526                  struct list_head *rebuild, int silent)
9527 {
9528         struct cache_extent *chunk_item;
9529         struct chunk_record *chunk_rec;
9530         struct block_group_record *bg_rec;
9531         struct device_extent_record *dext_rec;
9532         int err;
9533         int ret = 0;
9534
9535         chunk_item = first_cache_extent(chunk_cache);
9536         while (chunk_item) {
9537                 chunk_rec = container_of(chunk_item, struct chunk_record,
9538                                          cache);
9539                 err = check_chunk_refs(chunk_rec, block_group_cache,
9540                                        dev_extent_cache, silent);
9541                 if (err < 0)
9542                         ret = err;
9543                 if (err == 0 && good)
9544                         list_add_tail(&chunk_rec->list, good);
9545                 if (err > 0 && rebuild)
9546                         list_add_tail(&chunk_rec->list, rebuild);
9547                 if (err < 0 && bad)
9548                         list_add_tail(&chunk_rec->list, bad);
9549                 chunk_item = next_cache_extent(chunk_item);
9550         }
9551
9552         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9553                 if (!silent)
9554                         fprintf(stderr,
9555                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9556                                 bg_rec->objectid,
9557                                 bg_rec->offset,
9558                                 bg_rec->flags);
9559                 if (!ret)
9560                         ret = 1;
9561         }
9562
9563         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9564                             chunk_list) {
9565                 if (!silent)
9566                         fprintf(stderr,
9567                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9568                                 dext_rec->objectid,
9569                                 dext_rec->offset,
9570                                 dext_rec->length);
9571                 if (!ret)
9572                         ret = 1;
9573         }
9574         return ret;
9575 }
9576
9577
9578 static int check_device_used(struct device_record *dev_rec,
9579                              struct device_extent_tree *dext_cache)
9580 {
9581         struct cache_extent *cache;
9582         struct device_extent_record *dev_extent_rec;
9583         u64 total_byte = 0;
9584
9585         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9586         while (cache) {
9587                 dev_extent_rec = container_of(cache,
9588                                               struct device_extent_record,
9589                                               cache);
9590                 if (dev_extent_rec->objectid != dev_rec->devid)
9591                         break;
9592
9593                 list_del_init(&dev_extent_rec->device_list);
9594                 total_byte += dev_extent_rec->length;
9595                 cache = next_cache_extent(cache);
9596         }
9597
9598         if (total_byte != dev_rec->byte_used) {
9599                 fprintf(stderr,
9600                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9601                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9602                         dev_rec->type, dev_rec->offset);
9603                 return -1;
9604         } else {
9605                 return 0;
9606         }
9607 }
9608
9609 /* check btrfs_dev_item -> btrfs_dev_extent */
9610 static int check_devices(struct rb_root *dev_cache,
9611                          struct device_extent_tree *dev_extent_cache)
9612 {
9613         struct rb_node *dev_node;
9614         struct device_record *dev_rec;
9615         struct device_extent_record *dext_rec;
9616         int err;
9617         int ret = 0;
9618
9619         dev_node = rb_first(dev_cache);
9620         while (dev_node) {
9621                 dev_rec = container_of(dev_node, struct device_record, node);
9622                 err = check_device_used(dev_rec, dev_extent_cache);
9623                 if (err)
9624                         ret = err;
9625
9626                 dev_node = rb_next(dev_node);
9627         }
9628         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9629                             device_list) {
9630                 fprintf(stderr,
9631                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9632                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9633                 if (!ret)
9634                         ret = 1;
9635         }
9636         return ret;
9637 }
9638
9639 static int add_root_item_to_list(struct list_head *head,
9640                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9641                                   u8 level, u8 drop_level,
9642                                   int level_size, struct btrfs_key *drop_key)
9643 {
9644
9645         struct root_item_record *ri_rec;
9646         ri_rec = malloc(sizeof(*ri_rec));
9647         if (!ri_rec)
9648                 return -ENOMEM;
9649         ri_rec->bytenr = bytenr;
9650         ri_rec->objectid = objectid;
9651         ri_rec->level = level;
9652         ri_rec->level_size = level_size;
9653         ri_rec->drop_level = drop_level;
9654         ri_rec->last_snapshot = last_snapshot;
9655         if (drop_key)
9656                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9657         list_add_tail(&ri_rec->list, head);
9658
9659         return 0;
9660 }
9661
9662 static void free_root_item_list(struct list_head *list)
9663 {
9664         struct root_item_record *ri_rec;
9665
9666         while (!list_empty(list)) {
9667                 ri_rec = list_first_entry(list, struct root_item_record,
9668                                           list);
9669                 list_del_init(&ri_rec->list);
9670                 free(ri_rec);
9671         }
9672 }
9673
9674 static int deal_root_from_list(struct list_head *list,
9675                                struct btrfs_root *root,
9676                                struct block_info *bits,
9677                                int bits_nr,
9678                                struct cache_tree *pending,
9679                                struct cache_tree *seen,
9680                                struct cache_tree *reada,
9681                                struct cache_tree *nodes,
9682                                struct cache_tree *extent_cache,
9683                                struct cache_tree *chunk_cache,
9684                                struct rb_root *dev_cache,
9685                                struct block_group_tree *block_group_cache,
9686                                struct device_extent_tree *dev_extent_cache)
9687 {
9688         int ret = 0;
9689         u64 last;
9690
9691         while (!list_empty(list)) {
9692                 struct root_item_record *rec;
9693                 struct extent_buffer *buf;
9694                 rec = list_entry(list->next,
9695                                  struct root_item_record, list);
9696                 last = 0;
9697                 buf = read_tree_block(root->fs_info->tree_root,
9698                                       rec->bytenr, rec->level_size, 0);
9699                 if (!extent_buffer_uptodate(buf)) {
9700                         free_extent_buffer(buf);
9701                         ret = -EIO;
9702                         break;
9703                 }
9704                 ret = add_root_to_pending(buf, extent_cache, pending,
9705                                     seen, nodes, rec->objectid);
9706                 if (ret < 0)
9707                         break;
9708                 /*
9709                  * To rebuild extent tree, we need deal with snapshot
9710                  * one by one, otherwise we deal with node firstly which
9711                  * can maximize readahead.
9712                  */
9713                 while (1) {
9714                         ret = run_next_block(root, bits, bits_nr, &last,
9715                                              pending, seen, reada, nodes,
9716                                              extent_cache, chunk_cache,
9717                                              dev_cache, block_group_cache,
9718                                              dev_extent_cache, rec);
9719                         if (ret != 0)
9720                                 break;
9721                 }
9722                 free_extent_buffer(buf);
9723                 list_del(&rec->list);
9724                 free(rec);
9725                 if (ret < 0)
9726                         break;
9727         }
9728         while (ret >= 0) {
9729                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9730                                      reada, nodes, extent_cache, chunk_cache,
9731                                      dev_cache, block_group_cache,
9732                                      dev_extent_cache, NULL);
9733                 if (ret != 0) {
9734                         if (ret > 0)
9735                                 ret = 0;
9736                         break;
9737                 }
9738         }
9739         return ret;
9740 }
9741
9742 static int check_chunks_and_extents(struct btrfs_root *root)
9743 {
9744         struct rb_root dev_cache;
9745         struct cache_tree chunk_cache;
9746         struct block_group_tree block_group_cache;
9747         struct device_extent_tree dev_extent_cache;
9748         struct cache_tree extent_cache;
9749         struct cache_tree seen;
9750         struct cache_tree pending;
9751         struct cache_tree reada;
9752         struct cache_tree nodes;
9753         struct extent_io_tree excluded_extents;
9754         struct cache_tree corrupt_blocks;
9755         struct btrfs_path path;
9756         struct btrfs_key key;
9757         struct btrfs_key found_key;
9758         int ret, err = 0;
9759         struct block_info *bits;
9760         int bits_nr;
9761         struct extent_buffer *leaf;
9762         int slot;
9763         struct btrfs_root_item ri;
9764         struct list_head dropping_trees;
9765         struct list_head normal_trees;
9766         struct btrfs_root *root1;
9767         u64 objectid;
9768         u32 level_size;
9769         u8 level;
9770
9771         dev_cache = RB_ROOT;
9772         cache_tree_init(&chunk_cache);
9773         block_group_tree_init(&block_group_cache);
9774         device_extent_tree_init(&dev_extent_cache);
9775
9776         cache_tree_init(&extent_cache);
9777         cache_tree_init(&seen);
9778         cache_tree_init(&pending);
9779         cache_tree_init(&nodes);
9780         cache_tree_init(&reada);
9781         cache_tree_init(&corrupt_blocks);
9782         extent_io_tree_init(&excluded_extents);
9783         INIT_LIST_HEAD(&dropping_trees);
9784         INIT_LIST_HEAD(&normal_trees);
9785
9786         if (repair) {
9787                 root->fs_info->excluded_extents = &excluded_extents;
9788                 root->fs_info->fsck_extent_cache = &extent_cache;
9789                 root->fs_info->free_extent_hook = free_extent_hook;
9790                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9791         }
9792
9793         bits_nr = 1024;
9794         bits = malloc(bits_nr * sizeof(struct block_info));
9795         if (!bits) {
9796                 perror("malloc");
9797                 exit(1);
9798         }
9799
9800         if (ctx.progress_enabled) {
9801                 ctx.tp = TASK_EXTENTS;
9802                 task_start(ctx.info);
9803         }
9804
9805 again:
9806         root1 = root->fs_info->tree_root;
9807         level = btrfs_header_level(root1->node);
9808         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9809                                     root1->node->start, 0, level, 0,
9810                                     root1->nodesize, NULL);
9811         if (ret < 0)
9812                 goto out;
9813         root1 = root->fs_info->chunk_root;
9814         level = btrfs_header_level(root1->node);
9815         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9816                                     root1->node->start, 0, level, 0,
9817                                     root1->nodesize, NULL);
9818         if (ret < 0)
9819                 goto out;
9820         btrfs_init_path(&path);
9821         key.offset = 0;
9822         key.objectid = 0;
9823         key.type = BTRFS_ROOT_ITEM_KEY;
9824         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9825                                         &key, &path, 0, 0);
9826         if (ret < 0)
9827                 goto out;
9828         while(1) {
9829                 leaf = path.nodes[0];
9830                 slot = path.slots[0];
9831                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9832                         ret = btrfs_next_leaf(root, &path);
9833                         if (ret != 0)
9834                                 break;
9835                         leaf = path.nodes[0];
9836                         slot = path.slots[0];
9837                 }
9838                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9839                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9840                         unsigned long offset;
9841                         u64 last_snapshot;
9842
9843                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9844                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9845                         last_snapshot = btrfs_root_last_snapshot(&ri);
9846                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9847                                 level = btrfs_root_level(&ri);
9848                                 level_size = root->nodesize;
9849                                 ret = add_root_item_to_list(&normal_trees,
9850                                                 found_key.objectid,
9851                                                 btrfs_root_bytenr(&ri),
9852                                                 last_snapshot, level,
9853                                                 0, level_size, NULL);
9854                                 if (ret < 0)
9855                                         goto out;
9856                         } else {
9857                                 level = btrfs_root_level(&ri);
9858                                 level_size = root->nodesize;
9859                                 objectid = found_key.objectid;
9860                                 btrfs_disk_key_to_cpu(&found_key,
9861                                                       &ri.drop_progress);
9862                                 ret = add_root_item_to_list(&dropping_trees,
9863                                                 objectid,
9864                                                 btrfs_root_bytenr(&ri),
9865                                                 last_snapshot, level,
9866                                                 ri.drop_level,
9867                                                 level_size, &found_key);
9868                                 if (ret < 0)
9869                                         goto out;
9870                         }
9871                 }
9872                 path.slots[0]++;
9873         }
9874         btrfs_release_path(&path);
9875
9876         /*
9877          * check_block can return -EAGAIN if it fixes something, please keep
9878          * this in mind when dealing with return values from these functions, if
9879          * we get -EAGAIN we want to fall through and restart the loop.
9880          */
9881         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9882                                   &seen, &reada, &nodes, &extent_cache,
9883                                   &chunk_cache, &dev_cache, &block_group_cache,
9884                                   &dev_extent_cache);
9885         if (ret < 0) {
9886                 if (ret == -EAGAIN)
9887                         goto loop;
9888                 goto out;
9889         }
9890         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9891                                   &pending, &seen, &reada, &nodes,
9892                                   &extent_cache, &chunk_cache, &dev_cache,
9893                                   &block_group_cache, &dev_extent_cache);
9894         if (ret < 0) {
9895                 if (ret == -EAGAIN)
9896                         goto loop;
9897                 goto out;
9898         }
9899
9900         ret = check_chunks(&chunk_cache, &block_group_cache,
9901                            &dev_extent_cache, NULL, NULL, NULL, 0);
9902         if (ret) {
9903                 if (ret == -EAGAIN)
9904                         goto loop;
9905                 err = ret;
9906         }
9907
9908         ret = check_extent_refs(root, &extent_cache);
9909         if (ret < 0) {
9910                 if (ret == -EAGAIN)
9911                         goto loop;
9912                 goto out;
9913         }
9914
9915         ret = check_devices(&dev_cache, &dev_extent_cache);
9916         if (ret && err)
9917                 ret = err;
9918
9919 out:
9920         task_stop(ctx.info);
9921         if (repair) {
9922                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9923                 extent_io_tree_cleanup(&excluded_extents);
9924                 root->fs_info->fsck_extent_cache = NULL;
9925                 root->fs_info->free_extent_hook = NULL;
9926                 root->fs_info->corrupt_blocks = NULL;
9927                 root->fs_info->excluded_extents = NULL;
9928         }
9929         free(bits);
9930         free_chunk_cache_tree(&chunk_cache);
9931         free_device_cache_tree(&dev_cache);
9932         free_block_group_tree(&block_group_cache);
9933         free_device_extent_tree(&dev_extent_cache);
9934         free_extent_cache_tree(&seen);
9935         free_extent_cache_tree(&pending);
9936         free_extent_cache_tree(&reada);
9937         free_extent_cache_tree(&nodes);
9938         return ret;
9939 loop:
9940         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9941         free_extent_cache_tree(&seen);
9942         free_extent_cache_tree(&pending);
9943         free_extent_cache_tree(&reada);
9944         free_extent_cache_tree(&nodes);
9945         free_chunk_cache_tree(&chunk_cache);
9946         free_block_group_tree(&block_group_cache);
9947         free_device_cache_tree(&dev_cache);
9948         free_device_extent_tree(&dev_extent_cache);
9949         free_extent_record_cache(root->fs_info, &extent_cache);
9950         free_root_item_list(&normal_trees);
9951         free_root_item_list(&dropping_trees);
9952         extent_io_tree_cleanup(&excluded_extents);
9953         goto again;
9954 }
9955
9956 /*
9957  * Check backrefs of a tree block given by @bytenr or @eb.
9958  *
9959  * @root:       the root containing the @bytenr or @eb
9960  * @eb:         tree block extent buffer, can be NULL
9961  * @bytenr:     bytenr of the tree block to search
9962  * @level:      tree level of the tree block
9963  * @owner:      owner of the tree block
9964  *
9965  * Return >0 for any error found and output error message
9966  * Return 0 for no error found
9967  */
9968 static int check_tree_block_ref(struct btrfs_root *root,
9969                                 struct extent_buffer *eb, u64 bytenr,
9970                                 int level, u64 owner)
9971 {
9972         struct btrfs_key key;
9973         struct btrfs_root *extent_root = root->fs_info->extent_root;
9974         struct btrfs_path path;
9975         struct btrfs_extent_item *ei;
9976         struct btrfs_extent_inline_ref *iref;
9977         struct extent_buffer *leaf;
9978         unsigned long end;
9979         unsigned long ptr;
9980         int slot;
9981         int skinny_level;
9982         int type;
9983         u32 nodesize = root->nodesize;
9984         u32 item_size;
9985         u64 offset;
9986         int tree_reloc_root = 0;
9987         int found_ref = 0;
9988         int err = 0;
9989         int ret;
9990
9991         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9992             btrfs_header_bytenr(root->node) == bytenr)
9993                 tree_reloc_root = 1;
9994
9995         btrfs_init_path(&path);
9996         key.objectid = bytenr;
9997         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9998                 key.type = BTRFS_METADATA_ITEM_KEY;
9999         else
10000                 key.type = BTRFS_EXTENT_ITEM_KEY;
10001         key.offset = (u64)-1;
10002
10003         /* Search for the backref in extent tree */
10004         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10005         if (ret < 0) {
10006                 err |= BACKREF_MISSING;
10007                 goto out;
10008         }
10009         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10010         if (ret) {
10011                 err |= BACKREF_MISSING;
10012                 goto out;
10013         }
10014
10015         leaf = path.nodes[0];
10016         slot = path.slots[0];
10017         btrfs_item_key_to_cpu(leaf, &key, slot);
10018
10019         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10020
10021         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10022                 skinny_level = (int)key.offset;
10023                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10024         } else {
10025                 struct btrfs_tree_block_info *info;
10026
10027                 info = (struct btrfs_tree_block_info *)(ei + 1);
10028                 skinny_level = btrfs_tree_block_level(leaf, info);
10029                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10030         }
10031
10032         if (eb) {
10033                 u64 header_gen;
10034                 u64 extent_gen;
10035
10036                 if (!(btrfs_extent_flags(leaf, ei) &
10037                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10038                         error(
10039                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10040                                 key.objectid, nodesize,
10041                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10042                         err = BACKREF_MISMATCH;
10043                 }
10044                 header_gen = btrfs_header_generation(eb);
10045                 extent_gen = btrfs_extent_generation(leaf, ei);
10046                 if (header_gen != extent_gen) {
10047                         error(
10048         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10049                                 key.objectid, nodesize, header_gen,
10050                                 extent_gen);
10051                         err = BACKREF_MISMATCH;
10052                 }
10053                 if (level != skinny_level) {
10054                         error(
10055                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10056                                 key.objectid, nodesize, level, skinny_level);
10057                         err = BACKREF_MISMATCH;
10058                 }
10059                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10060                         error(
10061                         "extent[%llu %u] is referred by other roots than %llu",
10062                                 key.objectid, nodesize, root->objectid);
10063                         err = BACKREF_MISMATCH;
10064                 }
10065         }
10066
10067         /*
10068          * Iterate the extent/metadata item to find the exact backref
10069          */
10070         item_size = btrfs_item_size_nr(leaf, slot);
10071         ptr = (unsigned long)iref;
10072         end = (unsigned long)ei + item_size;
10073         while (ptr < end) {
10074                 iref = (struct btrfs_extent_inline_ref *)ptr;
10075                 type = btrfs_extent_inline_ref_type(leaf, iref);
10076                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10077
10078                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10079                         (offset == root->objectid || offset == owner)) {
10080                         found_ref = 1;
10081                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10082                         /*
10083                          * Backref of tree reloc root points to itself, no need
10084                          * to check backref any more.
10085                          */
10086                         if (tree_reloc_root)
10087                                 found_ref = 1;
10088                         else
10089                         /* Check if the backref points to valid referencer */
10090                                 found_ref = !check_tree_block_ref(root, NULL,
10091                                                 offset, level + 1, owner);
10092                 }
10093
10094                 if (found_ref)
10095                         break;
10096                 ptr += btrfs_extent_inline_ref_size(type);
10097         }
10098
10099         /*
10100          * Inlined extent item doesn't have what we need, check
10101          * TREE_BLOCK_REF_KEY
10102          */
10103         if (!found_ref) {
10104                 btrfs_release_path(&path);
10105                 key.objectid = bytenr;
10106                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10107                 key.offset = root->objectid;
10108
10109                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10110                 if (!ret)
10111                         found_ref = 1;
10112         }
10113         if (!found_ref)
10114                 err |= BACKREF_MISSING;
10115 out:
10116         btrfs_release_path(&path);
10117         if (eb && (err & BACKREF_MISSING))
10118                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10119                         bytenr, nodesize, owner, level);
10120         return err;
10121 }
10122
10123 /*
10124  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10125  *
10126  * Return >0 any error found and output error message
10127  * Return 0 for no error found
10128  */
10129 static int check_extent_data_item(struct btrfs_root *root,
10130                                   struct extent_buffer *eb, int slot)
10131 {
10132         struct btrfs_file_extent_item *fi;
10133         struct btrfs_path path;
10134         struct btrfs_root *extent_root = root->fs_info->extent_root;
10135         struct btrfs_key fi_key;
10136         struct btrfs_key dbref_key;
10137         struct extent_buffer *leaf;
10138         struct btrfs_extent_item *ei;
10139         struct btrfs_extent_inline_ref *iref;
10140         struct btrfs_extent_data_ref *dref;
10141         u64 owner;
10142         u64 disk_bytenr;
10143         u64 disk_num_bytes;
10144         u64 extent_num_bytes;
10145         u64 extent_flags;
10146         u32 item_size;
10147         unsigned long end;
10148         unsigned long ptr;
10149         int type;
10150         u64 ref_root;
10151         int found_dbackref = 0;
10152         int err = 0;
10153         int ret;
10154
10155         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10156         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10157
10158         /* Nothing to check for hole and inline data extents */
10159         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10160             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10161                 return 0;
10162
10163         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10164         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10165         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10166
10167         /* Check unaligned disk_num_bytes and num_bytes */
10168         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10169                 error(
10170 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10171                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10172                         root->sectorsize);
10173                 err |= BYTES_UNALIGNED;
10174         } else {
10175                 data_bytes_allocated += disk_num_bytes;
10176         }
10177         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10178                 error(
10179 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10180                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10181                         root->sectorsize);
10182                 err |= BYTES_UNALIGNED;
10183         } else {
10184                 data_bytes_referenced += extent_num_bytes;
10185         }
10186         owner = btrfs_header_owner(eb);
10187
10188         /* Check the extent item of the file extent in extent tree */
10189         btrfs_init_path(&path);
10190         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10191         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10192         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10193
10194         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10195         if (ret) {
10196                 err |= BACKREF_MISSING;
10197                 goto error;
10198         }
10199
10200         leaf = path.nodes[0];
10201         slot = path.slots[0];
10202         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10203
10204         extent_flags = btrfs_extent_flags(leaf, ei);
10205
10206         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10207                 error(
10208                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10209                     disk_bytenr, disk_num_bytes,
10210                     BTRFS_EXTENT_FLAG_DATA);
10211                 err |= BACKREF_MISMATCH;
10212         }
10213
10214         /* Check data backref inside that extent item */
10215         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10216         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10217         ptr = (unsigned long)iref;
10218         end = (unsigned long)ei + item_size;
10219         while (ptr < end) {
10220                 iref = (struct btrfs_extent_inline_ref *)ptr;
10221                 type = btrfs_extent_inline_ref_type(leaf, iref);
10222                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10223
10224                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10225                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10226                         if (ref_root == owner || ref_root == root->objectid)
10227                                 found_dbackref = 1;
10228                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10229                         found_dbackref = !check_tree_block_ref(root, NULL,
10230                                 btrfs_extent_inline_ref_offset(leaf, iref),
10231                                 0, owner);
10232                 }
10233
10234                 if (found_dbackref)
10235                         break;
10236                 ptr += btrfs_extent_inline_ref_size(type);
10237         }
10238
10239         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10240         if (!found_dbackref) {
10241                 btrfs_release_path(&path);
10242
10243                 btrfs_init_path(&path);
10244                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10245                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10246                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10247                                 fi_key.objectid, fi_key.offset);
10248
10249                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10250                                         &dbref_key, &path, 0, 0);
10251                 if (!ret)
10252                         found_dbackref = 1;
10253         }
10254
10255         if (!found_dbackref)
10256                 err |= BACKREF_MISSING;
10257 error:
10258         btrfs_release_path(&path);
10259         if (err & BACKREF_MISSING) {
10260                 error("data extent[%llu %llu] backref lost",
10261                       disk_bytenr, disk_num_bytes);
10262         }
10263         return err;
10264 }
10265
10266 /*
10267  * Get real tree block level for the case like shared block
10268  * Return >= 0 as tree level
10269  * Return <0 for error
10270  */
10271 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10272 {
10273         struct extent_buffer *eb;
10274         struct btrfs_path path;
10275         struct btrfs_key key;
10276         struct btrfs_extent_item *ei;
10277         u64 flags;
10278         u64 transid;
10279         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10280         u8 backref_level;
10281         u8 header_level;
10282         int ret;
10283
10284         /* Search extent tree for extent generation and level */
10285         key.objectid = bytenr;
10286         key.type = BTRFS_METADATA_ITEM_KEY;
10287         key.offset = (u64)-1;
10288
10289         btrfs_init_path(&path);
10290         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10291         if (ret < 0)
10292                 goto release_out;
10293         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10294         if (ret < 0)
10295                 goto release_out;
10296         if (ret > 0) {
10297                 ret = -ENOENT;
10298                 goto release_out;
10299         }
10300
10301         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10302         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10303                             struct btrfs_extent_item);
10304         flags = btrfs_extent_flags(path.nodes[0], ei);
10305         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10306                 ret = -ENOENT;
10307                 goto release_out;
10308         }
10309
10310         /* Get transid for later read_tree_block() check */
10311         transid = btrfs_extent_generation(path.nodes[0], ei);
10312
10313         /* Get backref level as one source */
10314         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10315                 backref_level = key.offset;
10316         } else {
10317                 struct btrfs_tree_block_info *info;
10318
10319                 info = (struct btrfs_tree_block_info *)(ei + 1);
10320                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10321         }
10322         btrfs_release_path(&path);
10323
10324         /* Get level from tree block as an alternative source */
10325         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10326         if (!extent_buffer_uptodate(eb)) {
10327                 free_extent_buffer(eb);
10328                 return -EIO;
10329         }
10330         header_level = btrfs_header_level(eb);
10331         free_extent_buffer(eb);
10332
10333         if (header_level != backref_level)
10334                 return -EIO;
10335         return header_level;
10336
10337 release_out:
10338         btrfs_release_path(&path);
10339         return ret;
10340 }
10341
10342 /*
10343  * Check if a tree block backref is valid (points to a valid tree block)
10344  * if level == -1, level will be resolved
10345  * Return >0 for any error found and print error message
10346  */
10347 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10348                                     u64 bytenr, int level)
10349 {
10350         struct btrfs_root *root;
10351         struct btrfs_key key;
10352         struct btrfs_path path;
10353         struct extent_buffer *eb;
10354         struct extent_buffer *node;
10355         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10356         int err = 0;
10357         int ret;
10358
10359         /* Query level for level == -1 special case */
10360         if (level == -1)
10361                 level = query_tree_block_level(fs_info, bytenr);
10362         if (level < 0) {
10363                 err |= REFERENCER_MISSING;
10364                 goto out;
10365         }
10366
10367         key.objectid = root_id;
10368         key.type = BTRFS_ROOT_ITEM_KEY;
10369         key.offset = (u64)-1;
10370
10371         root = btrfs_read_fs_root(fs_info, &key);
10372         if (IS_ERR(root)) {
10373                 err |= REFERENCER_MISSING;
10374                 goto out;
10375         }
10376
10377         /* Read out the tree block to get item/node key */
10378         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10379         if (!extent_buffer_uptodate(eb)) {
10380                 err |= REFERENCER_MISSING;
10381                 free_extent_buffer(eb);
10382                 goto out;
10383         }
10384
10385         /* Empty tree, no need to check key */
10386         if (!btrfs_header_nritems(eb) && !level) {
10387                 free_extent_buffer(eb);
10388                 goto out;
10389         }
10390
10391         if (level)
10392                 btrfs_node_key_to_cpu(eb, &key, 0);
10393         else
10394                 btrfs_item_key_to_cpu(eb, &key, 0);
10395
10396         free_extent_buffer(eb);
10397
10398         btrfs_init_path(&path);
10399         path.lowest_level = level;
10400         /* Search with the first key, to ensure we can reach it */
10401         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10402         if (ret < 0) {
10403                 err |= REFERENCER_MISSING;
10404                 goto release_out;
10405         }
10406
10407         node = path.nodes[level];
10408         if (btrfs_header_bytenr(node) != bytenr) {
10409                 error(
10410         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10411                         bytenr, nodesize, bytenr,
10412                         btrfs_header_bytenr(node));
10413                 err |= REFERENCER_MISMATCH;
10414         }
10415         if (btrfs_header_level(node) != level) {
10416                 error(
10417         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10418                         bytenr, nodesize, level,
10419                         btrfs_header_level(node));
10420                 err |= REFERENCER_MISMATCH;
10421         }
10422
10423 release_out:
10424         btrfs_release_path(&path);
10425 out:
10426         if (err & REFERENCER_MISSING) {
10427                 if (level < 0)
10428                         error("extent [%llu %d] lost referencer (owner: %llu)",
10429                                 bytenr, nodesize, root_id);
10430                 else
10431                         error(
10432                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10433                                 bytenr, nodesize, root_id, level);
10434         }
10435
10436         return err;
10437 }
10438
10439 /*
10440  * Check if tree block @eb is tree reloc root.
10441  * Return 0 if it's not or any problem happens
10442  * Return 1 if it's a tree reloc root
10443  */
10444 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10445                                  struct extent_buffer *eb)
10446 {
10447         struct btrfs_root *tree_reloc_root;
10448         struct btrfs_key key;
10449         u64 bytenr = btrfs_header_bytenr(eb);
10450         u64 owner = btrfs_header_owner(eb);
10451         int ret = 0;
10452
10453         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10454         key.offset = owner;
10455         key.type = BTRFS_ROOT_ITEM_KEY;
10456
10457         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10458         if (IS_ERR(tree_reloc_root))
10459                 return 0;
10460
10461         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10462                 ret = 1;
10463         btrfs_free_fs_root(tree_reloc_root);
10464         return ret;
10465 }
10466
10467 /*
10468  * Check referencer for shared block backref
10469  * If level == -1, this function will resolve the level.
10470  */
10471 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10472                                      u64 parent, u64 bytenr, int level)
10473 {
10474         struct extent_buffer *eb;
10475         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10476         u32 nr;
10477         int found_parent = 0;
10478         int i;
10479
10480         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10481         if (!extent_buffer_uptodate(eb))
10482                 goto out;
10483
10484         if (level == -1)
10485                 level = query_tree_block_level(fs_info, bytenr);
10486         if (level < 0)
10487                 goto out;
10488
10489         /* It's possible it's a tree reloc root */
10490         if (parent == bytenr) {
10491                 if (is_tree_reloc_root(fs_info, eb))
10492                         found_parent = 1;
10493                 goto out;
10494         }
10495
10496         if (level + 1 != btrfs_header_level(eb))
10497                 goto out;
10498
10499         nr = btrfs_header_nritems(eb);
10500         for (i = 0; i < nr; i++) {
10501                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10502                         found_parent = 1;
10503                         break;
10504                 }
10505         }
10506 out:
10507         free_extent_buffer(eb);
10508         if (!found_parent) {
10509                 error(
10510         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10511                         bytenr, nodesize, parent, level);
10512                 return REFERENCER_MISSING;
10513         }
10514         return 0;
10515 }
10516
10517 /*
10518  * Check referencer for normal (inlined) data ref
10519  * If len == 0, it will be resolved by searching in extent tree
10520  */
10521 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10522                                      u64 root_id, u64 objectid, u64 offset,
10523                                      u64 bytenr, u64 len, u32 count)
10524 {
10525         struct btrfs_root *root;
10526         struct btrfs_root *extent_root = fs_info->extent_root;
10527         struct btrfs_key key;
10528         struct btrfs_path path;
10529         struct extent_buffer *leaf;
10530         struct btrfs_file_extent_item *fi;
10531         u32 found_count = 0;
10532         int slot;
10533         int ret = 0;
10534
10535         if (!len) {
10536                 key.objectid = bytenr;
10537                 key.type = BTRFS_EXTENT_ITEM_KEY;
10538                 key.offset = (u64)-1;
10539
10540                 btrfs_init_path(&path);
10541                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10542                 if (ret < 0)
10543                         goto out;
10544                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10545                 if (ret)
10546                         goto out;
10547                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10548                 if (key.objectid != bytenr ||
10549                     key.type != BTRFS_EXTENT_ITEM_KEY)
10550                         goto out;
10551                 len = key.offset;
10552                 btrfs_release_path(&path);
10553         }
10554         key.objectid = root_id;
10555         key.type = BTRFS_ROOT_ITEM_KEY;
10556         key.offset = (u64)-1;
10557         btrfs_init_path(&path);
10558
10559         root = btrfs_read_fs_root(fs_info, &key);
10560         if (IS_ERR(root))
10561                 goto out;
10562
10563         key.objectid = objectid;
10564         key.type = BTRFS_EXTENT_DATA_KEY;
10565         /*
10566          * It can be nasty as data backref offset is
10567          * file offset - file extent offset, which is smaller or
10568          * equal to original backref offset.  The only special case is
10569          * overflow.  So we need to special check and do further search.
10570          */
10571         key.offset = offset & (1ULL << 63) ? 0 : offset;
10572
10573         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10574         if (ret < 0)
10575                 goto out;
10576
10577         /*
10578          * Search afterwards to get correct one
10579          * NOTE: As we must do a comprehensive check on the data backref to
10580          * make sure the dref count also matches, we must iterate all file
10581          * extents for that inode.
10582          */
10583         while (1) {
10584                 leaf = path.nodes[0];
10585                 slot = path.slots[0];
10586
10587                 btrfs_item_key_to_cpu(leaf, &key, slot);
10588                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10589                         break;
10590                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10591                 /*
10592                  * Except normal disk bytenr and disk num bytes, we still
10593                  * need to do extra check on dbackref offset as
10594                  * dbackref offset = file_offset - file_extent_offset
10595                  */
10596                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10597                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10598                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10599                     offset)
10600                         found_count++;
10601
10602                 ret = btrfs_next_item(root, &path);
10603                 if (ret)
10604                         break;
10605         }
10606 out:
10607         btrfs_release_path(&path);
10608         if (found_count != count) {
10609                 error(
10610 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10611                         bytenr, len, root_id, objectid, offset, count, found_count);
10612                 return REFERENCER_MISSING;
10613         }
10614         return 0;
10615 }
10616
10617 /*
10618  * Check if the referencer of a shared data backref exists
10619  */
10620 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10621                                      u64 parent, u64 bytenr)
10622 {
10623         struct extent_buffer *eb;
10624         struct btrfs_key key;
10625         struct btrfs_file_extent_item *fi;
10626         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10627         u32 nr;
10628         int found_parent = 0;
10629         int i;
10630
10631         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10632         if (!extent_buffer_uptodate(eb))
10633                 goto out;
10634
10635         nr = btrfs_header_nritems(eb);
10636         for (i = 0; i < nr; i++) {
10637                 btrfs_item_key_to_cpu(eb, &key, i);
10638                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10639                         continue;
10640
10641                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10642                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10643                         continue;
10644
10645                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10646                         found_parent = 1;
10647                         break;
10648                 }
10649         }
10650
10651 out:
10652         free_extent_buffer(eb);
10653         if (!found_parent) {
10654                 error("shared extent %llu referencer lost (parent: %llu)",
10655                         bytenr, parent);
10656                 return REFERENCER_MISSING;
10657         }
10658         return 0;
10659 }
10660
10661 /*
10662  * This function will check a given extent item, including its backref and
10663  * itself (like crossing stripe boundary and type)
10664  *
10665  * Since we don't use extent_record anymore, introduce new error bit
10666  */
10667 static int check_extent_item(struct btrfs_fs_info *fs_info,
10668                              struct extent_buffer *eb, int slot)
10669 {
10670         struct btrfs_extent_item *ei;
10671         struct btrfs_extent_inline_ref *iref;
10672         struct btrfs_extent_data_ref *dref;
10673         unsigned long end;
10674         unsigned long ptr;
10675         int type;
10676         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10677         u32 item_size = btrfs_item_size_nr(eb, slot);
10678         u64 flags;
10679         u64 offset;
10680         int metadata = 0;
10681         int level;
10682         struct btrfs_key key;
10683         int ret;
10684         int err = 0;
10685
10686         btrfs_item_key_to_cpu(eb, &key, slot);
10687         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10688                 bytes_used += key.offset;
10689         else
10690                 bytes_used += nodesize;
10691
10692         if (item_size < sizeof(*ei)) {
10693                 /*
10694                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10695                  * old thing when on disk format is still un-determined.
10696                  * No need to care about it anymore
10697                  */
10698                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10699                 return -ENOTTY;
10700         }
10701
10702         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10703         flags = btrfs_extent_flags(eb, ei);
10704
10705         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10706                 metadata = 1;
10707         if (metadata && check_crossing_stripes(global_info, key.objectid,
10708                                                eb->len)) {
10709                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10710                       key.objectid, key.objectid + nodesize);
10711                 err |= CROSSING_STRIPE_BOUNDARY;
10712         }
10713
10714         ptr = (unsigned long)(ei + 1);
10715
10716         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10717                 /* Old EXTENT_ITEM metadata */
10718                 struct btrfs_tree_block_info *info;
10719
10720                 info = (struct btrfs_tree_block_info *)ptr;
10721                 level = btrfs_tree_block_level(eb, info);
10722                 ptr += sizeof(struct btrfs_tree_block_info);
10723         } else {
10724                 /* New METADATA_ITEM */
10725                 level = key.offset;
10726         }
10727         end = (unsigned long)ei + item_size;
10728
10729         if (ptr >= end) {
10730                 err |= ITEM_SIZE_MISMATCH;
10731                 goto out;
10732         }
10733
10734         /* Now check every backref in this extent item */
10735 next:
10736         iref = (struct btrfs_extent_inline_ref *)ptr;
10737         type = btrfs_extent_inline_ref_type(eb, iref);
10738         offset = btrfs_extent_inline_ref_offset(eb, iref);
10739         switch (type) {
10740         case BTRFS_TREE_BLOCK_REF_KEY:
10741                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10742                                                level);
10743                 err |= ret;
10744                 break;
10745         case BTRFS_SHARED_BLOCK_REF_KEY:
10746                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10747                                                  level);
10748                 err |= ret;
10749                 break;
10750         case BTRFS_EXTENT_DATA_REF_KEY:
10751                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10752                 ret = check_extent_data_backref(fs_info,
10753                                 btrfs_extent_data_ref_root(eb, dref),
10754                                 btrfs_extent_data_ref_objectid(eb, dref),
10755                                 btrfs_extent_data_ref_offset(eb, dref),
10756                                 key.objectid, key.offset,
10757                                 btrfs_extent_data_ref_count(eb, dref));
10758                 err |= ret;
10759                 break;
10760         case BTRFS_SHARED_DATA_REF_KEY:
10761                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10762                 err |= ret;
10763                 break;
10764         default:
10765                 error("extent[%llu %d %llu] has unknown ref type: %d",
10766                         key.objectid, key.type, key.offset, type);
10767                 err |= UNKNOWN_TYPE;
10768                 goto out;
10769         }
10770
10771         ptr += btrfs_extent_inline_ref_size(type);
10772         if (ptr < end)
10773                 goto next;
10774
10775 out:
10776         return err;
10777 }
10778
10779 /*
10780  * Check if a dev extent item is referred correctly by its chunk
10781  */
10782 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10783                                  struct extent_buffer *eb, int slot)
10784 {
10785         struct btrfs_root *chunk_root = fs_info->chunk_root;
10786         struct btrfs_dev_extent *ptr;
10787         struct btrfs_path path;
10788         struct btrfs_key chunk_key;
10789         struct btrfs_key devext_key;
10790         struct btrfs_chunk *chunk;
10791         struct extent_buffer *l;
10792         int num_stripes;
10793         u64 length;
10794         int i;
10795         int found_chunk = 0;
10796         int ret;
10797
10798         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10799         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10800         length = btrfs_dev_extent_length(eb, ptr);
10801
10802         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10803         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10804         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10805
10806         btrfs_init_path(&path);
10807         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10808         if (ret)
10809                 goto out;
10810
10811         l = path.nodes[0];
10812         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10813         if (btrfs_chunk_length(l, chunk) != length)
10814                 goto out;
10815
10816         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10817         for (i = 0; i < num_stripes; i++) {
10818                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10819                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10820
10821                 if (devid == devext_key.objectid &&
10822                     offset == devext_key.offset) {
10823                         found_chunk = 1;
10824                         break;
10825                 }
10826         }
10827 out:
10828         btrfs_release_path(&path);
10829         if (!found_chunk) {
10830                 error(
10831                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10832                         devext_key.objectid, devext_key.offset, length);
10833                 return REFERENCER_MISSING;
10834         }
10835         return 0;
10836 }
10837
10838 /*
10839  * Check if the used space is correct with the dev item
10840  */
10841 static int check_dev_item(struct btrfs_fs_info *fs_info,
10842                           struct extent_buffer *eb, int slot)
10843 {
10844         struct btrfs_root *dev_root = fs_info->dev_root;
10845         struct btrfs_dev_item *dev_item;
10846         struct btrfs_path path;
10847         struct btrfs_key key;
10848         struct btrfs_dev_extent *ptr;
10849         u64 dev_id;
10850         u64 used;
10851         u64 total = 0;
10852         int ret;
10853
10854         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10855         dev_id = btrfs_device_id(eb, dev_item);
10856         used = btrfs_device_bytes_used(eb, dev_item);
10857
10858         key.objectid = dev_id;
10859         key.type = BTRFS_DEV_EXTENT_KEY;
10860         key.offset = 0;
10861
10862         btrfs_init_path(&path);
10863         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10864         if (ret < 0) {
10865                 btrfs_item_key_to_cpu(eb, &key, slot);
10866                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10867                         key.objectid, key.type, key.offset);
10868                 btrfs_release_path(&path);
10869                 return REFERENCER_MISSING;
10870         }
10871
10872         /* Iterate dev_extents to calculate the used space of a device */
10873         while (1) {
10874                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10875
10876                 if (key.objectid > dev_id)
10877                         break;
10878                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10879                         goto next;
10880
10881                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10882                                      struct btrfs_dev_extent);
10883                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10884 next:
10885                 ret = btrfs_next_item(dev_root, &path);
10886                 if (ret)
10887                         break;
10888         }
10889         btrfs_release_path(&path);
10890
10891         if (used != total) {
10892                 btrfs_item_key_to_cpu(eb, &key, slot);
10893                 error(
10894 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10895                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10896                         BTRFS_DEV_EXTENT_KEY, dev_id);
10897                 return ACCOUNTING_MISMATCH;
10898         }
10899         return 0;
10900 }
10901
10902 /*
10903  * Check a block group item with its referener (chunk) and its used space
10904  * with extent/metadata item
10905  */
10906 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10907                                   struct extent_buffer *eb, int slot)
10908 {
10909         struct btrfs_root *extent_root = fs_info->extent_root;
10910         struct btrfs_root *chunk_root = fs_info->chunk_root;
10911         struct btrfs_block_group_item *bi;
10912         struct btrfs_block_group_item bg_item;
10913         struct btrfs_path path;
10914         struct btrfs_key bg_key;
10915         struct btrfs_key chunk_key;
10916         struct btrfs_key extent_key;
10917         struct btrfs_chunk *chunk;
10918         struct extent_buffer *leaf;
10919         struct btrfs_extent_item *ei;
10920         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10921         u64 flags;
10922         u64 bg_flags;
10923         u64 used;
10924         u64 total = 0;
10925         int ret;
10926         int err = 0;
10927
10928         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10929         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10930         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10931         used = btrfs_block_group_used(&bg_item);
10932         bg_flags = btrfs_block_group_flags(&bg_item);
10933
10934         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10935         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10936         chunk_key.offset = bg_key.objectid;
10937
10938         btrfs_init_path(&path);
10939         /* Search for the referencer chunk */
10940         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10941         if (ret) {
10942                 error(
10943                 "block group[%llu %llu] did not find the related chunk item",
10944                         bg_key.objectid, bg_key.offset);
10945                 err |= REFERENCER_MISSING;
10946         } else {
10947                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10948                                         struct btrfs_chunk);
10949                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10950                                                 bg_key.offset) {
10951                         error(
10952         "block group[%llu %llu] related chunk item length does not match",
10953                                 bg_key.objectid, bg_key.offset);
10954                         err |= REFERENCER_MISMATCH;
10955                 }
10956         }
10957         btrfs_release_path(&path);
10958
10959         /* Search from the block group bytenr */
10960         extent_key.objectid = bg_key.objectid;
10961         extent_key.type = 0;
10962         extent_key.offset = 0;
10963
10964         btrfs_init_path(&path);
10965         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10966         if (ret < 0)
10967                 goto out;
10968
10969         /* Iterate extent tree to account used space */
10970         while (1) {
10971                 leaf = path.nodes[0];
10972                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10973                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10974                         break;
10975
10976                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10977                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10978                         goto next;
10979                 if (extent_key.objectid < bg_key.objectid)
10980                         goto next;
10981
10982                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10983                         total += nodesize;
10984                 else
10985                         total += extent_key.offset;
10986
10987                 ei = btrfs_item_ptr(leaf, path.slots[0],
10988                                     struct btrfs_extent_item);
10989                 flags = btrfs_extent_flags(leaf, ei);
10990                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10991                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10992                                 error(
10993                         "bad extent[%llu, %llu) type mismatch with chunk",
10994                                         extent_key.objectid,
10995                                         extent_key.objectid + extent_key.offset);
10996                                 err |= CHUNK_TYPE_MISMATCH;
10997                         }
10998                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10999                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11000                                     BTRFS_BLOCK_GROUP_METADATA))) {
11001                                 error(
11002                         "bad extent[%llu, %llu) type mismatch with chunk",
11003                                         extent_key.objectid,
11004                                         extent_key.objectid + nodesize);
11005                                 err |= CHUNK_TYPE_MISMATCH;
11006                         }
11007                 }
11008 next:
11009                 ret = btrfs_next_item(extent_root, &path);
11010                 if (ret)
11011                         break;
11012         }
11013
11014 out:
11015         btrfs_release_path(&path);
11016
11017         if (total != used) {
11018                 error(
11019                 "block group[%llu %llu] used %llu but extent items used %llu",
11020                         bg_key.objectid, bg_key.offset, used, total);
11021                 err |= ACCOUNTING_MISMATCH;
11022         }
11023         return err;
11024 }
11025
11026 /*
11027  * Check a chunk item.
11028  * Including checking all referred dev_extents and block group
11029  */
11030 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11031                             struct extent_buffer *eb, int slot)
11032 {
11033         struct btrfs_root *extent_root = fs_info->extent_root;
11034         struct btrfs_root *dev_root = fs_info->dev_root;
11035         struct btrfs_path path;
11036         struct btrfs_key chunk_key;
11037         struct btrfs_key bg_key;
11038         struct btrfs_key devext_key;
11039         struct btrfs_chunk *chunk;
11040         struct extent_buffer *leaf;
11041         struct btrfs_block_group_item *bi;
11042         struct btrfs_block_group_item bg_item;
11043         struct btrfs_dev_extent *ptr;
11044         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11045         u64 length;
11046         u64 chunk_end;
11047         u64 type;
11048         u64 profile;
11049         int num_stripes;
11050         u64 offset;
11051         u64 objectid;
11052         int i;
11053         int ret;
11054         int err = 0;
11055
11056         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11057         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11058         length = btrfs_chunk_length(eb, chunk);
11059         chunk_end = chunk_key.offset + length;
11060         if (!IS_ALIGNED(length, sectorsize)) {
11061                 error("chunk[%llu %llu) not aligned to %u",
11062                         chunk_key.offset, chunk_end, sectorsize);
11063                 err |= BYTES_UNALIGNED;
11064                 goto out;
11065         }
11066
11067         type = btrfs_chunk_type(eb, chunk);
11068         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11069         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11070                 error("chunk[%llu %llu) has no chunk type",
11071                         chunk_key.offset, chunk_end);
11072                 err |= UNKNOWN_TYPE;
11073         }
11074         if (profile && (profile & (profile - 1))) {
11075                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11076                         chunk_key.offset, chunk_end, profile);
11077                 err |= UNKNOWN_TYPE;
11078         }
11079
11080         bg_key.objectid = chunk_key.offset;
11081         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11082         bg_key.offset = length;
11083
11084         btrfs_init_path(&path);
11085         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11086         if (ret) {
11087                 error(
11088                 "chunk[%llu %llu) did not find the related block group item",
11089                         chunk_key.offset, chunk_end);
11090                 err |= REFERENCER_MISSING;
11091         } else{
11092                 leaf = path.nodes[0];
11093                 bi = btrfs_item_ptr(leaf, path.slots[0],
11094                                     struct btrfs_block_group_item);
11095                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11096                                    sizeof(bg_item));
11097                 if (btrfs_block_group_flags(&bg_item) != type) {
11098                         error(
11099 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11100                                 chunk_key.offset, chunk_end, type,
11101                                 btrfs_block_group_flags(&bg_item));
11102                         err |= REFERENCER_MISSING;
11103                 }
11104         }
11105
11106         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11107         for (i = 0; i < num_stripes; i++) {
11108                 btrfs_release_path(&path);
11109                 btrfs_init_path(&path);
11110                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11111                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11112                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11113
11114                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11115                                         0, 0);
11116                 if (ret)
11117                         goto not_match_dev;
11118
11119                 leaf = path.nodes[0];
11120                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11121                                      struct btrfs_dev_extent);
11122                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11123                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11124                 if (objectid != chunk_key.objectid ||
11125                     offset != chunk_key.offset ||
11126                     btrfs_dev_extent_length(leaf, ptr) != length)
11127                         goto not_match_dev;
11128                 continue;
11129 not_match_dev:
11130                 err |= BACKREF_MISSING;
11131                 error(
11132                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11133                         chunk_key.objectid, chunk_end, i);
11134                 continue;
11135         }
11136         btrfs_release_path(&path);
11137 out:
11138         return err;
11139 }
11140
11141 /*
11142  * Main entry function to check known items and update related accounting info
11143  */
11144 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11145 {
11146         struct btrfs_fs_info *fs_info = root->fs_info;
11147         struct btrfs_key key;
11148         int slot = 0;
11149         int type;
11150         struct btrfs_extent_data_ref *dref;
11151         int ret;
11152         int err = 0;
11153
11154 next:
11155         btrfs_item_key_to_cpu(eb, &key, slot);
11156         type = key.type;
11157
11158         switch (type) {
11159         case BTRFS_EXTENT_DATA_KEY:
11160                 ret = check_extent_data_item(root, eb, slot);
11161                 err |= ret;
11162                 break;
11163         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11164                 ret = check_block_group_item(fs_info, eb, slot);
11165                 err |= ret;
11166                 break;
11167         case BTRFS_DEV_ITEM_KEY:
11168                 ret = check_dev_item(fs_info, eb, slot);
11169                 err |= ret;
11170                 break;
11171         case BTRFS_CHUNK_ITEM_KEY:
11172                 ret = check_chunk_item(fs_info, eb, slot);
11173                 err |= ret;
11174                 break;
11175         case BTRFS_DEV_EXTENT_KEY:
11176                 ret = check_dev_extent_item(fs_info, eb, slot);
11177                 err |= ret;
11178                 break;
11179         case BTRFS_EXTENT_ITEM_KEY:
11180         case BTRFS_METADATA_ITEM_KEY:
11181                 ret = check_extent_item(fs_info, eb, slot);
11182                 err |= ret;
11183                 break;
11184         case BTRFS_EXTENT_CSUM_KEY:
11185                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11186                 break;
11187         case BTRFS_TREE_BLOCK_REF_KEY:
11188                 ret = check_tree_block_backref(fs_info, key.offset,
11189                                                key.objectid, -1);
11190                 err |= ret;
11191                 break;
11192         case BTRFS_EXTENT_DATA_REF_KEY:
11193                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11194                 ret = check_extent_data_backref(fs_info,
11195                                 btrfs_extent_data_ref_root(eb, dref),
11196                                 btrfs_extent_data_ref_objectid(eb, dref),
11197                                 btrfs_extent_data_ref_offset(eb, dref),
11198                                 key.objectid, 0,
11199                                 btrfs_extent_data_ref_count(eb, dref));
11200                 err |= ret;
11201                 break;
11202         case BTRFS_SHARED_BLOCK_REF_KEY:
11203                 ret = check_shared_block_backref(fs_info, key.offset,
11204                                                  key.objectid, -1);
11205                 err |= ret;
11206                 break;
11207         case BTRFS_SHARED_DATA_REF_KEY:
11208                 ret = check_shared_data_backref(fs_info, key.offset,
11209                                                 key.objectid);
11210                 err |= ret;
11211                 break;
11212         default:
11213                 break;
11214         }
11215
11216         if (++slot < btrfs_header_nritems(eb))
11217                 goto next;
11218
11219         return err;
11220 }
11221
11222 /*
11223  * Helper function for later fs/subvol tree check.  To determine if a tree
11224  * block should be checked.
 * This function ensures that a fs/subvolume tree block is checked only by the
 * direct referencer with the lowest root id.
11227  *
11228  * Backref check at extent tree would detect errors like missing subvolume
11229  * tree, so we can do aggressive check to reduce duplicated checks.
11230  */
11231 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11232 {
11233         struct btrfs_root *extent_root = root->fs_info->extent_root;
11234         struct btrfs_key key;
11235         struct btrfs_path path;
11236         struct extent_buffer *leaf;
11237         int slot;
11238         struct btrfs_extent_item *ei;
11239         unsigned long ptr;
11240         unsigned long end;
11241         int type;
11242         u32 item_size;
11243         u64 offset;
11244         struct btrfs_extent_inline_ref *iref;
11245         int ret;
11246
11247         btrfs_init_path(&path);
11248         key.objectid = btrfs_header_bytenr(eb);
11249         key.type = BTRFS_METADATA_ITEM_KEY;
11250         key.offset = (u64)-1;
11251
11252         /*
11253          * Any failure in backref resolving means we can't determine
11254          * whom the tree block belongs to.
11255          * So in that case, we need to check that tree block
11256          */
11257         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11258         if (ret < 0)
11259                 goto need_check;
11260
11261         ret = btrfs_previous_extent_item(extent_root, &path,
11262                                          btrfs_header_bytenr(eb));
11263         if (ret)
11264                 goto need_check;
11265
11266         leaf = path.nodes[0];
11267         slot = path.slots[0];
11268         btrfs_item_key_to_cpu(leaf, &key, slot);
11269         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11270
11271         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11272                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11273         } else {
11274                 struct btrfs_tree_block_info *info;
11275
11276                 info = (struct btrfs_tree_block_info *)(ei + 1);
11277                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11278         }
11279
11280         item_size = btrfs_item_size_nr(leaf, slot);
11281         ptr = (unsigned long)iref;
11282         end = (unsigned long)ei + item_size;
11283         while (ptr < end) {
11284                 iref = (struct btrfs_extent_inline_ref *)ptr;
11285                 type = btrfs_extent_inline_ref_type(leaf, iref);
11286                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11287
11288                 /*
11289                  * We only check the tree block if current root is
11290                  * the lowest referencer of it.
11291                  */
11292                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11293                     offset < root->objectid) {
11294                         btrfs_release_path(&path);
11295                         return 0;
11296                 }
11297
11298                 ptr += btrfs_extent_inline_ref_size(type);
11299         }
11300         /*
11301          * Normally we should also check keyed tree block ref, but that may be
11302          * very time consuming.  Inlined ref should already make us skip a lot
11303          * of refs now.  So skip search keyed tree block ref.
11304          */
11305
11306 need_check:
11307         btrfs_release_path(&path);
11308         return 1;
11309 }
11310
11311 /*
11312  * Traversal function for tree block. We will do:
11313  * 1) Skip shared fs/subvolume tree blocks
11314  * 2) Update related bytes accounting
11315  * 3) Pre-order traversal
11316  */
11317 static int traverse_tree_block(struct btrfs_root *root,
11318                                 struct extent_buffer *node)
11319 {
11320         struct extent_buffer *eb;
11321         struct btrfs_key key;
11322         struct btrfs_key drop_key;
11323         int level;
11324         u64 nr;
11325         int i;
11326         int err = 0;
11327         int ret;
11328
11329         /*
11330          * Skip shared fs/subvolume tree block, in that case they will
11331          * be checked by referencer with lowest rootid
11332          */
11333         if (is_fstree(root->objectid) && !should_check(root, node))
11334                 return 0;
11335
11336         /* Update bytes accounting */
11337         total_btree_bytes += node->len;
11338         if (fs_root_objectid(btrfs_header_owner(node)))
11339                 total_fs_tree_bytes += node->len;
11340         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11341                 total_extent_tree_bytes += node->len;
11342         if (!found_old_backref &&
11343             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11344             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11345             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11346                 found_old_backref = 1;
11347
11348         /* pre-order tranversal, check itself first */
11349         level = btrfs_header_level(node);
11350         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11351                                    btrfs_header_level(node),
11352                                    btrfs_header_owner(node));
11353         err |= ret;
11354         if (err)
11355                 error(
11356         "check %s failed root %llu bytenr %llu level %d, force continue check",
11357                         level ? "node":"leaf", root->objectid,
11358                         btrfs_header_bytenr(node), btrfs_header_level(node));
11359
11360         if (!level) {
11361                 btree_space_waste += btrfs_leaf_free_space(root, node);
11362                 ret = check_leaf_items(root, node);
11363                 err |= ret;
11364                 return err;
11365         }
11366
11367         nr = btrfs_header_nritems(node);
11368         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11369         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11370                 sizeof(struct btrfs_key_ptr);
11371
11372         /* Then check all its children */
11373         for (i = 0; i < nr; i++) {
11374                 u64 blocknr = btrfs_node_blockptr(node, i);
11375
11376                 btrfs_node_key_to_cpu(node, &key, i);
11377                 if (level == root->root_item.drop_level &&
11378                     is_dropped_key(&key, &drop_key))
11379                         continue;
11380
11381                 /*
11382                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11383                  * to call the function itself.
11384                  */
11385                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11386                 if (extent_buffer_uptodate(eb)) {
11387                         ret = traverse_tree_block(root, eb);
11388                         err |= ret;
11389                 }
11390                 free_extent_buffer(eb);
11391         }
11392
11393         return err;
11394 }
11395
11396 /*
11397  * Low memory usage version check_chunks_and_extents.
11398  */
11399 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11400 {
11401         struct btrfs_path path;
11402         struct btrfs_key key;
11403         struct btrfs_root *root1;
11404         struct btrfs_root *cur_root;
11405         int err = 0;
11406         int ret;
11407
11408         root1 = root->fs_info->chunk_root;
11409         ret = traverse_tree_block(root1, root1->node);
11410         err |= ret;
11411
11412         root1 = root->fs_info->tree_root;
11413         ret = traverse_tree_block(root1, root1->node);
11414         err |= ret;
11415
11416         btrfs_init_path(&path);
11417         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11418         key.offset = 0;
11419         key.type = BTRFS_ROOT_ITEM_KEY;
11420
11421         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11422         if (ret) {
11423                 error("cannot find extent treet in tree_root");
11424                 goto out;
11425         }
11426
11427         while (1) {
11428                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11429                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11430                         goto next;
11431                 key.offset = (u64)-1;
11432
11433                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11434                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11435                                         &key);
11436                 else
11437                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11438                 if (IS_ERR(cur_root) || !cur_root) {
11439                         error("failed to read tree: %lld", key.objectid);
11440                         goto next;
11441                 }
11442
11443                 ret = traverse_tree_block(cur_root, cur_root->node);
11444                 err |= ret;
11445
11446                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11447                         btrfs_free_fs_root(cur_root);
11448 next:
11449                 ret = btrfs_next_item(root1, &path);
11450                 if (ret)
11451                         goto out;
11452         }
11453
11454 out:
11455         btrfs_release_path(&path);
11456         return err;
11457 }
11458
11459 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11460                            struct btrfs_root *root, int overwrite)
11461 {
11462         struct extent_buffer *c;
11463         struct extent_buffer *old = root->node;
11464         int level;
11465         int ret;
11466         struct btrfs_disk_key disk_key = {0,0,0};
11467
11468         level = 0;
11469
11470         if (overwrite) {
11471                 c = old;
11472                 extent_buffer_get(c);
11473                 goto init;
11474         }
11475         c = btrfs_alloc_free_block(trans, root,
11476                                    root->nodesize,
11477                                    root->root_key.objectid,
11478                                    &disk_key, level, 0, 0);
11479         if (IS_ERR(c)) {
11480                 c = old;
11481                 extent_buffer_get(c);
11482                 overwrite = 1;
11483         }
11484 init:
11485         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11486         btrfs_set_header_level(c, level);
11487         btrfs_set_header_bytenr(c, c->start);
11488         btrfs_set_header_generation(c, trans->transid);
11489         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11490         btrfs_set_header_owner(c, root->root_key.objectid);
11491
11492         write_extent_buffer(c, root->fs_info->fsid,
11493                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11494
11495         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11496                             btrfs_header_chunk_tree_uuid(c),
11497                             BTRFS_UUID_SIZE);
11498
11499         btrfs_mark_buffer_dirty(c);
11500         /*
11501          * this case can happen in the following case:
11502          *
11503          * 1.overwrite previous root.
11504          *
11505          * 2.reinit reloc data root, this is because we skip pin
11506          * down reloc data tree before which means we can allocate
11507          * same block bytenr here.
11508          */
11509         if (old->start == c->start) {
11510                 btrfs_set_root_generation(&root->root_item,
11511                                           trans->transid);
11512                 root->root_item.level = btrfs_header_level(root->node);
11513                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11514                                         &root->root_key, &root->root_item);
11515                 if (ret) {
11516                         free_extent_buffer(c);
11517                         return ret;
11518                 }
11519         }
11520         free_extent_buffer(old);
11521         root->node = c;
11522         add_root_to_dirty_list(root);
11523         return 0;
11524 }
11525
11526 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11527                                 struct extent_buffer *eb, int tree_root)
11528 {
11529         struct extent_buffer *tmp;
11530         struct btrfs_root_item *ri;
11531         struct btrfs_key key;
11532         u64 bytenr;
11533         u32 nodesize;
11534         int level = btrfs_header_level(eb);
11535         int nritems;
11536         int ret;
11537         int i;
11538
11539         /*
11540          * If we have pinned this block before, don't pin it again.
11541          * This can not only avoid forever loop with broken filesystem
11542          * but also give us some speedups.
11543          */
11544         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11545                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11546                 return 0;
11547
11548         btrfs_pin_extent(fs_info, eb->start, eb->len);
11549
11550         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11551         nritems = btrfs_header_nritems(eb);
11552         for (i = 0; i < nritems; i++) {
11553                 if (level == 0) {
11554                         btrfs_item_key_to_cpu(eb, &key, i);
11555                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11556                                 continue;
11557                         /* Skip the extent root and reloc roots */
11558                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11559                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11560                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11561                                 continue;
11562                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11563                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11564
11565                         /*
11566                          * If at any point we start needing the real root we
11567                          * will have to build a stump root for the root we are
11568                          * in, but for now this doesn't actually use the root so
11569                          * just pass in extent_root.
11570                          */
11571                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11572                                               nodesize, 0);
11573                         if (!extent_buffer_uptodate(tmp)) {
11574                                 fprintf(stderr, "Error reading root block\n");
11575                                 return -EIO;
11576                         }
11577                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11578                         free_extent_buffer(tmp);
11579                         if (ret)
11580                                 return ret;
11581                 } else {
11582                         bytenr = btrfs_node_blockptr(eb, i);
11583
11584                         /* If we aren't the tree root don't read the block */
11585                         if (level == 1 && !tree_root) {
11586                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11587                                 continue;
11588                         }
11589
11590                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11591                                               nodesize, 0);
11592                         if (!extent_buffer_uptodate(tmp)) {
11593                                 fprintf(stderr, "Error reading tree block\n");
11594                                 return -EIO;
11595                         }
11596                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11597                         free_extent_buffer(tmp);
11598                         if (ret)
11599                                 return ret;
11600                 }
11601         }
11602
11603         return 0;
11604 }
11605
11606 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11607 {
11608         int ret;
11609
11610         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11611         if (ret)
11612                 return ret;
11613
11614         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11615 }
11616
11617 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11618 {
11619         struct btrfs_block_group_cache *cache;
11620         struct btrfs_path path;
11621         struct extent_buffer *leaf;
11622         struct btrfs_chunk *chunk;
11623         struct btrfs_key key;
11624         int ret;
11625         u64 start;
11626
11627         btrfs_init_path(&path);
11628         key.objectid = 0;
11629         key.type = BTRFS_CHUNK_ITEM_KEY;
11630         key.offset = 0;
11631         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11632         if (ret < 0) {
11633                 btrfs_release_path(&path);
11634                 return ret;
11635         }
11636
11637         /*
11638          * We do this in case the block groups were screwed up and had alloc
11639          * bits that aren't actually set on the chunks.  This happens with
11640          * restored images every time and could happen in real life I guess.
11641          */
11642         fs_info->avail_data_alloc_bits = 0;
11643         fs_info->avail_metadata_alloc_bits = 0;
11644         fs_info->avail_system_alloc_bits = 0;
11645
11646         /* First we need to create the in-memory block groups */
11647         while (1) {
11648                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11649                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11650                         if (ret < 0) {
11651                                 btrfs_release_path(&path);
11652                                 return ret;
11653                         }
11654                         if (ret) {
11655                                 ret = 0;
11656                                 break;
11657                         }
11658                 }
11659                 leaf = path.nodes[0];
11660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11661                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11662                         path.slots[0]++;
11663                         continue;
11664                 }
11665
11666                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11667                 btrfs_add_block_group(fs_info, 0,
11668                                       btrfs_chunk_type(leaf, chunk),
11669                                       key.objectid, key.offset,
11670                                       btrfs_chunk_length(leaf, chunk));
11671                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11672                                  key.offset + btrfs_chunk_length(leaf, chunk),
11673                                  GFP_NOFS);
11674                 path.slots[0]++;
11675         }
11676         start = 0;
11677         while (1) {
11678                 cache = btrfs_lookup_first_block_group(fs_info, start);
11679                 if (!cache)
11680                         break;
11681                 cache->cached = 1;
11682                 start = cache->key.objectid + cache->key.offset;
11683         }
11684
11685         btrfs_release_path(&path);
11686         return 0;
11687 }
11688
11689 static int reset_balance(struct btrfs_trans_handle *trans,
11690                          struct btrfs_fs_info *fs_info)
11691 {
11692         struct btrfs_root *root = fs_info->tree_root;
11693         struct btrfs_path path;
11694         struct extent_buffer *leaf;
11695         struct btrfs_key key;
11696         int del_slot, del_nr = 0;
11697         int ret;
11698         int found = 0;
11699
11700         btrfs_init_path(&path);
11701         key.objectid = BTRFS_BALANCE_OBJECTID;
11702         key.type = BTRFS_BALANCE_ITEM_KEY;
11703         key.offset = 0;
11704         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11705         if (ret) {
11706                 if (ret > 0)
11707                         ret = 0;
11708                 if (!ret)
11709                         goto reinit_data_reloc;
11710                 else
11711                         goto out;
11712         }
11713
11714         ret = btrfs_del_item(trans, root, &path);
11715         if (ret)
11716                 goto out;
11717         btrfs_release_path(&path);
11718
11719         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11720         key.type = BTRFS_ROOT_ITEM_KEY;
11721         key.offset = 0;
11722         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11723         if (ret < 0)
11724                 goto out;
11725         while (1) {
11726                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11727                         if (!found)
11728                                 break;
11729
11730                         if (del_nr) {
11731                                 ret = btrfs_del_items(trans, root, &path,
11732                                                       del_slot, del_nr);
11733                                 del_nr = 0;
11734                                 if (ret)
11735                                         goto out;
11736                         }
11737                         key.offset++;
11738                         btrfs_release_path(&path);
11739
11740                         found = 0;
11741                         ret = btrfs_search_slot(trans, root, &key, &path,
11742                                                 -1, 1);
11743                         if (ret < 0)
11744                                 goto out;
11745                         continue;
11746                 }
11747                 found = 1;
11748                 leaf = path.nodes[0];
11749                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11750                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11751                         break;
11752                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11753                         path.slots[0]++;
11754                         continue;
11755                 }
11756                 if (!del_nr) {
11757                         del_slot = path.slots[0];
11758                         del_nr = 1;
11759                 } else {
11760                         del_nr++;
11761                 }
11762                 path.slots[0]++;
11763         }
11764
11765         if (del_nr) {
11766                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11767                 if (ret)
11768                         goto out;
11769         }
11770         btrfs_release_path(&path);
11771
11772 reinit_data_reloc:
11773         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11774         key.type = BTRFS_ROOT_ITEM_KEY;
11775         key.offset = (u64)-1;
11776         root = btrfs_read_fs_root(fs_info, &key);
11777         if (IS_ERR(root)) {
11778                 fprintf(stderr, "Error reading data reloc tree\n");
11779                 ret = PTR_ERR(root);
11780                 goto out;
11781         }
11782         record_root_in_trans(trans, root);
11783         ret = btrfs_fsck_reinit_root(trans, root, 0);
11784         if (ret)
11785                 goto out;
11786         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11787 out:
11788         btrfs_release_path(&path);
11789         return ret;
11790 }
11791
11792 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11793                               struct btrfs_fs_info *fs_info)
11794 {
11795         u64 start = 0;
11796         int ret;
11797
11798         /*
11799          * The only reason we don't do this is because right now we're just
11800          * walking the trees we find and pinning down their bytes, we don't look
11801          * at any of the leaves.  In order to do mixed groups we'd have to check
11802          * the leaves of any fs roots and pin down the bytes for any file
11803          * extents we find.  Not hard but why do it if we don't have to?
11804          */
11805         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11806                 fprintf(stderr, "We don't support re-initing the extent tree "
11807                         "for mixed block groups yet, please notify a btrfs "
11808                         "developer you want to do this so they can add this "
11809                         "functionality.\n");
11810                 return -EINVAL;
11811         }
11812
11813         /*
11814          * first we need to walk all of the trees except the extent tree and pin
11815          * down the bytes that are in use so we don't overwrite any existing
11816          * metadata.
11817          */
11818         ret = pin_metadata_blocks(fs_info);
11819         if (ret) {
11820                 fprintf(stderr, "error pinning down used bytes\n");
11821                 return ret;
11822         }
11823
11824         /*
11825          * Need to drop all the block groups since we're going to recreate all
11826          * of them again.
11827          */
11828         btrfs_free_block_groups(fs_info);
11829         ret = reset_block_groups(fs_info);
11830         if (ret) {
11831                 fprintf(stderr, "error resetting the block groups\n");
11832                 return ret;
11833         }
11834
11835         /* Ok we can allocate now, reinit the extent root */
11836         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11837         if (ret) {
11838                 fprintf(stderr, "extent root initialization failed\n");
11839                 /*
11840                  * When the transaction code is updated we should end the
11841                  * transaction, but for now progs only knows about commit so
11842                  * just return an error.
11843                  */
11844                 return ret;
11845         }
11846
11847         /*
11848          * Now we have all the in-memory block groups setup so we can make
11849          * allocations properly, and the metadata we care about is safe since we
11850          * pinned all of it above.
11851          */
11852         while (1) {
11853                 struct btrfs_block_group_cache *cache;
11854
11855                 cache = btrfs_lookup_first_block_group(fs_info, start);
11856                 if (!cache)
11857                         break;
11858                 start = cache->key.objectid + cache->key.offset;
11859                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11860                                         &cache->key, &cache->item,
11861                                         sizeof(cache->item));
11862                 if (ret) {
11863                         fprintf(stderr, "Error adding block group\n");
11864                         return ret;
11865                 }
11866                 btrfs_extent_post_op(trans, fs_info->extent_root);
11867         }
11868
11869         ret = reset_balance(trans, fs_info);
11870         if (ret)
11871                 fprintf(stderr, "error resetting the pending balance\n");
11872
11873         return ret;
11874 }
11875
11876 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11877 {
11878         struct btrfs_path path;
11879         struct btrfs_trans_handle *trans;
11880         struct btrfs_key key;
11881         int ret;
11882
11883         printf("Recowing metadata block %llu\n", eb->start);
11884         key.objectid = btrfs_header_owner(eb);
11885         key.type = BTRFS_ROOT_ITEM_KEY;
11886         key.offset = (u64)-1;
11887
11888         root = btrfs_read_fs_root(root->fs_info, &key);
11889         if (IS_ERR(root)) {
11890                 fprintf(stderr, "Couldn't find owner root %llu\n",
11891                         key.objectid);
11892                 return PTR_ERR(root);
11893         }
11894
11895         trans = btrfs_start_transaction(root, 1);
11896         if (IS_ERR(trans))
11897                 return PTR_ERR(trans);
11898
11899         btrfs_init_path(&path);
11900         path.lowest_level = btrfs_header_level(eb);
11901         if (path.lowest_level)
11902                 btrfs_node_key_to_cpu(eb, &key, 0);
11903         else
11904                 btrfs_item_key_to_cpu(eb, &key, 0);
11905
11906         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11907         btrfs_commit_transaction(trans, root);
11908         btrfs_release_path(&path);
11909         return ret;
11910 }
11911
11912 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11913 {
11914         struct btrfs_path path;
11915         struct btrfs_trans_handle *trans;
11916         struct btrfs_key key;
11917         int ret;
11918
11919         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11920                bad->key.type, bad->key.offset);
11921         key.objectid = bad->root_id;
11922         key.type = BTRFS_ROOT_ITEM_KEY;
11923         key.offset = (u64)-1;
11924
11925         root = btrfs_read_fs_root(root->fs_info, &key);
11926         if (IS_ERR(root)) {
11927                 fprintf(stderr, "Couldn't find owner root %llu\n",
11928                         key.objectid);
11929                 return PTR_ERR(root);
11930         }
11931
11932         trans = btrfs_start_transaction(root, 1);
11933         if (IS_ERR(trans))
11934                 return PTR_ERR(trans);
11935
11936         btrfs_init_path(&path);
11937         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11938         if (ret) {
11939                 if (ret > 0)
11940                         ret = 0;
11941                 goto out;
11942         }
11943         ret = btrfs_del_item(trans, root, &path);
11944 out:
11945         btrfs_commit_transaction(trans, root);
11946         btrfs_release_path(&path);
11947         return ret;
11948 }
11949
11950 static int zero_log_tree(struct btrfs_root *root)
11951 {
11952         struct btrfs_trans_handle *trans;
11953         int ret;
11954
11955         trans = btrfs_start_transaction(root, 1);
11956         if (IS_ERR(trans)) {
11957                 ret = PTR_ERR(trans);
11958                 return ret;
11959         }
11960         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11961         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11962         ret = btrfs_commit_transaction(trans, root);
11963         return ret;
11964 }
11965
11966 static int populate_csum(struct btrfs_trans_handle *trans,
11967                          struct btrfs_root *csum_root, char *buf, u64 start,
11968                          u64 len)
11969 {
11970         u64 offset = 0;
11971         u64 sectorsize;
11972         int ret = 0;
11973
11974         while (offset < len) {
11975                 sectorsize = csum_root->sectorsize;
11976                 ret = read_extent_data(csum_root, buf, start + offset,
11977                                        &sectorsize, 0);
11978                 if (ret)
11979                         break;
11980                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11981                                             start + offset, buf, sectorsize);
11982                 if (ret)
11983                         break;
11984                 offset += sectorsize;
11985         }
11986         return ret;
11987 }
11988
11989 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11990                                       struct btrfs_root *csum_root,
11991                                       struct btrfs_root *cur_root)
11992 {
11993         struct btrfs_path path;
11994         struct btrfs_key key;
11995         struct extent_buffer *node;
11996         struct btrfs_file_extent_item *fi;
11997         char *buf = NULL;
11998         u64 start = 0;
11999         u64 len = 0;
12000         int slot = 0;
12001         int ret = 0;
12002
12003         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12004         if (!buf)
12005                 return -ENOMEM;
12006
12007         btrfs_init_path(&path);
12008         key.objectid = 0;
12009         key.offset = 0;
12010         key.type = 0;
12011         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12012         if (ret < 0)
12013                 goto out;
12014         /* Iterate all regular file extents and fill its csum */
12015         while (1) {
12016                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12017
12018                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12019                         goto next;
12020                 node = path.nodes[0];
12021                 slot = path.slots[0];
12022                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12023                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12024                         goto next;
12025                 start = btrfs_file_extent_disk_bytenr(node, fi);
12026                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12027
12028                 ret = populate_csum(trans, csum_root, buf, start, len);
12029                 if (ret == -EEXIST)
12030                         ret = 0;
12031                 if (ret < 0)
12032                         goto out;
12033 next:
12034                 /*
12035                  * TODO: if next leaf is corrupted, jump to nearest next valid
12036                  * leaf.
12037                  */
12038                 ret = btrfs_next_item(cur_root, &path);
12039                 if (ret < 0)
12040                         goto out;
12041                 if (ret > 0) {
12042                         ret = 0;
12043                         goto out;
12044                 }
12045         }
12046
12047 out:
12048         btrfs_release_path(&path);
12049         free(buf);
12050         return ret;
12051 }
12052
12053 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12054                                   struct btrfs_root *csum_root)
12055 {
12056         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12057         struct btrfs_path path;
12058         struct btrfs_root *tree_root = fs_info->tree_root;
12059         struct btrfs_root *cur_root;
12060         struct extent_buffer *node;
12061         struct btrfs_key key;
12062         int slot = 0;
12063         int ret = 0;
12064
12065         btrfs_init_path(&path);
12066         key.objectid = BTRFS_FS_TREE_OBJECTID;
12067         key.offset = 0;
12068         key.type = BTRFS_ROOT_ITEM_KEY;
12069         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12070         if (ret < 0)
12071                 goto out;
12072         if (ret > 0) {
12073                 ret = -ENOENT;
12074                 goto out;
12075         }
12076
12077         while (1) {
12078                 node = path.nodes[0];
12079                 slot = path.slots[0];
12080                 btrfs_item_key_to_cpu(node, &key, slot);
12081                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12082                         goto out;
12083                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12084                         goto next;
12085                 if (!is_fstree(key.objectid))
12086                         goto next;
12087                 key.offset = (u64)-1;
12088
12089                 cur_root = btrfs_read_fs_root(fs_info, &key);
12090                 if (IS_ERR(cur_root) || !cur_root) {
12091                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12092                                 key.objectid);
12093                         goto out;
12094                 }
12095                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12096                                 cur_root);
12097                 if (ret < 0)
12098                         goto out;
12099 next:
12100                 ret = btrfs_next_item(tree_root, &path);
12101                 if (ret > 0) {
12102                         ret = 0;
12103                         goto out;
12104                 }
12105                 if (ret < 0)
12106                         goto out;
12107         }
12108
12109 out:
12110         btrfs_release_path(&path);
12111         return ret;
12112 }
12113
12114 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12115                                       struct btrfs_root *csum_root)
12116 {
12117         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12118         struct btrfs_path path;
12119         struct btrfs_extent_item *ei;
12120         struct extent_buffer *leaf;
12121         char *buf;
12122         struct btrfs_key key;
12123         int ret;
12124
12125         btrfs_init_path(&path);
12126         key.objectid = 0;
12127         key.type = BTRFS_EXTENT_ITEM_KEY;
12128         key.offset = 0;
12129         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12130         if (ret < 0) {
12131                 btrfs_release_path(&path);
12132                 return ret;
12133         }
12134
12135         buf = malloc(csum_root->sectorsize);
12136         if (!buf) {
12137                 btrfs_release_path(&path);
12138                 return -ENOMEM;
12139         }
12140
12141         while (1) {
12142                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12143                         ret = btrfs_next_leaf(extent_root, &path);
12144                         if (ret < 0)
12145                                 break;
12146                         if (ret) {
12147                                 ret = 0;
12148                                 break;
12149                         }
12150                 }
12151                 leaf = path.nodes[0];
12152
12153                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12154                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12155                         path.slots[0]++;
12156                         continue;
12157                 }
12158
12159                 ei = btrfs_item_ptr(leaf, path.slots[0],
12160                                     struct btrfs_extent_item);
12161                 if (!(btrfs_extent_flags(leaf, ei) &
12162                       BTRFS_EXTENT_FLAG_DATA)) {
12163                         path.slots[0]++;
12164                         continue;
12165                 }
12166
12167                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12168                                     key.offset);
12169                 if (ret)
12170                         break;
12171                 path.slots[0]++;
12172         }
12173
12174         btrfs_release_path(&path);
12175         free(buf);
12176         return ret;
12177 }
12178
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees have to
 * be walked instead.  A non-zero @search_fs_tree selects the fs/subvolume
 * tree walk; zero selects the extent tree walk.
 *
 * Returns whatever the selected walker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12195
12196 static void free_roots_info_cache(void)
12197 {
12198         if (!roots_info_cache)
12199                 return;
12200
12201         while (!cache_tree_empty(roots_info_cache)) {
12202                 struct cache_extent *entry;
12203                 struct root_item_info *rii;
12204
12205                 entry = first_cache_extent(roots_info_cache);
12206                 if (!entry)
12207                         break;
12208                 remove_cache_extent(roots_info_cache, entry);
12209                 rii = container_of(entry, struct root_item_info, cache_extent);
12210                 free(rii);
12211         }
12212
12213         free(roots_info_cache);
12214         roots_info_cache = NULL;
12215 }
12216
12217 static int build_roots_info_cache(struct btrfs_fs_info *info)
12218 {
12219         int ret = 0;
12220         struct btrfs_key key;
12221         struct extent_buffer *leaf;
12222         struct btrfs_path path;
12223
12224         if (!roots_info_cache) {
12225                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12226                 if (!roots_info_cache)
12227                         return -ENOMEM;
12228                 cache_tree_init(roots_info_cache);
12229         }
12230
12231         btrfs_init_path(&path);
12232         key.objectid = 0;
12233         key.type = BTRFS_EXTENT_ITEM_KEY;
12234         key.offset = 0;
12235         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12236         if (ret < 0)
12237                 goto out;
12238         leaf = path.nodes[0];
12239
12240         while (1) {
12241                 struct btrfs_key found_key;
12242                 struct btrfs_extent_item *ei;
12243                 struct btrfs_extent_inline_ref *iref;
12244                 int slot = path.slots[0];
12245                 int type;
12246                 u64 flags;
12247                 u64 root_id;
12248                 u8 level;
12249                 struct cache_extent *entry;
12250                 struct root_item_info *rii;
12251
12252                 if (slot >= btrfs_header_nritems(leaf)) {
12253                         ret = btrfs_next_leaf(info->extent_root, &path);
12254                         if (ret < 0) {
12255                                 break;
12256                         } else if (ret) {
12257                                 ret = 0;
12258                                 break;
12259                         }
12260                         leaf = path.nodes[0];
12261                         slot = path.slots[0];
12262                 }
12263
12264                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12265
12266                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12267                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12268                         goto next;
12269
12270                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12271                 flags = btrfs_extent_flags(leaf, ei);
12272
12273                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12274                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12275                         goto next;
12276
12277                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12278                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12279                         level = found_key.offset;
12280                 } else {
12281                         struct btrfs_tree_block_info *binfo;
12282
12283                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12284                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12285                         level = btrfs_tree_block_level(leaf, binfo);
12286                 }
12287
12288                 /*
12289                  * For a root extent, it must be of the following type and the
12290                  * first (and only one) iref in the item.
12291                  */
12292                 type = btrfs_extent_inline_ref_type(leaf, iref);
12293                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12294                         goto next;
12295
12296                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12297                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12298                 if (!entry) {
12299                         rii = malloc(sizeof(struct root_item_info));
12300                         if (!rii) {
12301                                 ret = -ENOMEM;
12302                                 goto out;
12303                         }
12304                         rii->cache_extent.start = root_id;
12305                         rii->cache_extent.size = 1;
12306                         rii->level = (u8)-1;
12307                         entry = &rii->cache_extent;
12308                         ret = insert_cache_extent(roots_info_cache, entry);
12309                         ASSERT(ret == 0);
12310                 } else {
12311                         rii = container_of(entry, struct root_item_info,
12312                                            cache_extent);
12313                 }
12314
12315                 ASSERT(rii->cache_extent.start == root_id);
12316                 ASSERT(rii->cache_extent.size == 1);
12317
12318                 if (level > rii->level || rii->level == (u8)-1) {
12319                         rii->level = level;
12320                         rii->bytenr = found_key.objectid;
12321                         rii->gen = btrfs_extent_generation(leaf, ei);
12322                         rii->node_count = 1;
12323                 } else if (level == rii->level) {
12324                         rii->node_count++;
12325                 }
12326 next:
12327                 path.slots[0]++;
12328         }
12329
12330 out:
12331         btrfs_release_path(&path);
12332
12333         return ret;
12334 }
12335
12336 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12337                                   struct btrfs_path *path,
12338                                   const struct btrfs_key *root_key,
12339                                   const int read_only_mode)
12340 {
12341         const u64 root_id = root_key->objectid;
12342         struct cache_extent *entry;
12343         struct root_item_info *rii;
12344         struct btrfs_root_item ri;
12345         unsigned long offset;
12346
12347         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12348         if (!entry) {
12349                 fprintf(stderr,
12350                         "Error: could not find extent items for root %llu\n",
12351                         root_key->objectid);
12352                 return -ENOENT;
12353         }
12354
12355         rii = container_of(entry, struct root_item_info, cache_extent);
12356         ASSERT(rii->cache_extent.start == root_id);
12357         ASSERT(rii->cache_extent.size == 1);
12358
12359         if (rii->node_count != 1) {
12360                 fprintf(stderr,
12361                         "Error: could not find btree root extent for root %llu\n",
12362                         root_id);
12363                 return -ENOENT;
12364         }
12365
12366         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12367         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12368
12369         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12370             btrfs_root_level(&ri) != rii->level ||
12371             btrfs_root_generation(&ri) != rii->gen) {
12372
12373                 /*
12374                  * If we're in repair mode but our caller told us to not update
12375                  * the root item, i.e. just check if it needs to be updated, don't
12376                  * print this message, since the caller will call us again shortly
12377                  * for the same root item without read only mode (the caller will
12378                  * open a transaction first).
12379                  */
12380                 if (!(read_only_mode && repair))
12381                         fprintf(stderr,
12382                                 "%sroot item for root %llu,"
12383                                 " current bytenr %llu, current gen %llu, current level %u,"
12384                                 " new bytenr %llu, new gen %llu, new level %u\n",
12385                                 (read_only_mode ? "" : "fixing "),
12386                                 root_id,
12387                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12388                                 btrfs_root_level(&ri),
12389                                 rii->bytenr, rii->gen, rii->level);
12390
12391                 if (btrfs_root_generation(&ri) > rii->gen) {
12392                         fprintf(stderr,
12393                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12394                                 root_id, btrfs_root_generation(&ri), rii->gen);
12395                         return -EINVAL;
12396                 }
12397
12398                 if (!read_only_mode) {
12399                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12400                         btrfs_set_root_level(&ri, rii->level);
12401                         btrfs_set_root_generation(&ri, rii->gen);
12402                         write_extent_buffer(path->nodes[0], &ri,
12403                                             offset, sizeof(ri));
12404                 }
12405
12406                 return 1;
12407         }
12408
12409         return 0;
12410 }
12411
12412 /*
12413  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12414  * caused read-only snapshots to be corrupted if they were created at a moment
12415  * when the source subvolume/snapshot had orphan items. The issue was that the
12416  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12417  * node instead of the post orphan cleanup root node.
12418  * So this function, and its callees, just detects and fixes those cases. Even
12419  * though the regression was for read-only snapshots, this function applies to
12420  * any snapshot/subvolume root.
12421  * This must be run before any other repair code - not doing it so, makes other
12422  * repair code delete or modify backrefs in the extent tree for example, which
12423  * will result in an inconsistent fs after repairing the root items.
12424  */
12425 static int repair_root_items(struct btrfs_fs_info *info)
12426 {
12427         struct btrfs_path path;
12428         struct btrfs_key key;
12429         struct extent_buffer *leaf;
12430         struct btrfs_trans_handle *trans = NULL;
12431         int ret = 0;
12432         int bad_roots = 0;
12433         int need_trans = 0;
12434
12435         btrfs_init_path(&path);
12436
12437         ret = build_roots_info_cache(info);
12438         if (ret)
12439                 goto out;
12440
12441         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12442         key.type = BTRFS_ROOT_ITEM_KEY;
12443         key.offset = 0;
12444
12445 again:
12446         /*
12447          * Avoid opening and committing transactions if a leaf doesn't have
12448          * any root items that need to be fixed, so that we avoid rotating
12449          * backup roots unnecessarily.
12450          */
12451         if (need_trans) {
12452                 trans = btrfs_start_transaction(info->tree_root, 1);
12453                 if (IS_ERR(trans)) {
12454                         ret = PTR_ERR(trans);
12455                         goto out;
12456                 }
12457         }
12458
12459         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12460                                 0, trans ? 1 : 0);
12461         if (ret < 0)
12462                 goto out;
12463         leaf = path.nodes[0];
12464
12465         while (1) {
12466                 struct btrfs_key found_key;
12467
12468                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12469                         int no_more_keys = find_next_key(&path, &key);
12470
12471                         btrfs_release_path(&path);
12472                         if (trans) {
12473                                 ret = btrfs_commit_transaction(trans,
12474                                                                info->tree_root);
12475                                 trans = NULL;
12476                                 if (ret < 0)
12477                                         goto out;
12478                         }
12479                         need_trans = 0;
12480                         if (no_more_keys)
12481                                 break;
12482                         goto again;
12483                 }
12484
12485                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12486
12487                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12488                         goto next;
12489                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12490                         goto next;
12491
12492                 ret = maybe_repair_root_item(info, &path, &found_key,
12493                                              trans ? 0 : 1);
12494                 if (ret < 0)
12495                         goto out;
12496                 if (ret) {
12497                         if (!trans && repair) {
12498                                 need_trans = 1;
12499                                 key = found_key;
12500                                 btrfs_release_path(&path);
12501                                 goto again;
12502                         }
12503                         bad_roots++;
12504                 }
12505 next:
12506                 path.slots[0]++;
12507         }
12508         ret = 0;
12509 out:
12510         free_roots_info_cache();
12511         btrfs_release_path(&path);
12512         if (trans)
12513                 btrfs_commit_transaction(trans, info->tree_root);
12514         if (ret < 0)
12515                 return ret;
12516
12517         return bad_roots;
12518 }
12519
12520 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12521 {
12522         struct btrfs_trans_handle *trans;
12523         struct btrfs_block_group_cache *bg_cache;
12524         u64 current = 0;
12525         int ret = 0;
12526
12527         /* Clear all free space cache inodes and its extent data */
12528         while (1) {
12529                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12530                 if (!bg_cache)
12531                         break;
12532                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12533                 if (ret < 0)
12534                         return ret;
12535                 current = bg_cache->key.objectid + bg_cache->key.offset;
12536         }
12537
12538         /* Don't forget to set cache_generation to -1 */
12539         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12540         if (IS_ERR(trans)) {
12541                 error("failed to update super block cache generation");
12542                 return PTR_ERR(trans);
12543         }
12544         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12545         btrfs_commit_transaction(trans, fs_info->tree_root);
12546
12547         return ret;
12548 }
12549
/*
 * NULL-terminated help text for "btrfs check", handed to usage() by
 * cmd_check().  The entries are, in order: the synopsis, a one-line summary,
 * a longer description, an empty string, and one or more lines per option.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12580
12581 int cmd_check(int argc, char **argv)
12582 {
12583         struct cache_tree root_cache;
12584         struct btrfs_root *root;
12585         struct btrfs_fs_info *info;
12586         u64 bytenr = 0;
12587         u64 subvolid = 0;
12588         u64 tree_root_bytenr = 0;
12589         u64 chunk_root_bytenr = 0;
12590         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12591         int ret;
12592         int err = 0;
12593         u64 num;
12594         int init_csum_tree = 0;
12595         int readonly = 0;
12596         int clear_space_cache = 0;
12597         int qgroup_report = 0;
12598         int qgroups_repaired = 0;
12599         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12600
12601         while(1) {
12602                 int c;
12603                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12604                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12605                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12606                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12607                 static const struct option long_options[] = {
12608                         { "super", required_argument, NULL, 's' },
12609                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12610                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12611                         { "init-csum-tree", no_argument, NULL,
12612                                 GETOPT_VAL_INIT_CSUM },
12613                         { "init-extent-tree", no_argument, NULL,
12614                                 GETOPT_VAL_INIT_EXTENT },
12615                         { "check-data-csum", no_argument, NULL,
12616                                 GETOPT_VAL_CHECK_CSUM },
12617                         { "backup", no_argument, NULL, 'b' },
12618                         { "subvol-extents", required_argument, NULL, 'E' },
12619                         { "qgroup-report", no_argument, NULL, 'Q' },
12620                         { "tree-root", required_argument, NULL, 'r' },
12621                         { "chunk-root", required_argument, NULL,
12622                                 GETOPT_VAL_CHUNK_TREE },
12623                         { "progress", no_argument, NULL, 'p' },
12624                         { "mode", required_argument, NULL,
12625                                 GETOPT_VAL_MODE },
12626                         { "clear-space-cache", required_argument, NULL,
12627                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12628                         { NULL, 0, NULL, 0}
12629                 };
12630
12631                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12632                 if (c < 0)
12633                         break;
12634                 switch(c) {
12635                         case 'a': /* ignored */ break;
12636                         case 'b':
12637                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12638                                 break;
12639                         case 's':
12640                                 num = arg_strtou64(optarg);
12641                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12642                                         error(
12643                                         "super mirror should be less than %d",
12644                                                 BTRFS_SUPER_MIRROR_MAX);
12645                                         exit(1);
12646                                 }
12647                                 bytenr = btrfs_sb_offset(((int)num));
12648                                 printf("using SB copy %llu, bytenr %llu\n", num,
12649                                        (unsigned long long)bytenr);
12650                                 break;
12651                         case 'Q':
12652                                 qgroup_report = 1;
12653                                 break;
12654                         case 'E':
12655                                 subvolid = arg_strtou64(optarg);
12656                                 break;
12657                         case 'r':
12658                                 tree_root_bytenr = arg_strtou64(optarg);
12659                                 break;
12660                         case GETOPT_VAL_CHUNK_TREE:
12661                                 chunk_root_bytenr = arg_strtou64(optarg);
12662                                 break;
12663                         case 'p':
12664                                 ctx.progress_enabled = true;
12665                                 break;
12666                         case '?':
12667                         case 'h':
12668                                 usage(cmd_check_usage);
12669                         case GETOPT_VAL_REPAIR:
12670                                 printf("enabling repair mode\n");
12671                                 repair = 1;
12672                                 ctree_flags |= OPEN_CTREE_WRITES;
12673                                 break;
12674                         case GETOPT_VAL_READONLY:
12675                                 readonly = 1;
12676                                 break;
12677                         case GETOPT_VAL_INIT_CSUM:
12678                                 printf("Creating a new CRC tree\n");
12679                                 init_csum_tree = 1;
12680                                 repair = 1;
12681                                 ctree_flags |= OPEN_CTREE_WRITES;
12682                                 break;
12683                         case GETOPT_VAL_INIT_EXTENT:
12684                                 init_extent_tree = 1;
12685                                 ctree_flags |= (OPEN_CTREE_WRITES |
12686                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12687                                 repair = 1;
12688                                 break;
12689                         case GETOPT_VAL_CHECK_CSUM:
12690                                 check_data_csum = 1;
12691                                 break;
12692                         case GETOPT_VAL_MODE:
12693                                 check_mode = parse_check_mode(optarg);
12694                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12695                                         error("unknown mode: %s", optarg);
12696                                         exit(1);
12697                                 }
12698                                 break;
12699                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12700                                 if (strcmp(optarg, "v1") == 0) {
12701                                         clear_space_cache = 1;
12702                                 } else if (strcmp(optarg, "v2") == 0) {
12703                                         clear_space_cache = 2;
12704                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12705                                 } else {
12706                                         error(
12707                 "invalid argument to --clear-space-cache, must be v1 or v2");
12708                                         exit(1);
12709                                 }
12710                                 ctree_flags |= OPEN_CTREE_WRITES;
12711                                 break;
12712                 }
12713         }
12714
12715         if (check_argc_exact(argc - optind, 1))
12716                 usage(cmd_check_usage);
12717
12718         if (ctx.progress_enabled) {
12719                 ctx.tp = TASK_NOTHING;
12720                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12721         }
12722
12723         /* This check is the only reason for --readonly to exist */
12724         if (readonly && repair) {
12725                 error("repair options are not compatible with --readonly");
12726                 exit(1);
12727         }
12728
12729         /*
12730          * Repair is not supported in low memory mode yet
12731          */
12732         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12733                 error("low memory mode doesn't support repair yet");
12734                 exit(1);
12735         }
12736
12737         radix_tree_init();
12738         cache_tree_init(&root_cache);
12739
12740         if((ret = check_mounted(argv[optind])) < 0) {
12741                 error("could not check mount status: %s", strerror(-ret));
12742                 err |= !!ret;
12743                 goto err_out;
12744         } else if(ret) {
12745                 error("%s is currently mounted, aborting", argv[optind]);
12746                 ret = -EBUSY;
12747                 err |= !!ret;
12748                 goto err_out;
12749         }
12750
12751         /* only allow partial opening under repair mode */
12752         if (repair)
12753                 ctree_flags |= OPEN_CTREE_PARTIAL;
12754
12755         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12756                                   chunk_root_bytenr, ctree_flags);
12757         if (!info) {
12758                 error("cannot open file system");
12759                 ret = -EIO;
12760                 err |= !!ret;
12761                 goto err_out;
12762         }
12763
12764         global_info = info;
12765         root = info->fs_root;
12766         if (clear_space_cache == 1) {
12767                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12768                         error(
12769                 "free space cache v2 detected, use --clear-space-cache v2");
12770                         ret = 1;
12771                         goto close_out;
12772                 }
12773                 printf("Clearing free space cache\n");
12774                 ret = clear_free_space_cache(info);
12775                 if (ret) {
12776                         error("failed to clear free space cache");
12777                         ret = 1;
12778                 } else {
12779                         printf("Free space cache cleared\n");
12780                 }
12781                 goto close_out;
12782         } else if (clear_space_cache == 2) {
12783                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12784                         printf("no free space cache v2 to clear\n");
12785                         ret = 0;
12786                         goto close_out;
12787                 }
12788                 printf("Clear free space cache v2\n");
12789                 ret = btrfs_clear_free_space_tree(info);
12790                 if (ret) {
12791                         error("failed to clear free space cache v2: %d", ret);
12792                         ret = 1;
12793                 } else {
12794                         printf("free space cache v2 cleared\n");
12795                 }
12796                 goto close_out;
12797         }
12798
12799         /*
12800          * Repair mode forces a transaction commit, which would make the
12801          * log tree fail to load on the next mount, so clear it first.
12802          */
12803         if (repair && btrfs_super_log_root(info->super_copy)) {
12804                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12805                 if (!ret) {
12806                         ret = 1;
12807                         err |= !!ret;
12808                         goto close_out;
12809                 }
12810                 ret = zero_log_tree(root);
12811                 err |= !!ret;
12812                 if (ret) {
12813                         error("failed to zero log tree: %d", ret);
12814                         goto close_out;
12815                 }
12816         }
12817
12818         uuid_unparse(info->super_copy->fsid, uuidbuf);
12819         if (qgroup_report) {
12820                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12821                        uuidbuf);
12822                 ret = qgroup_verify_all(info);
12823                 err |= !!ret;
12824                 if (ret == 0)
12825                         report_qgroups(1);
12826                 goto close_out;
12827         }
12828         if (subvolid) {
12829                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12830                        subvolid, argv[optind], uuidbuf);
12831                 ret = print_extent_state(info, subvolid);
12832                 err |= !!ret;
12833                 goto close_out;
12834         }
12835         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12836
12837         if (!extent_buffer_uptodate(info->tree_root->node) ||
12838             !extent_buffer_uptodate(info->dev_root->node) ||
12839             !extent_buffer_uptodate(info->chunk_root->node)) {
12840                 error("critical roots corrupted, unable to check the filesystem");
12841                 err |= !!ret;
12842                 ret = -EIO;
12843                 goto close_out;
12844         }
12845
12846         if (init_extent_tree || init_csum_tree) {
12847                 struct btrfs_trans_handle *trans;
12848
12849                 trans = btrfs_start_transaction(info->extent_root, 0);
12850                 if (IS_ERR(trans)) {
12851                         error("error starting transaction");
12852                         ret = PTR_ERR(trans);
12853                         err |= !!ret;
12854                         goto close_out;
12855                 }
12856
12857                 if (init_extent_tree) {
12858                         printf("Creating a new extent tree\n");
12859                         ret = reinit_extent_tree(trans, info);
12860                         err |= !!ret;
12861                         if (ret)
12862                                 goto close_out;
12863                 }
12864
12865                 if (init_csum_tree) {
12866                         printf("Reinitialize checksum tree\n");
12867                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12868                         if (ret) {
12869                                 error("checksum tree initialization failed: %d",
12870                                                 ret);
12871                                 ret = -EIO;
12872                                 err |= !!ret;
12873                                 goto close_out;
12874                         }
12875
12876                         ret = fill_csum_tree(trans, info->csum_root,
12877                                              init_extent_tree);
12878                         err |= !!ret;
12879                         if (ret) {
12880                                 error("checksum tree refilling failed: %d", ret);
12881                                 return -EIO;
12882                         }
12883                 }
12884                 /*
12885                  * Ok now we commit and run the normal fsck, which will add
12886                  * extent entries for all of the items it finds.
12887                  */
12888                 ret = btrfs_commit_transaction(trans, info->extent_root);
12889                 err |= !!ret;
12890                 if (ret)
12891                         goto close_out;
12892         }
12893         if (!extent_buffer_uptodate(info->extent_root->node)) {
12894                 error("critical: extent_root, unable to check the filesystem");
12895                 ret = -EIO;
12896                 err |= !!ret;
12897                 goto close_out;
12898         }
12899         if (!extent_buffer_uptodate(info->csum_root->node)) {
12900                 error("critical: csum_root, unable to check the filesystem");
12901                 ret = -EIO;
12902                 err |= !!ret;
12903                 goto close_out;
12904         }
12905
12906         if (!ctx.progress_enabled)
12907                 fprintf(stderr, "checking extents\n");
12908         if (check_mode == CHECK_MODE_LOWMEM)
12909                 ret = check_chunks_and_extents_v2(root);
12910         else
12911                 ret = check_chunks_and_extents(root);
12912         err |= !!ret;
12913         if (ret)
12914                 error(
12915                 "errors found in extent allocation tree or chunk allocation");
12916
12917         ret = repair_root_items(info);
12918         err |= !!ret;
12919         if (ret < 0)
12920                 goto close_out;
12921         if (repair) {
12922                 fprintf(stderr, "Fixed %d roots.\n", ret);
12923                 ret = 0;
12924         } else if (ret > 0) {
12925                 fprintf(stderr,
12926                        "Found %d roots with an outdated root item.\n",
12927                        ret);
12928                 fprintf(stderr,
12929                         "Please run a filesystem check with the option --repair to fix them.\n");
12930                 ret = 1;
12931                 err |= !!ret;
12932                 goto close_out;
12933         }
12934
12935         if (!ctx.progress_enabled) {
12936                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12937                         fprintf(stderr, "checking free space tree\n");
12938                 else
12939                         fprintf(stderr, "checking free space cache\n");
12940         }
12941         ret = check_space_cache(root);
12942         err |= !!ret;
12943         if (ret)
12944                 goto out;
12945
12946         /*
12947          * We used to have to have these hole extents in between our real
12948          * extents so if we don't have this flag set we need to make sure there
12949          * are no gaps in the file extents for inodes, otherwise we can just
12950          * ignore it when this happens.
12951          */
12952         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12953         if (!ctx.progress_enabled)
12954                 fprintf(stderr, "checking fs roots\n");
12955         if (check_mode == CHECK_MODE_LOWMEM)
12956                 ret = check_fs_roots_v2(root->fs_info);
12957         else
12958                 ret = check_fs_roots(root, &root_cache);
12959         err |= !!ret;
12960         if (ret)
12961                 goto out;
12962
12963         fprintf(stderr, "checking csums\n");
12964         ret = check_csums(root);
12965         err |= !!ret;
12966         if (ret)
12967                 goto out;
12968
12969         fprintf(stderr, "checking root refs\n");
12970         /* For low memory mode, check_fs_roots_v2 handles root refs */
12971         if (check_mode != CHECK_MODE_LOWMEM) {
12972                 ret = check_root_refs(root, &root_cache);
12973                 err |= !!ret;
12974                 if (ret)
12975                         goto out;
12976         }
12977
12978         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12979                 struct extent_buffer *eb;
12980
12981                 eb = list_first_entry(&root->fs_info->recow_ebs,
12982                                       struct extent_buffer, recow);
12983                 list_del_init(&eb->recow);
12984                 ret = recow_extent_buffer(root, eb);
12985                 err |= !!ret;
12986                 if (ret)
12987                         break;
12988         }
12989
12990         while (!list_empty(&delete_items)) {
12991                 struct bad_item *bad;
12992
12993                 bad = list_first_entry(&delete_items, struct bad_item, list);
12994                 list_del_init(&bad->list);
12995                 if (repair) {
12996                         ret = delete_bad_item(root, bad);
12997                         err |= !!ret;
12998                 }
12999                 free(bad);
13000         }
13001
13002         if (info->quota_enabled) {
13003                 fprintf(stderr, "checking quota groups\n");
13004                 ret = qgroup_verify_all(info);
13005                 err |= !!ret;
13006                 if (ret)
13007                         goto out;
13008                 report_qgroups(0);
13009                 ret = repair_qgroups(info, &qgroups_repaired);
13010                 err |= !!ret;
13011                 if (err)
13012                         goto out;
13013                 ret = 0;
13014         }
13015
13016         if (!list_empty(&root->fs_info->recow_ebs)) {
13017                 error("transid errors in file system");
13018                 ret = 1;
13019                 err |= !!ret;
13020         }
13021 out:
13022         if (found_old_backref) { /*
13023                  * There was a disk format change while mixed backrefs
13024                  * were in the testing tree. The old format existed
13025                  * for about one week.
13026                  */
13027                 printf("\n * Found old mixed backref format. "
13028                        "The old format is not supported! *"
13029                        "\n * Please mount the FS in readonly mode, "
13030                        "backup data and re-format the FS. *\n\n");
13031                 err |= 1;
13032         }
13033         printf("found %llu bytes used err is %d\n",
13034                (unsigned long long)bytes_used, ret);
13035         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13036         printf("total tree bytes: %llu\n",
13037                (unsigned long long)total_btree_bytes);
13038         printf("total fs tree bytes: %llu\n",
13039                (unsigned long long)total_fs_tree_bytes);
13040         printf("total extent tree bytes: %llu\n",
13041                (unsigned long long)total_extent_tree_bytes);
13042         printf("btree space waste bytes: %llu\n",
13043                (unsigned long long)btree_space_waste);
13044         printf("file data blocks allocated: %llu\n referenced %llu\n",
13045                 (unsigned long long)data_bytes_allocated,
13046                 (unsigned long long)data_bytes_referenced);
13047
13048         free_qgroup_counts();
13049         free_root_recs_tree(&root_cache);
13050 close_out:
13051         close_ctree(root);
13052 err_out:
13053         if (ctx.progress_enabled)
13054                 task_deinit(ctx.info);
13055
13056         return err;
13057 }