btrfs-progs: check: skip shared node or leaf check for low_memory mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Stage reported by the progress indicator.  The value is used as an index
 * into the message table in print_status_check().
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	TASK_NOTHING = 3,	/* sentinel: has to be the last element */
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check modes; parse_check_mode() maps a mode string onto one of these. */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	CHECK_MODE_UNKNOWN = 2,	/* the mode string was not recognised */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Error bits; bit 0 is not assigned in this group. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 /*
140  * Much like data_backref, just removed the undetermined members
141  * and change it to use list_head.
142  * During extent scan, it is stored in root->orphan_data_extent.
143  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
144  */
145 struct orphan_data_extent {
146         struct list_head list;
147         u64 root;
148         u64 objectid;
149         u64 offset;
150         u64 disk_bytenr;
151         u64 disk_len;
152 };
153
154 struct tree_backref {
155         struct extent_backref node;
156         union {
157                 u64 parent;
158                 u64 root;
159         };
160 };
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref,
 * which is two bits wide so that it can hold this value.
 */
enum {
	FLAG_UNSET = 2
};
169
170 struct extent_record {
171         struct list_head backrefs;
172         struct list_head dups;
173         struct list_head list;
174         struct cache_extent cache;
175         struct btrfs_disk_key parent_key;
176         u64 start;
177         u64 max_size;
178         u64 nr;
179         u64 refs;
180         u64 extent_item_refs;
181         u64 generation;
182         u64 parent_generation;
183         u64 info_objectid;
184         u32 num_duplicates;
185         u8 info_level;
186         unsigned int flag_block_full_backref:2;
187         unsigned int found_rec:1;
188         unsigned int content_checked:1;
189         unsigned int owner_ref_checked:1;
190         unsigned int is_root:1;
191         unsigned int metadata:1;
192         unsigned int bad_full_backref:1;
193         unsigned int crossing_stripes:1;
194         unsigned int wrong_chunk_type:1;
195 };
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
202 struct inode_backref {
203         struct list_head list;
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
209         int errors;
210         u64 dir;
211         u64 index;
212         u16 namelen;
213         char name[0];
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
221 struct root_item_record {
222         struct list_head list;
223         u64 objectid;
224         u64 bytenr;
225         u64 last_snapshot;
226         u8 level;
227         u8 drop_level;
228         int level_size;
229         struct btrfs_key drop_key;
230 };
231
/* Back reference error bits, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
245
246 struct file_extent_hole {
247         struct rb_node node;
248         u64 start;
249         u64 len;
250 };
251
252 struct inode_record {
253         struct list_head backrefs;
254         unsigned int checked:1;
255         unsigned int merging:1;
256         unsigned int found_inode_item:1;
257         unsigned int found_dir_item:1;
258         unsigned int found_file_extent:1;
259         unsigned int found_csum_item:1;
260         unsigned int some_csum_missing:1;
261         unsigned int nodatasum:1;
262         int errors;
263
264         u64 ino;
265         u32 nlink;
266         u32 imode;
267         u64 isize;
268         u64 nbytes;
269
270         u32 found_link;
271         u64 found_size;
272         u64 extent_start;
273         u64 extent_end;
274         struct rb_root holes;
275         struct list_head orphan_extents;
276
277         u32 refs;
278 };
279
/* Bits of inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
295
296 struct root_backref {
297         struct list_head list;
298         unsigned int found_dir_item:1;
299         unsigned int found_dir_index:1;
300         unsigned int found_back_ref:1;
301         unsigned int found_forward_ref:1;
302         unsigned int reachable:1;
303         int errors;
304         u64 ref_root;
305         u64 dir;
306         u64 index;
307         u16 namelen;
308         char name[0];
309 };
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
316 struct root_record {
317         struct list_head backrefs;
318         struct cache_extent cache;
319         unsigned int found_root_item:1;
320         u64 objectid;
321         u32 found_ref;
322 };
323
324 struct ptr_node {
325         struct cache_extent cache;
326         void *data;
327 };
328
329 struct shared_node {
330         struct cache_extent cache;
331         struct cache_tree root_cache;
332         struct cache_tree inode_cache;
333         struct inode_record *current;
334         u32 refs;
335 };
336
337 struct block_info {
338         u64 start;
339         u32 size;
340 };
341
342 struct walk_control {
343         struct cache_tree shared;
344         struct shared_node *nodes[BTRFS_MAX_LEVEL];
345         int active_node;
346         int root_level;
347 };
348
349 struct bad_item {
350         struct btrfs_key key;
351         u64 root_id;
352         struct list_head list;
353 };
354
355 struct extent_entry {
356         u64 bytenr;
357         u64 bytes;
358         int count;
359         int broken;
360         struct list_head list;
361 };
362
363 struct root_item_info {
364         /* level of the root */
365         u8 level;
366         /* number of nodes at this level, must be 1 for a root */
367         int node_count;
368         u64 bytenr;
369         u64 gen;
370         struct cache_extent cache_extent;
371 };
372
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors impossible to
 * tell apart once OR-ed into an error mask; it now uses the free bit 9.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type mismatch */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputc('\n', stdout);
	fflush(stdout);
	return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize, root->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct btrfs_root *root,
1481                             struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(root, eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of extent tree lookups made while walking a tree.  All
 * three arrays are indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the block the entry at this level describes */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count btrfs_lookup_extent_info() reported for it */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero when the block has to be checked in the current root */
        int need_check[BTRFS_MAX_LEVEL];
};

/* Forward declarations needed by process_one_leaf_v2() below. */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         int root_level = btrfs_header_level(root->node);
1879         int i;
1880         int ret = 0; /* Final return value */
1881         int err = 0; /* Positive error bitmap */
1882
1883         cur_bytenr = cur->start;
1884
1885         /* skip to first inode item in this leaf */
1886         nritems = btrfs_header_nritems(cur);
1887         for (i = 0; i < nritems; i++) {
1888                 btrfs_item_key_to_cpu(cur, &key, i);
1889                 if (key.type == BTRFS_INODE_ITEM_KEY)
1890                         break;
1891         }
1892         if (i == nritems) {
1893                 path->slots[0] = nritems;
1894                 return 0;
1895         }
1896         path->slots[0] = i;
1897
1898 again:
1899         err |= check_inode_item(root, path, ext_ref);
1900
1901         if (err & LAST_ITEM)
1902                 goto out;
1903
1904         /* still have inode items in thie leaf */
1905         if (cur->start == cur_bytenr)
1906                 goto again;
1907
1908         /*
1909          * we have switched to another leaf, above nodes may
1910          * have changed, here walk down the path, if a node
1911          * or leaf is shared, check whether we can skip this
1912          * node or leaf.
1913          */
1914         for (i = root_level; i >= 0; i--) {
1915                 if (path->nodes[i]->start == nrefs->bytenr[i])
1916                         continue;
1917
1918                 ret = update_nodes_refs(root,
1919                                 path->nodes[i]->start,
1920                                 nrefs, i);
1921                 if (ret)
1922                         goto out;
1923
1924                 if (!nrefs->need_check[i]) {
1925                         *level += 1;
1926                         break;
1927                 }
1928         }
1929
1930         for (i = 0; i < *level; i++) {
1931                 free_extent_buffer(path->nodes[i]);
1932                 path->nodes[i] = NULL;
1933         }
1934 out:
1935         err &= ~LAST_ITEM;
1936         /*
1937          * Convert any error bitmap to -EIO, as we should avoid
1938          * mixing positive and negative return value to represent
1939          * error
1940          */
1941         if (err && !ret)
1942                 ret = -EIO;
1943         return ret;
1944 }
1945
1946 static void reada_walk_down(struct btrfs_root *root,
1947                             struct extent_buffer *node, int slot)
1948 {
1949         u64 bytenr;
1950         u64 ptr_gen;
1951         u32 nritems;
1952         u32 blocksize;
1953         int i;
1954         int level;
1955
1956         level = btrfs_header_level(node);
1957         if (level != 1)
1958                 return;
1959
1960         nritems = btrfs_header_nritems(node);
1961         blocksize = root->nodesize;
1962         for (i = slot; i < nritems; i++) {
1963                 bytenr = btrfs_node_blockptr(node, i);
1964                 ptr_gen = btrfs_node_ptr_generation(node, i);
1965                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1966         }
1967 }
1968
1969 /*
1970  * Check the child node/leaf by the following condition:
1971  * 1. the first item key of the node/leaf should be the same with the one
1972  *    in parent.
1973  * 2. block in parent node should match the child node/leaf.
1974  * 3. generation of parent node and child's header should be consistent.
1975  *
1976  * Or the child node/leaf pointed by the key in parent is not valid.
1977  *
1978  * We hope to check leaf owner too, but since subvol may share leaves,
1979  * which makes leaf owner check not so strong, key check should be
1980  * sufficient enough for that case.
1981  */
1982 static int check_child_node(struct btrfs_root *root,
1983                             struct extent_buffer *parent, int slot,
1984                             struct extent_buffer *child)
1985 {
1986         struct btrfs_key parent_key;
1987         struct btrfs_key child_key;
1988         int ret = 0;
1989
1990         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1991         if (btrfs_header_level(child) == 0)
1992                 btrfs_item_key_to_cpu(child, &child_key, 0);
1993         else
1994                 btrfs_node_key_to_cpu(child, &child_key, 0);
1995
1996         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1997                 ret = -EINVAL;
1998                 fprintf(stderr,
1999                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2000                         parent_key.objectid, parent_key.type, parent_key.offset,
2001                         child_key.objectid, child_key.type, child_key.offset);
2002         }
2003         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2004                 ret = -EINVAL;
2005                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2006                         btrfs_node_blockptr(parent, slot),
2007                         btrfs_header_bytenr(child));
2008         }
2009         if (btrfs_node_ptr_generation(parent, slot) !=
2010             btrfs_header_generation(child)) {
2011                 ret = -EINVAL;
2012                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2013                         btrfs_header_generation(child),
2014                         btrfs_node_ptr_generation(parent, slot));
2015         }
2016         return ret;
2017 }
2018
2019 /*
2020  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2021  * in every fs or file tree check. Here we find its all root ids, and only check
2022  * it in the fs or file tree which has the smallest root id.
2023  */
2024 static int need_check(struct btrfs_root *root, struct ulist *roots)
2025 {
2026         struct rb_node *node;
2027         struct ulist_node *u;
2028
2029         if (roots->nnodes == 1)
2030                 return 1;
2031
2032         node = rb_first(&roots->root);
2033         u = rb_entry(node, struct ulist_node, rb_node);
2034         /*
2035          * current root id is not smallest, we skip it and let it be checked
2036          * in the fs or file tree who hash the smallest root id.
2037          */
2038         if (root->objectid != u->val)
2039                 return 0;
2040
2041         return 1;
2042 }
2043
2044 /*
2045  * for a tree node or leaf, we record its reference count, so later if we still
2046  * process this node or leaf, don't need to compute its reference count again.
2047  */
2048 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2049                              struct node_refs *nrefs, u64 level)
2050 {
2051         int check, ret;
2052         u64 refs;
2053         struct ulist *roots;
2054
2055         if (nrefs->bytenr[level] != bytenr) {
2056                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2057                                        level, 1, &refs, NULL);
2058                 if (ret < 0)
2059                         return ret;
2060
2061                 nrefs->bytenr[level] = bytenr;
2062                 nrefs->refs[level] = refs;
2063                 if (refs > 1) {
2064                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2065                                                    0, &roots);
2066                         if (ret)
2067                                 return -EIO;
2068
2069                         check = need_check(root, roots);
2070                         ulist_free(roots);
2071                         nrefs->need_check[level] = check;
2072                 } else {
2073                         nrefs->need_check[level] = 1;
2074                 }
2075         }
2076
2077         return 0;
2078 }
2079
2080 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2081                           struct walk_control *wc, int *level,
2082                           struct node_refs *nrefs)
2083 {
2084         enum btrfs_tree_block_status status;
2085         u64 bytenr;
2086         u64 ptr_gen;
2087         struct extent_buffer *next;
2088         struct extent_buffer *cur;
2089         u32 blocksize;
2090         int ret, err = 0;
2091         u64 refs;
2092
2093         WARN_ON(*level < 0);
2094         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2095
2096         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2097                 refs = nrefs->refs[*level];
2098                 ret = 0;
2099         } else {
2100                 ret = btrfs_lookup_extent_info(NULL, root,
2101                                        path->nodes[*level]->start,
2102                                        *level, 1, &refs, NULL);
2103                 if (ret < 0) {
2104                         err = ret;
2105                         goto out;
2106                 }
2107                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2108                 nrefs->refs[*level] = refs;
2109         }
2110
2111         if (refs > 1) {
2112                 ret = enter_shared_node(root, path->nodes[*level]->start,
2113                                         refs, wc, *level);
2114                 if (ret > 0) {
2115                         err = ret;
2116                         goto out;
2117                 }
2118         }
2119
2120         while (*level >= 0) {
2121                 WARN_ON(*level < 0);
2122                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2123                 cur = path->nodes[*level];
2124
2125                 if (btrfs_header_level(cur) != *level)
2126                         WARN_ON(1);
2127
2128                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2129                         break;
2130                 if (*level == 0) {
2131                         ret = process_one_leaf(root, cur, wc);
2132                         if (ret < 0)
2133                                 err = ret;
2134                         break;
2135                 }
2136                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2137                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2138                 blocksize = root->nodesize;
2139
2140                 if (bytenr == nrefs->bytenr[*level - 1]) {
2141                         refs = nrefs->refs[*level - 1];
2142                 } else {
2143                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2144                                         *level - 1, 1, &refs, NULL);
2145                         if (ret < 0) {
2146                                 refs = 0;
2147                         } else {
2148                                 nrefs->bytenr[*level - 1] = bytenr;
2149                                 nrefs->refs[*level - 1] = refs;
2150                         }
2151                 }
2152
2153                 if (refs > 1) {
2154                         ret = enter_shared_node(root, bytenr, refs,
2155                                                 wc, *level - 1);
2156                         if (ret > 0) {
2157                                 path->slots[*level]++;
2158                                 continue;
2159                         }
2160                 }
2161
2162                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2163                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2164                         free_extent_buffer(next);
2165                         reada_walk_down(root, cur, path->slots[*level]);
2166                         next = read_tree_block(root, bytenr, blocksize,
2167                                                ptr_gen);
2168                         if (!extent_buffer_uptodate(next)) {
2169                                 struct btrfs_key node_key;
2170
2171                                 btrfs_node_key_to_cpu(path->nodes[*level],
2172                                                       &node_key,
2173                                                       path->slots[*level]);
2174                                 btrfs_add_corrupt_extent_record(root->fs_info,
2175                                                 &node_key,
2176                                                 path->nodes[*level]->start,
2177                                                 root->nodesize, *level);
2178                                 err = -EIO;
2179                                 goto out;
2180                         }
2181                 }
2182
2183                 ret = check_child_node(root, cur, path->slots[*level], next);
2184                 if (ret) {
2185                         err = ret;
2186                         goto out;
2187                 }
2188
2189                 if (btrfs_is_leaf(next))
2190                         status = btrfs_check_leaf(root, NULL, next);
2191                 else
2192                         status = btrfs_check_node(root, NULL, next);
2193                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2194                         free_extent_buffer(next);
2195                         err = -EIO;
2196                         goto out;
2197                 }
2198
2199                 *level = *level - 1;
2200                 free_extent_buffer(path->nodes[*level]);
2201                 path->nodes[*level] = next;
2202                 path->slots[*level] = 0;
2203         }
2204 out:
2205         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2206         return err;
2207 }
2208
/*
 * Forward declaration for walk_down_tree_v2(); repeats the declaration
 * placed before process_one_leaf_v2().
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
2211
2212 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2213                              int *level, struct node_refs *nrefs, int ext_ref)
2214 {
2215         enum btrfs_tree_block_status status;
2216         u64 bytenr;
2217         u64 ptr_gen;
2218         struct extent_buffer *next;
2219         struct extent_buffer *cur;
2220         u32 blocksize;
2221         int ret;
2222
2223         WARN_ON(*level < 0);
2224         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2225
2226         ret = update_nodes_refs(root, path->nodes[*level]->start,
2227                                 nrefs, *level);
2228         if (ret < 0)
2229                 return ret;
2230
2231         while (*level >= 0) {
2232                 WARN_ON(*level < 0);
2233                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2234                 cur = path->nodes[*level];
2235
2236                 if (btrfs_header_level(cur) != *level)
2237                         WARN_ON(1);
2238
2239                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2240                         break;
2241                 /* Don't forgot to check leaf/node validation */
2242                 if (*level == 0) {
2243                         ret = btrfs_check_leaf(root, NULL, cur);
2244                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2245                                 ret = -EIO;
2246                                 break;
2247                         }
2248                         ret = process_one_leaf_v2(root, path, nrefs,
2249                                                   level, ext_ref);
2250                         break;
2251                 } else {
2252                         ret = btrfs_check_node(root, NULL, cur);
2253                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2254                                 ret = -EIO;
2255                                 break;
2256                         }
2257                 }
2258                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2259                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2260                 blocksize = root->nodesize;
2261
2262                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2263                 if (ret)
2264                         break;
2265                 if (!nrefs->need_check[*level - 1]) {
2266                         path->slots[*level]++;
2267                         continue;
2268                 }
2269
2270                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2271                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2272                         free_extent_buffer(next);
2273                         reada_walk_down(root, cur, path->slots[*level]);
2274                         next = read_tree_block(root, bytenr, blocksize,
2275                                                ptr_gen);
2276                         if (!extent_buffer_uptodate(next)) {
2277                                 struct btrfs_key node_key;
2278
2279                                 btrfs_node_key_to_cpu(path->nodes[*level],
2280                                                       &node_key,
2281                                                       path->slots[*level]);
2282                                 btrfs_add_corrupt_extent_record(root->fs_info,
2283                                                 &node_key,
2284                                                 path->nodes[*level]->start,
2285                                                 root->nodesize, *level);
2286                                 ret = -EIO;
2287                                 break;
2288                         }
2289                 }
2290
2291                 ret = check_child_node(root, cur, path->slots[*level], next);
2292                 if (ret < 0) 
2293                         break;
2294
2295                 if (btrfs_is_leaf(next))
2296                         status = btrfs_check_leaf(root, NULL, next);
2297                 else
2298                         status = btrfs_check_node(root, NULL, next);
2299                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2300                         free_extent_buffer(next);
2301                         ret = -EIO;
2302                         break;
2303                 }
2304
2305                 *level = *level - 1;
2306                 free_extent_buffer(path->nodes[*level]);
2307                 path->nodes[*level] = next;
2308                 path->slots[*level] = 0;
2309         }
2310         return ret;
2311 }
2312
2313 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2314                         struct walk_control *wc, int *level)
2315 {
2316         int i;
2317         struct extent_buffer *leaf;
2318
2319         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2320                 leaf = path->nodes[i];
2321                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2322                         path->slots[i]++;
2323                         *level = i;
2324                         return 0;
2325                 } else {
2326                         free_extent_buffer(path->nodes[*level]);
2327                         path->nodes[*level] = NULL;
2328                         BUG_ON(*level > wc->active_node);
2329                         if (*level == wc->active_node)
2330                                 leave_shared_node(root, wc, *level);
2331                         *level = i + 1;
2332                 }
2333         }
2334         return 1;
2335 }
2336
2337 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2338                            int *level)
2339 {
2340         int i;
2341         struct extent_buffer *leaf;
2342
2343         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2344                 leaf = path->nodes[i];
2345                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2346                         path->slots[i]++;
2347                         *level = i;
2348                         return 0;
2349                 } else {
2350                         free_extent_buffer(path->nodes[*level]);
2351                         path->nodes[*level] = NULL;
2352                         *level = i + 1;
2353                 }
2354         }
2355         return 1;
2356 }
2357
2358 static int check_root_dir(struct inode_record *rec)
2359 {
2360         struct inode_backref *backref;
2361         int ret = -1;
2362
2363         if (!rec->found_inode_item || rec->errors)
2364                 goto out;
2365         if (rec->nlink != 1 || rec->found_link != 0)
2366                 goto out;
2367         if (list_empty(&rec->backrefs))
2368                 goto out;
2369         backref = to_inode_backref(rec->backrefs.next);
2370         if (!backref->found_inode_ref)
2371                 goto out;
2372         if (backref->index != 0 || backref->namelen != 2 ||
2373             memcmp(backref->name, "..", 2))
2374                 goto out;
2375         if (backref->found_dir_index || backref->found_dir_item)
2376                 goto out;
2377         ret = 0;
2378 out:
2379         return ret;
2380 }
2381
2382 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2383                               struct btrfs_root *root, struct btrfs_path *path,
2384                               struct inode_record *rec)
2385 {
2386         struct btrfs_inode_item *ei;
2387         struct btrfs_key key;
2388         int ret;
2389
2390         key.objectid = rec->ino;
2391         key.type = BTRFS_INODE_ITEM_KEY;
2392         key.offset = (u64)-1;
2393
2394         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2395         if (ret < 0)
2396                 goto out;
2397         if (ret) {
2398                 if (!path->slots[0]) {
2399                         ret = -ENOENT;
2400                         goto out;
2401                 }
2402                 path->slots[0]--;
2403                 ret = 0;
2404         }
2405         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2406         if (key.objectid != rec->ino) {
2407                 ret = -ENOENT;
2408                 goto out;
2409         }
2410
2411         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2412                             struct btrfs_inode_item);
2413         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2414         btrfs_mark_buffer_dirty(path->nodes[0]);
2415         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2416         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2417                root->root_key.objectid);
2418 out:
2419         btrfs_release_path(path);
2420         return ret;
2421 }
2422
2423 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2424                                     struct btrfs_root *root,
2425                                     struct btrfs_path *path,
2426                                     struct inode_record *rec)
2427 {
2428         int ret;
2429
2430         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2431         btrfs_release_path(path);
2432         if (!ret)
2433                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2434         return ret;
2435 }
2436
2437 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2438                                struct btrfs_root *root,
2439                                struct btrfs_path *path,
2440                                struct inode_record *rec)
2441 {
2442         struct btrfs_inode_item *ei;
2443         struct btrfs_key key;
2444         int ret = 0;
2445
2446         key.objectid = rec->ino;
2447         key.type = BTRFS_INODE_ITEM_KEY;
2448         key.offset = 0;
2449
2450         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2451         if (ret) {
2452                 if (ret > 0)
2453                         ret = -ENOENT;
2454                 goto out;
2455         }
2456
2457         /* Since ret == 0, no need to check anything */
2458         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2459                             struct btrfs_inode_item);
2460         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2461         btrfs_mark_buffer_dirty(path->nodes[0]);
2462         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2463         printf("reset nbytes for ino %llu root %llu\n",
2464                rec->ino, root->root_key.objectid);
2465 out:
2466         btrfs_release_path(path);
2467         return ret;
2468 }
2469
2470 static int add_missing_dir_index(struct btrfs_root *root,
2471                                  struct cache_tree *inode_cache,
2472                                  struct inode_record *rec,
2473                                  struct inode_backref *backref)
2474 {
2475         struct btrfs_path path;
2476         struct btrfs_trans_handle *trans;
2477         struct btrfs_dir_item *dir_item;
2478         struct extent_buffer *leaf;
2479         struct btrfs_key key;
2480         struct btrfs_disk_key disk_key;
2481         struct inode_record *dir_rec;
2482         unsigned long name_ptr;
2483         u32 data_size = sizeof(*dir_item) + backref->namelen;
2484         int ret;
2485
2486         trans = btrfs_start_transaction(root, 1);
2487         if (IS_ERR(trans))
2488                 return PTR_ERR(trans);
2489
2490         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2491                 (unsigned long long)rec->ino);
2492
2493         btrfs_init_path(&path);
2494         key.objectid = backref->dir;
2495         key.type = BTRFS_DIR_INDEX_KEY;
2496         key.offset = backref->index;
2497         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2498         BUG_ON(ret);
2499
2500         leaf = path.nodes[0];
2501         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2502
2503         disk_key.objectid = cpu_to_le64(rec->ino);
2504         disk_key.type = BTRFS_INODE_ITEM_KEY;
2505         disk_key.offset = 0;
2506
2507         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2508         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2509         btrfs_set_dir_data_len(leaf, dir_item, 0);
2510         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2511         name_ptr = (unsigned long)(dir_item + 1);
2512         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2513         btrfs_mark_buffer_dirty(leaf);
2514         btrfs_release_path(&path);
2515         btrfs_commit_transaction(trans, root);
2516
2517         backref->found_dir_index = 1;
2518         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2519         BUG_ON(IS_ERR(dir_rec));
2520         if (!dir_rec)
2521                 return 0;
2522         dir_rec->found_size += backref->namelen;
2523         if (dir_rec->found_size == dir_rec->isize &&
2524             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2525                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2526         if (dir_rec->found_size != dir_rec->isize)
2527                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2528
2529         return 0;
2530 }
2531
2532 static int delete_dir_index(struct btrfs_root *root,
2533                             struct cache_tree *inode_cache,
2534                             struct inode_record *rec,
2535                             struct inode_backref *backref)
2536 {
2537         struct btrfs_trans_handle *trans;
2538         struct btrfs_dir_item *di;
2539         struct btrfs_path path;
2540         int ret = 0;
2541
2542         trans = btrfs_start_transaction(root, 1);
2543         if (IS_ERR(trans))
2544                 return PTR_ERR(trans);
2545
2546         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2547                 (unsigned long long)backref->dir,
2548                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2549                 (unsigned long long)root->objectid);
2550
2551         btrfs_init_path(&path);
2552         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2553                                     backref->name, backref->namelen,
2554                                     backref->index, -1);
2555         if (IS_ERR(di)) {
2556                 ret = PTR_ERR(di);
2557                 btrfs_release_path(&path);
2558                 btrfs_commit_transaction(trans, root);
2559                 if (ret == -ENOENT)
2560                         return 0;
2561                 return ret;
2562         }
2563
2564         if (!di)
2565                 ret = btrfs_del_item(trans, root, &path);
2566         else
2567                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2568         BUG_ON(ret);
2569         btrfs_release_path(&path);
2570         btrfs_commit_transaction(trans, root);
2571         return ret;
2572 }
2573
2574 static int create_inode_item(struct btrfs_root *root,
2575                              struct inode_record *rec,
2576                              struct inode_backref *backref, int root_dir)
2577 {
2578         struct btrfs_trans_handle *trans;
2579         struct btrfs_inode_item inode_item;
2580         time_t now = time(NULL);
2581         int ret;
2582
2583         trans = btrfs_start_transaction(root, 1);
2584         if (IS_ERR(trans)) {
2585                 ret = PTR_ERR(trans);
2586                 return ret;
2587         }
2588
2589         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2590                 "be incomplete, please check permissions and content after "
2591                 "the fsck completes.\n", (unsigned long long)root->objectid,
2592                 (unsigned long long)rec->ino);
2593
2594         memset(&inode_item, 0, sizeof(inode_item));
2595         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2596         if (root_dir)
2597                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2598         else
2599                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2600         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2601         if (rec->found_dir_item) {
2602                 if (rec->found_file_extent)
2603                         fprintf(stderr, "root %llu inode %llu has both a dir "
2604                                 "item and extents, unsure if it is a dir or a "
2605                                 "regular file so setting it as a directory\n",
2606                                 (unsigned long long)root->objectid,
2607                                 (unsigned long long)rec->ino);
2608                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2609                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2610         } else if (!rec->found_dir_item) {
2611                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2612                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2613         }
2614         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2615         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2616         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2617         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2618         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2619         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2620         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2621         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2622
2623         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2624         BUG_ON(ret);
2625         btrfs_commit_transaction(trans, root);
2626         return 0;
2627 }
2628
2629 static int repair_inode_backrefs(struct btrfs_root *root,
2630                                  struct inode_record *rec,
2631                                  struct cache_tree *inode_cache,
2632                                  int delete)
2633 {
2634         struct inode_backref *tmp, *backref;
2635         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2636         int ret = 0;
2637         int repaired = 0;
2638
2639         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2640                 if (!delete && rec->ino == root_dirid) {
2641                         if (!rec->found_inode_item) {
2642                                 ret = create_inode_item(root, rec, backref, 1);
2643                                 if (ret)
2644                                         break;
2645                                 repaired++;
2646                         }
2647                 }
2648
2649                 /* Index 0 for root dir's are special, don't mess with it */
2650                 if (rec->ino == root_dirid && backref->index == 0)
2651                         continue;
2652
2653                 if (delete &&
2654                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2655                      (backref->found_dir_index && backref->found_inode_ref &&
2656                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2657                         ret = delete_dir_index(root, inode_cache, rec, backref);
2658                         if (ret)
2659                                 break;
2660                         repaired++;
2661                         list_del(&backref->list);
2662                         free(backref);
2663                 }
2664
2665                 if (!delete && !backref->found_dir_index &&
2666                     backref->found_dir_item && backref->found_inode_ref) {
2667                         ret = add_missing_dir_index(root, inode_cache, rec,
2668                                                     backref);
2669                         if (ret)
2670                                 break;
2671                         repaired++;
2672                         if (backref->found_dir_item &&
2673                             backref->found_dir_index &&
2674                             backref->found_dir_index) {
2675                                 if (!backref->errors &&
2676                                     backref->found_inode_ref) {
2677                                         list_del(&backref->list);
2678                                         free(backref);
2679                                 }
2680                         }
2681                 }
2682
2683                 if (!delete && (!backref->found_dir_index &&
2684                                 !backref->found_dir_item &&
2685                                 backref->found_inode_ref)) {
2686                         struct btrfs_trans_handle *trans;
2687                         struct btrfs_key location;
2688
2689                         ret = check_dir_conflict(root, backref->name,
2690                                                  backref->namelen,
2691                                                  backref->dir,
2692                                                  backref->index);
2693                         if (ret) {
2694                                 /*
2695                                  * let nlink fixing routine to handle it,
2696                                  * which can do it better.
2697                                  */
2698                                 ret = 0;
2699                                 break;
2700                         }
2701                         location.objectid = rec->ino;
2702                         location.type = BTRFS_INODE_ITEM_KEY;
2703                         location.offset = 0;
2704
2705                         trans = btrfs_start_transaction(root, 1);
2706                         if (IS_ERR(trans)) {
2707                                 ret = PTR_ERR(trans);
2708                                 break;
2709                         }
2710                         fprintf(stderr, "adding missing dir index/item pair "
2711                                 "for inode %llu\n",
2712                                 (unsigned long long)rec->ino);
2713                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2714                                                     backref->namelen,
2715                                                     backref->dir, &location,
2716                                                     imode_to_type(rec->imode),
2717                                                     backref->index);
2718                         BUG_ON(ret);
2719                         btrfs_commit_transaction(trans, root);
2720                         repaired++;
2721                 }
2722
2723                 if (!delete && (backref->found_inode_ref &&
2724                                 backref->found_dir_index &&
2725                                 backref->found_dir_item &&
2726                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2727                                 !rec->found_inode_item)) {
2728                         ret = create_inode_item(root, rec, backref, 0);
2729                         if (ret)
2730                                 break;
2731                         repaired++;
2732                 }
2733
2734         }
2735         return ret ? ret : repaired;
2736 }
2737
2738 /*
2739  * To determine the file type for nlink/inode_item repair
2740  *
2741  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2742  * Return -ENOENT if file type is not found.
2743  */
2744 static int find_file_type(struct inode_record *rec, u8 *type)
2745 {
2746         struct inode_backref *backref;
2747
2748         /* For inode item recovered case */
2749         if (rec->found_inode_item) {
2750                 *type = imode_to_type(rec->imode);
2751                 return 0;
2752         }
2753
2754         list_for_each_entry(backref, &rec->backrefs, list) {
2755                 if (backref->found_dir_index || backref->found_dir_item) {
2756                         *type = backref->filetype;
2757                         return 0;
2758                 }
2759         }
2760         return -ENOENT;
2761 }
2762
2763 /*
2764  * To determine the file name for nlink repair
2765  *
2766  * Return 0 if file name is found, set name and namelen.
2767  * Return -ENOENT if file name is not found.
2768  */
2769 static int find_file_name(struct inode_record *rec,
2770                           char *name, int *namelen)
2771 {
2772         struct inode_backref *backref;
2773
2774         list_for_each_entry(backref, &rec->backrefs, list) {
2775                 if (backref->found_dir_index || backref->found_dir_item ||
2776                     backref->found_inode_ref) {
2777                         memcpy(name, backref->name, backref->namelen);
2778                         *namelen = backref->namelen;
2779                         return 0;
2780                 }
2781         }
2782         return -ENOENT;
2783 }
2784
2785 /* Reset the nlink of the inode to the correct one */
2786 static int reset_nlink(struct btrfs_trans_handle *trans,
2787                        struct btrfs_root *root,
2788                        struct btrfs_path *path,
2789                        struct inode_record *rec)
2790 {
2791         struct inode_backref *backref;
2792         struct inode_backref *tmp;
2793         struct btrfs_key key;
2794         struct btrfs_inode_item *inode_item;
2795         int ret = 0;
2796
2797         /* We don't believe this either, reset it and iterate backref */
2798         rec->found_link = 0;
2799
2800         /* Remove all backref including the valid ones */
2801         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2802                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2803                                    backref->index, backref->name,
2804                                    backref->namelen, 0);
2805                 if (ret < 0)
2806                         goto out;
2807
2808                 /* remove invalid backref, so it won't be added back */
2809                 if (!(backref->found_dir_index &&
2810                       backref->found_dir_item &&
2811                       backref->found_inode_ref)) {
2812                         list_del(&backref->list);
2813                         free(backref);
2814                 } else {
2815                         rec->found_link++;
2816                 }
2817         }
2818
2819         /* Set nlink to 0 */
2820         key.objectid = rec->ino;
2821         key.type = BTRFS_INODE_ITEM_KEY;
2822         key.offset = 0;
2823         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2824         if (ret < 0)
2825                 goto out;
2826         if (ret > 0) {
2827                 ret = -ENOENT;
2828                 goto out;
2829         }
2830         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2831                                     struct btrfs_inode_item);
2832         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2833         btrfs_mark_buffer_dirty(path->nodes[0]);
2834         btrfs_release_path(path);
2835
2836         /*
2837          * Add back valid inode_ref/dir_item/dir_index,
2838          * add_link() will handle the nlink inc, so new nlink must be correct
2839          */
2840         list_for_each_entry(backref, &rec->backrefs, list) {
2841                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2842                                      backref->name, backref->namelen,
2843                                      backref->filetype, &backref->index, 1);
2844                 if (ret < 0)
2845                         goto out;
2846         }
2847 out:
2848         btrfs_release_path(path);
2849         return ret;
2850 }
2851
2852 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2853                                struct btrfs_root *root,
2854                                struct btrfs_path *path,
2855                                struct inode_record *rec)
2856 {
2857         char *dir_name = "lost+found";
2858         char namebuf[BTRFS_NAME_LEN] = {0};
2859         u64 lost_found_ino;
2860         u32 mode = 0700;
2861         u8 type = 0;
2862         int namelen = 0;
2863         int name_recovered = 0;
2864         int type_recovered = 0;
2865         int ret = 0;
2866
2867         /*
2868          * Get file name and type first before these invalid inode ref
2869          * are deleted by remove_all_invalid_backref()
2870          */
2871         name_recovered = !find_file_name(rec, namebuf, &namelen);
2872         type_recovered = !find_file_type(rec, &type);
2873
2874         if (!name_recovered) {
2875                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2876                        rec->ino, rec->ino);
2877                 namelen = count_digits(rec->ino);
2878                 sprintf(namebuf, "%llu", rec->ino);
2879                 name_recovered = 1;
2880         }
2881         if (!type_recovered) {
2882                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2883                        rec->ino);
2884                 type = BTRFS_FT_REG_FILE;
2885                 type_recovered = 1;
2886         }
2887
2888         ret = reset_nlink(trans, root, path, rec);
2889         if (ret < 0) {
2890                 fprintf(stderr,
2891                         "Failed to reset nlink for inode %llu: %s\n",
2892                         rec->ino, strerror(-ret));
2893                 goto out;
2894         }
2895
2896         if (rec->found_link == 0) {
2897                 lost_found_ino = root->highest_inode;
2898                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2899                         ret = -EOVERFLOW;
2900                         goto out;
2901                 }
2902                 lost_found_ino++;
2903                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2904                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2905                                   mode);
2906                 if (ret < 0) {
2907                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2908                                 dir_name, strerror(-ret));
2909                         goto out;
2910                 }
2911                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2912                                      namebuf, namelen, type, NULL, 1);
2913                 /*
2914                  * Add ".INO" suffix several times to handle case where
2915                  * "FILENAME.INO" is already taken by another file.
2916                  */
2917                 while (ret == -EEXIST) {
2918                         /*
2919                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2920                          */
2921                         if (namelen + count_digits(rec->ino) + 1 >
2922                             BTRFS_NAME_LEN) {
2923                                 ret = -EFBIG;
2924                                 goto out;
2925                         }
2926                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2927                                  ".%llu", rec->ino);
2928                         namelen += count_digits(rec->ino) + 1;
2929                         ret = btrfs_add_link(trans, root, rec->ino,
2930                                              lost_found_ino, namebuf,
2931                                              namelen, type, NULL, 1);
2932                 }
2933                 if (ret < 0) {
2934                         fprintf(stderr,
2935                                 "Failed to link the inode %llu to %s dir: %s\n",
2936                                 rec->ino, dir_name, strerror(-ret));
2937                         goto out;
2938                 }
2939                 /*
2940                  * Just increase the found_link, don't actually add the
2941                  * backref. This will make things easier and this inode
2942                  * record will be freed after the repair is done.
2943                  * So fsck will not report problem about this inode.
2944                  */
2945                 rec->found_link++;
2946                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2947                        namelen, namebuf, dir_name);
2948         }
2949         printf("Fixed the nlink of inode %llu\n", rec->ino);
2950 out:
2951         /*
2952          * Clear the flag anyway, or we will loop forever for the same inode
2953          * as it will not be removed from the bad inode list and the dead loop
2954          * happens.
2955          */
2956         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2957         btrfs_release_path(path);
2958         return ret;
2959 }
2960
2961 /*
2962  * Check if there is any normal(reg or prealloc) file extent for given
2963  * ino.
2964  * This is used to determine the file type when neither its dir_index/item or
2965  * inode_item exists.
2966  *
2967  * This will *NOT* report error, if any error happens, just consider it does
2968  * not have any normal file extent.
2969  */
2970 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2971 {
2972         struct btrfs_path path;
2973         struct btrfs_key key;
2974         struct btrfs_key found_key;
2975         struct btrfs_file_extent_item *fi;
2976         u8 type;
2977         int ret = 0;
2978
2979         btrfs_init_path(&path);
2980         key.objectid = ino;
2981         key.type = BTRFS_EXTENT_DATA_KEY;
2982         key.offset = 0;
2983
2984         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2985         if (ret < 0) {
2986                 ret = 0;
2987                 goto out;
2988         }
2989         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2990                 ret = btrfs_next_leaf(root, &path);
2991                 if (ret) {
2992                         ret = 0;
2993                         goto out;
2994                 }
2995         }
2996         while (1) {
2997                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2998                                       path.slots[0]);
2999                 if (found_key.objectid != ino ||
3000                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3001                         break;
3002                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3003                                     struct btrfs_file_extent_item);
3004                 type = btrfs_file_extent_type(path.nodes[0], fi);
3005                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3006                         ret = 1;
3007                         goto out;
3008                 }
3009         }
3010 out:
3011         btrfs_release_path(&path);
3012         return ret;
3013 }
3014
3015 static u32 btrfs_type_to_imode(u8 type)
3016 {
3017         static u32 imode_by_btrfs_type[] = {
3018                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3019                 [BTRFS_FT_DIR]          = S_IFDIR,
3020                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3021                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3022                 [BTRFS_FT_FIFO]         = S_IFIFO,
3023                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3024                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3025         };
3026
3027         return imode_by_btrfs_type[(type)];
3028 }
3029
3030 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3031                                 struct btrfs_root *root,
3032                                 struct btrfs_path *path,
3033                                 struct inode_record *rec)
3034 {
3035         u8 filetype;
3036         u32 mode = 0700;
3037         int type_recovered = 0;
3038         int ret = 0;
3039
3040         printf("Trying to rebuild inode:%llu\n", rec->ino);
3041
3042         type_recovered = !find_file_type(rec, &filetype);
3043
3044         /*
3045          * Try to determine inode type if type not found.
3046          *
3047          * For found regular file extent, it must be FILE.
3048          * For found dir_item/index, it must be DIR.
3049          *
3050          * For undetermined one, use FILE as fallback.
3051          *
3052          * TODO:
3053          * 1. If found backref(inode_index/item is already handled) to it,
3054          *    it must be DIR.
3055          *    Need new inode-inode ref structure to allow search for that.
3056          */
3057         if (!type_recovered) {
3058                 if (rec->found_file_extent &&
3059                     find_normal_file_extent(root, rec->ino)) {
3060                         type_recovered = 1;
3061                         filetype = BTRFS_FT_REG_FILE;
3062                 } else if (rec->found_dir_item) {
3063                         type_recovered = 1;
3064                         filetype = BTRFS_FT_DIR;
3065                 } else if (!list_empty(&rec->orphan_extents)) {
3066                         type_recovered = 1;
3067                         filetype = BTRFS_FT_REG_FILE;
3068                 } else{
3069                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3070                                rec->ino);
3071                         type_recovered = 1;
3072                         filetype = BTRFS_FT_REG_FILE;
3073                 }
3074         }
3075
3076         ret = btrfs_new_inode(trans, root, rec->ino,
3077                               mode | btrfs_type_to_imode(filetype));
3078         if (ret < 0)
3079                 goto out;
3080
3081         /*
3082          * Here inode rebuild is done, we only rebuild the inode item,
3083          * don't repair the nlink(like move to lost+found).
3084          * That is the job of nlink repair.
3085          *
3086          * We just fill the record and return
3087          */
3088         rec->found_dir_item = 1;
3089         rec->imode = mode | btrfs_type_to_imode(filetype);
3090         rec->nlink = 0;
3091         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3092         /* Ensure the inode_nlinks repair function will be called */
3093         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3094 out:
3095         return ret;
3096 }
3097
3098 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3099                                       struct btrfs_root *root,
3100                                       struct btrfs_path *path,
3101                                       struct inode_record *rec)
3102 {
3103         struct orphan_data_extent *orphan;
3104         struct orphan_data_extent *tmp;
3105         int ret = 0;
3106
3107         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3108                 /*
3109                  * Check for conflicting file extents
3110                  *
3111                  * Here we don't know whether the extents is compressed or not,
3112                  * so we can only assume it not compressed nor data offset,
3113                  * and use its disk_len as extent length.
3114                  */
3115                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3116                                        orphan->offset, orphan->disk_len, 0);
3117                 btrfs_release_path(path);
3118                 if (ret < 0)
3119                         goto out;
3120                 if (!ret) {
3121                         fprintf(stderr,
3122                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3123                                 orphan->disk_bytenr, orphan->disk_len);
3124                         ret = btrfs_free_extent(trans,
3125                                         root->fs_info->extent_root,
3126                                         orphan->disk_bytenr, orphan->disk_len,
3127                                         0, root->objectid, orphan->objectid,
3128                                         orphan->offset);
3129                         if (ret < 0)
3130                                 goto out;
3131                 }
3132                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3133                                 orphan->offset, orphan->disk_bytenr,
3134                                 orphan->disk_len, orphan->disk_len);
3135                 if (ret < 0)
3136                         goto out;
3137
3138                 /* Update file size info */
3139                 rec->found_size += orphan->disk_len;
3140                 if (rec->found_size == rec->nbytes)
3141                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3142
3143                 /* Update the file extent hole info too */
3144                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3145                                            orphan->disk_len);
3146                 if (ret < 0)
3147                         goto out;
3148                 if (RB_EMPTY_ROOT(&rec->holes))
3149                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3150
3151                 list_del(&orphan->list);
3152                 free(orphan);
3153         }
3154         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3155 out:
3156         return ret;
3157 }
3158
3159 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3160                                         struct btrfs_root *root,
3161                                         struct btrfs_path *path,
3162                                         struct inode_record *rec)
3163 {
3164         struct rb_node *node;
3165         struct file_extent_hole *hole;
3166         int found = 0;
3167         int ret = 0;
3168
3169         node = rb_first(&rec->holes);
3170
3171         while (node) {
3172                 found = 1;
3173                 hole = rb_entry(node, struct file_extent_hole, node);
3174                 ret = btrfs_punch_hole(trans, root, rec->ino,
3175                                        hole->start, hole->len);
3176                 if (ret < 0)
3177                         goto out;
3178                 ret = del_file_extent_hole(&rec->holes, hole->start,
3179                                            hole->len);
3180                 if (ret < 0)
3181                         goto out;
3182                 if (RB_EMPTY_ROOT(&rec->holes))
3183                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3184                 node = rb_first(&rec->holes);
3185         }
3186         /* special case for a file losing all its file extent */
3187         if (!found) {
3188                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3189                                        round_up(rec->isize, root->sectorsize));
3190                 if (ret < 0)
3191                         goto out;
3192         }
3193         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3194                rec->ino, root->objectid);
3195 out:
3196         return ret;
3197 }
3198
3199 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3200 {
3201         struct btrfs_trans_handle *trans;
3202         struct btrfs_path path;
3203         int ret = 0;
3204
3205         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3206                              I_ERR_NO_ORPHAN_ITEM |
3207                              I_ERR_LINK_COUNT_WRONG |
3208                              I_ERR_NO_INODE_ITEM |
3209                              I_ERR_FILE_EXTENT_ORPHAN |
3210                              I_ERR_FILE_EXTENT_DISCOUNT|
3211                              I_ERR_FILE_NBYTES_WRONG)))
3212                 return rec->errors;
3213
3214         /*
3215          * For nlink repair, it may create a dir and add link, so
3216          * 2 for parent(256)'s dir_index and dir_item
3217          * 2 for lost+found dir's inode_item and inode_ref
3218          * 1 for the new inode_ref of the file
3219          * 2 for lost+found dir's dir_index and dir_item for the file
3220          */
3221         trans = btrfs_start_transaction(root, 7);
3222         if (IS_ERR(trans))
3223                 return PTR_ERR(trans);
3224
3225         btrfs_init_path(&path);
3226         if (rec->errors & I_ERR_NO_INODE_ITEM)
3227                 ret = repair_inode_no_item(trans, root, &path, rec);
3228         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3229                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3230         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3231                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3232         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3233                 ret = repair_inode_isize(trans, root, &path, rec);
3234         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3235                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3236         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3237                 ret = repair_inode_nlinks(trans, root, &path, rec);
3238         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3239                 ret = repair_inode_nbytes(trans, root, &path, rec);
3240         btrfs_commit_transaction(trans, root);
3241         btrfs_release_path(&path);
3242         return ret;
3243 }
3244
3245 static int check_inode_recs(struct btrfs_root *root,
3246                             struct cache_tree *inode_cache)
3247 {
3248         struct cache_extent *cache;
3249         struct ptr_node *node;
3250         struct inode_record *rec;
3251         struct inode_backref *backref;
3252         int stage = 0;
3253         int ret = 0;
3254         int err = 0;
3255         u64 error = 0;
3256         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3257
3258         if (btrfs_root_refs(&root->root_item) == 0) {
3259                 if (!cache_tree_empty(inode_cache))
3260                         fprintf(stderr, "warning line %d\n", __LINE__);
3261                 return 0;
3262         }
3263
3264         /*
3265          * We need to record the highest inode number for later 'lost+found'
3266          * dir creation.
3267          * We must select an ino not used/referred by any existing inode, or
3268          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3269          * this may cause 'lost+found' dir has wrong nlinks.
3270          */
3271         cache = last_cache_extent(inode_cache);
3272         if (cache) {
3273                 node = container_of(cache, struct ptr_node, cache);
3274                 rec = node->data;
3275                 if (rec->ino > root->highest_inode)
3276                         root->highest_inode = rec->ino;
3277         }
3278
3279         /*
3280          * We need to repair backrefs first because we could change some of the
3281          * errors in the inode recs.
3282          *
3283          * We also need to go through and delete invalid backrefs first and then
3284          * add the correct ones second.  We do this because we may get EEXIST
3285          * when adding back the correct index because we hadn't yet deleted the
3286          * invalid index.
3287          *
3288          * For example, if we were missing a dir index then the directories
3289          * isize would be wrong, so if we fixed the isize to what we thought it
3290          * would be and then fixed the backref we'd still have a invalid fs, so
3291          * we need to add back the dir index and then check to see if the isize
3292          * is still wrong.
3293          */
3294         while (stage < 3) {
3295                 stage++;
3296                 if (stage == 3 && !err)
3297                         break;
3298
3299                 cache = search_cache_extent(inode_cache, 0);
3300                 while (repair && cache) {
3301                         node = container_of(cache, struct ptr_node, cache);
3302                         rec = node->data;
3303                         cache = next_cache_extent(cache);
3304
3305                         /* Need to free everything up and rescan */
3306                         if (stage == 3) {
3307                                 remove_cache_extent(inode_cache, &node->cache);
3308                                 free(node);
3309                                 free_inode_rec(rec);
3310                                 continue;
3311                         }
3312
3313                         if (list_empty(&rec->backrefs))
3314                                 continue;
3315
3316                         ret = repair_inode_backrefs(root, rec, inode_cache,
3317                                                     stage == 1);
3318                         if (ret < 0) {
3319                                 err = ret;
3320                                 stage = 2;
3321                                 break;
3322                         } if (ret > 0) {
3323                                 err = -EAGAIN;
3324                         }
3325                 }
3326         }
3327         if (err)
3328                 return err;
3329
3330         rec = get_inode_rec(inode_cache, root_dirid, 0);
3331         BUG_ON(IS_ERR(rec));
3332         if (rec) {
3333                 ret = check_root_dir(rec);
3334                 if (ret) {
3335                         fprintf(stderr, "root %llu root dir %llu error\n",
3336                                 (unsigned long long)root->root_key.objectid,
3337                                 (unsigned long long)root_dirid);
3338                         print_inode_error(root, rec);
3339                         error++;
3340                 }
3341         } else {
3342                 if (repair) {
3343                         struct btrfs_trans_handle *trans;
3344
3345                         trans = btrfs_start_transaction(root, 1);
3346                         if (IS_ERR(trans)) {
3347                                 err = PTR_ERR(trans);
3348                                 return err;
3349                         }
3350
3351                         fprintf(stderr,
3352                                 "root %llu missing its root dir, recreating\n",
3353                                 (unsigned long long)root->objectid);
3354
3355                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3356                         BUG_ON(ret);
3357
3358                         btrfs_commit_transaction(trans, root);
3359                         return -EAGAIN;
3360                 }
3361
3362                 fprintf(stderr, "root %llu root dir %llu not found\n",
3363                         (unsigned long long)root->root_key.objectid,
3364                         (unsigned long long)root_dirid);
3365         }
3366
3367         while (1) {
3368                 cache = search_cache_extent(inode_cache, 0);
3369                 if (!cache)
3370                         break;
3371                 node = container_of(cache, struct ptr_node, cache);
3372                 rec = node->data;
3373                 remove_cache_extent(inode_cache, &node->cache);
3374                 free(node);
3375                 if (rec->ino == root_dirid ||
3376                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3377                         free_inode_rec(rec);
3378                         continue;
3379                 }
3380
3381                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3382                         ret = check_orphan_item(root, rec->ino);
3383                         if (ret == 0)
3384                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3385                         if (can_free_inode_rec(rec)) {
3386                                 free_inode_rec(rec);
3387                                 continue;
3388                         }
3389                 }
3390
3391                 if (!rec->found_inode_item)
3392                         rec->errors |= I_ERR_NO_INODE_ITEM;
3393                 if (rec->found_link != rec->nlink)
3394                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3395                 if (repair) {
3396                         ret = try_repair_inode(root, rec);
3397                         if (ret == 0 && can_free_inode_rec(rec)) {
3398                                 free_inode_rec(rec);
3399                                 continue;
3400                         }
3401                         ret = 0;
3402                 }
3403
3404                 if (!(repair && ret == 0))
3405                         error++;
3406                 print_inode_error(root, rec);
3407                 list_for_each_entry(backref, &rec->backrefs, list) {
3408                         if (!backref->found_dir_item)
3409                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3410                         if (!backref->found_dir_index)
3411                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3412                         if (!backref->found_inode_ref)
3413                                 backref->errors |= REF_ERR_NO_INODE_REF;
3414                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3415                                 " namelen %u name %s filetype %d errors %x",
3416                                 (unsigned long long)backref->dir,
3417                                 (unsigned long long)backref->index,
3418                                 backref->namelen, backref->name,
3419                                 backref->filetype, backref->errors);
3420                         print_ref_error(backref->errors);
3421                 }
3422                 free_inode_rec(rec);
3423         }
3424         return (error > 0) ? -1 : 0;
3425 }
3426
3427 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3428                                         u64 objectid)
3429 {
3430         struct cache_extent *cache;
3431         struct root_record *rec = NULL;
3432         int ret;
3433
3434         cache = lookup_cache_extent(root_cache, objectid, 1);
3435         if (cache) {
3436                 rec = container_of(cache, struct root_record, cache);
3437         } else {
3438                 rec = calloc(1, sizeof(*rec));
3439                 if (!rec)
3440                         return ERR_PTR(-ENOMEM);
3441                 rec->objectid = objectid;
3442                 INIT_LIST_HEAD(&rec->backrefs);
3443                 rec->cache.start = objectid;
3444                 rec->cache.size = 1;
3445
3446                 ret = insert_cache_extent(root_cache, &rec->cache);
3447                 if (ret)
3448                         return ERR_PTR(-EEXIST);
3449         }
3450         return rec;
3451 }
3452
3453 static struct root_backref *get_root_backref(struct root_record *rec,
3454                                              u64 ref_root, u64 dir, u64 index,
3455                                              const char *name, int namelen)
3456 {
3457         struct root_backref *backref;
3458
3459         list_for_each_entry(backref, &rec->backrefs, list) {
3460                 if (backref->ref_root != ref_root || backref->dir != dir ||
3461                     backref->namelen != namelen)
3462                         continue;
3463                 if (memcmp(name, backref->name, namelen))
3464                         continue;
3465                 return backref;
3466         }
3467
3468         backref = calloc(1, sizeof(*backref) + namelen + 1);
3469         if (!backref)
3470                 return NULL;
3471         backref->ref_root = ref_root;
3472         backref->dir = dir;
3473         backref->index = index;
3474         backref->namelen = namelen;
3475         memcpy(backref->name, name, namelen);
3476         backref->name[namelen] = '\0';
3477         list_add_tail(&backref->list, &rec->backrefs);
3478         return backref;
3479 }
3480
3481 static void free_root_record(struct cache_extent *cache)
3482 {
3483         struct root_record *rec;
3484         struct root_backref *backref;
3485
3486         rec = container_of(cache, struct root_record, cache);
3487         while (!list_empty(&rec->backrefs)) {
3488                 backref = to_root_backref(rec->backrefs.next);
3489                 list_del(&backref->list);
3490                 free(backref);
3491         }
3492
3493         free(rec);
3494 }
3495
3496 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3497
3498 static int add_root_backref(struct cache_tree *root_cache,
3499                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3500                             const char *name, int namelen,
3501                             int item_type, int errors)
3502 {
3503         struct root_record *rec;
3504         struct root_backref *backref;
3505
3506         rec = get_root_rec(root_cache, root_id);
3507         BUG_ON(IS_ERR(rec));
3508         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3509         BUG_ON(!backref);
3510
3511         backref->errors |= errors;
3512
3513         if (item_type != BTRFS_DIR_ITEM_KEY) {
3514                 if (backref->found_dir_index || backref->found_back_ref ||
3515                     backref->found_forward_ref) {
3516                         if (backref->index != index)
3517                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3518                 } else {
3519                         backref->index = index;
3520                 }
3521         }
3522
3523         if (item_type == BTRFS_DIR_ITEM_KEY) {
3524                 if (backref->found_forward_ref)
3525                         rec->found_ref++;
3526                 backref->found_dir_item = 1;
3527         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3528                 backref->found_dir_index = 1;
3529         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3530                 if (backref->found_forward_ref)
3531                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3532                 else if (backref->found_dir_item)
3533                         rec->found_ref++;
3534                 backref->found_forward_ref = 1;
3535         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3536                 if (backref->found_back_ref)
3537                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3538                 backref->found_back_ref = 1;
3539         } else {
3540                 BUG_ON(1);
3541         }
3542
3543         if (backref->found_forward_ref && backref->found_dir_item)
3544                 backref->reachable = 1;
3545         return 0;
3546 }
3547
3548 static int merge_root_recs(struct btrfs_root *root,
3549                            struct cache_tree *src_cache,
3550                            struct cache_tree *dst_cache)
3551 {
3552         struct cache_extent *cache;
3553         struct ptr_node *node;
3554         struct inode_record *rec;
3555         struct inode_backref *backref;
3556         int ret = 0;
3557
3558         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3559                 free_inode_recs_tree(src_cache);
3560                 return 0;
3561         }
3562
3563         while (1) {
3564                 cache = search_cache_extent(src_cache, 0);
3565                 if (!cache)
3566                         break;
3567                 node = container_of(cache, struct ptr_node, cache);
3568                 rec = node->data;
3569                 remove_cache_extent(src_cache, &node->cache);
3570                 free(node);
3571
3572                 ret = is_child_root(root, root->objectid, rec->ino);
3573                 if (ret < 0)
3574                         break;
3575                 else if (ret == 0)
3576                         goto skip;
3577
3578                 list_for_each_entry(backref, &rec->backrefs, list) {
3579                         BUG_ON(backref->found_inode_ref);
3580                         if (backref->found_dir_item)
3581                                 add_root_backref(dst_cache, rec->ino,
3582                                         root->root_key.objectid, backref->dir,
3583                                         backref->index, backref->name,
3584                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3585                                         backref->errors);
3586                         if (backref->found_dir_index)
3587                                 add_root_backref(dst_cache, rec->ino,
3588                                         root->root_key.objectid, backref->dir,
3589                                         backref->index, backref->name,
3590                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3591                                         backref->errors);
3592                 }
3593 skip:
3594                 free_inode_rec(rec);
3595         }
3596         if (ret < 0)
3597                 return ret;
3598         return 0;
3599 }
3600
3601 static int check_root_refs(struct btrfs_root *root,
3602                            struct cache_tree *root_cache)
3603 {
3604         struct root_record *rec;
3605         struct root_record *ref_root;
3606         struct root_backref *backref;
3607         struct cache_extent *cache;
3608         int loop = 1;
3609         int ret;
3610         int error;
3611         int errors = 0;
3612
3613         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3614         BUG_ON(IS_ERR(rec));
3615         rec->found_ref = 1;
3616
3617         /* fixme: this can not detect circular references */
3618         while (loop) {
3619                 loop = 0;
3620                 cache = search_cache_extent(root_cache, 0);
3621                 while (1) {
3622                         if (!cache)
3623                                 break;
3624                         rec = container_of(cache, struct root_record, cache);
3625                         cache = next_cache_extent(cache);
3626
3627                         if (rec->found_ref == 0)
3628                                 continue;
3629
3630                         list_for_each_entry(backref, &rec->backrefs, list) {
3631                                 if (!backref->reachable)
3632                                         continue;
3633
3634                                 ref_root = get_root_rec(root_cache,
3635                                                         backref->ref_root);
3636                                 BUG_ON(IS_ERR(ref_root));
3637                                 if (ref_root->found_ref > 0)
3638                                         continue;
3639
3640                                 backref->reachable = 0;
3641                                 rec->found_ref--;
3642                                 if (rec->found_ref == 0)
3643                                         loop = 1;
3644                         }
3645                 }
3646         }
3647
3648         cache = search_cache_extent(root_cache, 0);
3649         while (1) {
3650                 if (!cache)
3651                         break;
3652                 rec = container_of(cache, struct root_record, cache);
3653                 cache = next_cache_extent(cache);
3654
3655                 if (rec->found_ref == 0 &&
3656                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3657                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3658                         ret = check_orphan_item(root->fs_info->tree_root,
3659                                                 rec->objectid);
3660                         if (ret == 0)
3661                                 continue;
3662
3663                         /*
3664                          * If we don't have a root item then we likely just have
3665                          * a dir item in a snapshot for this root but no actual
3666                          * ref key or anything so it's meaningless.
3667                          */
3668                         if (!rec->found_root_item)
3669                                 continue;
3670                         errors++;
3671                         fprintf(stderr, "fs tree %llu not referenced\n",
3672                                 (unsigned long long)rec->objectid);
3673                 }
3674
3675                 error = 0;
3676                 if (rec->found_ref > 0 && !rec->found_root_item)
3677                         error = 1;
3678                 list_for_each_entry(backref, &rec->backrefs, list) {
3679                         if (!backref->found_dir_item)
3680                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3681                         if (!backref->found_dir_index)
3682                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3683                         if (!backref->found_back_ref)
3684                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3685                         if (!backref->found_forward_ref)
3686                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3687                         if (backref->reachable && backref->errors)
3688                                 error = 1;
3689                 }
3690                 if (!error)
3691                         continue;
3692
3693                 errors++;
3694                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3695                         (unsigned long long)rec->objectid, rec->found_ref,
3696                          rec->found_root_item ? "" : "not found");
3697
3698                 list_for_each_entry(backref, &rec->backrefs, list) {
3699                         if (!backref->reachable)
3700                                 continue;
3701                         if (!backref->errors && rec->found_root_item)
3702                                 continue;
3703                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3704                                 " index %llu namelen %u name %s errors %x\n",
3705                                 (unsigned long long)backref->ref_root,
3706                                 (unsigned long long)backref->dir,
3707                                 (unsigned long long)backref->index,
3708                                 backref->namelen, backref->name,
3709                                 backref->errors);
3710                         print_ref_error(backref->errors);
3711                 }
3712         }
3713         return errors > 0 ? 1 : 0;
3714 }
3715
3716 static int process_root_ref(struct extent_buffer *eb, int slot,
3717                             struct btrfs_key *key,
3718                             struct cache_tree *root_cache)
3719 {
3720         u64 dirid;
3721         u64 index;
3722         u32 len;
3723         u32 name_len;
3724         struct btrfs_root_ref *ref;
3725         char namebuf[BTRFS_NAME_LEN];
3726         int error;
3727
3728         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3729
3730         dirid = btrfs_root_ref_dirid(eb, ref);
3731         index = btrfs_root_ref_sequence(eb, ref);
3732         name_len = btrfs_root_ref_name_len(eb, ref);
3733
3734         if (name_len <= BTRFS_NAME_LEN) {
3735                 len = name_len;
3736                 error = 0;
3737         } else {
3738                 len = BTRFS_NAME_LEN;
3739                 error = REF_ERR_NAME_TOO_LONG;
3740         }
3741         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3742
3743         if (key->type == BTRFS_ROOT_REF_KEY) {
3744                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3745                                  index, namebuf, len, key->type, error);
3746         } else {
3747                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3748                                  index, namebuf, len, key->type, error);
3749         }
3750         return 0;
3751 }
3752
3753 static void free_corrupt_block(struct cache_extent *cache)
3754 {
3755         struct btrfs_corrupt_block *corrupt;
3756
3757         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3758         free(corrupt);
3759 }
3760
3761 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3762
3763 /*
3764  * Repair the btree of the given root.
3765  *
3766  * The fix is to remove the node key in corrupt_blocks cache_tree.
3767  * and rebalance the tree.
3768  * After the fix, the btree should be writeable.
3769  */
3770 static int repair_btree(struct btrfs_root *root,
3771                         struct cache_tree *corrupt_blocks)
3772 {
3773         struct btrfs_trans_handle *trans;
3774         struct btrfs_path path;
3775         struct btrfs_corrupt_block *corrupt;
3776         struct cache_extent *cache;
3777         struct btrfs_key key;
3778         u64 offset;
3779         int level;
3780         int ret = 0;
3781
3782         if (cache_tree_empty(corrupt_blocks))
3783                 return 0;
3784
3785         trans = btrfs_start_transaction(root, 1);
3786         if (IS_ERR(trans)) {
3787                 ret = PTR_ERR(trans);
3788                 fprintf(stderr, "Error starting transaction: %s\n",
3789                         strerror(-ret));
3790                 return ret;
3791         }
3792         btrfs_init_path(&path);
3793         cache = first_cache_extent(corrupt_blocks);
3794         while (cache) {
3795                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3796                                        cache);
3797                 level = corrupt->level;
3798                 path.lowest_level = level;
3799                 key.objectid = corrupt->key.objectid;
3800                 key.type = corrupt->key.type;
3801                 key.offset = corrupt->key.offset;
3802
3803                 /*
3804                  * Here we don't want to do any tree balance, since it may
3805                  * cause a balance with corrupted brother leaf/node,
3806                  * so ins_len set to 0 here.
3807                  * Balance will be done after all corrupt node/leaf is deleted.
3808                  */
3809                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3810                 if (ret < 0)
3811                         goto out;
3812                 offset = btrfs_node_blockptr(path.nodes[level],
3813                                              path.slots[level]);
3814
3815                 /* Remove the ptr */
3816                 ret = btrfs_del_ptr(trans, root, &path, level,
3817                                     path.slots[level]);
3818                 if (ret < 0)
3819                         goto out;
3820                 /*
3821                  * Remove the corresponding extent
3822                  * return value is not concerned.
3823                  */
3824                 btrfs_release_path(&path);
3825                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3826                                         0, root->root_key.objectid,
3827                                         level - 1, 0);
3828                 cache = next_cache_extent(cache);
3829         }
3830
3831         /* Balance the btree using btrfs_search_slot() */
3832         cache = first_cache_extent(corrupt_blocks);
3833         while (cache) {
3834                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3835                                        cache);
3836                 memcpy(&key, &corrupt->key, sizeof(key));
3837                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3838                 if (ret < 0)
3839                         goto out;
3840                 /* return will always >0 since it won't find the item */
3841                 ret = 0;
3842                 btrfs_release_path(&path);
3843                 cache = next_cache_extent(cache);
3844         }
3845 out:
3846         btrfs_commit_transaction(trans, root);
3847         btrfs_release_path(&path);
3848         return ret;
3849 }
3850
3851 static int check_fs_root(struct btrfs_root *root,
3852                          struct cache_tree *root_cache,
3853                          struct walk_control *wc)
3854 {
3855         int ret = 0;
3856         int err = 0;
3857         int wret;
3858         int level;
3859         struct btrfs_path path;
3860         struct shared_node root_node;
3861         struct root_record *rec;
3862         struct btrfs_root_item *root_item = &root->root_item;
3863         struct cache_tree corrupt_blocks;
3864         struct orphan_data_extent *orphan;
3865         struct orphan_data_extent *tmp;
3866         enum btrfs_tree_block_status status;
3867         struct node_refs nrefs;
3868
3869         /*
3870          * Reuse the corrupt_block cache tree to record corrupted tree block
3871          *
3872          * Unlike the usage in extent tree check, here we do it in a per
3873          * fs/subvol tree base.
3874          */
3875         cache_tree_init(&corrupt_blocks);
3876         root->fs_info->corrupt_blocks = &corrupt_blocks;
3877
3878         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3879                 rec = get_root_rec(root_cache, root->root_key.objectid);
3880                 BUG_ON(IS_ERR(rec));
3881                 if (btrfs_root_refs(root_item) > 0)
3882                         rec->found_root_item = 1;
3883         }
3884
3885         btrfs_init_path(&path);
3886         memset(&root_node, 0, sizeof(root_node));
3887         cache_tree_init(&root_node.root_cache);
3888         cache_tree_init(&root_node.inode_cache);
3889         memset(&nrefs, 0, sizeof(nrefs));
3890
3891         /* Move the orphan extent record to corresponding inode_record */
3892         list_for_each_entry_safe(orphan, tmp,
3893                                  &root->orphan_data_extents, list) {
3894                 struct inode_record *inode;
3895
3896                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3897                                       1);
3898                 BUG_ON(IS_ERR(inode));
3899                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3900                 list_move(&orphan->list, &inode->orphan_extents);
3901         }
3902
3903         level = btrfs_header_level(root->node);
3904         memset(wc->nodes, 0, sizeof(wc->nodes));
3905         wc->nodes[level] = &root_node;
3906         wc->active_node = level;
3907         wc->root_level = level;
3908
3909         /* We may not have checked the root block, lets do that now */
3910         if (btrfs_is_leaf(root->node))
3911                 status = btrfs_check_leaf(root, NULL, root->node);
3912         else
3913                 status = btrfs_check_node(root, NULL, root->node);
3914         if (status != BTRFS_TREE_BLOCK_CLEAN)
3915                 return -EIO;
3916
3917         if (btrfs_root_refs(root_item) > 0 ||
3918             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3919                 path.nodes[level] = root->node;
3920                 extent_buffer_get(root->node);
3921                 path.slots[level] = 0;
3922         } else {
3923                 struct btrfs_key key;
3924                 struct btrfs_disk_key found_key;
3925
3926                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3927                 level = root_item->drop_level;
3928                 path.lowest_level = level;
3929                 if (level > btrfs_header_level(root->node) ||
3930                     level >= BTRFS_MAX_LEVEL) {
3931                         error("ignoring invalid drop level: %u", level);
3932                         goto skip_walking;
3933                 }
3934                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3935                 if (wret < 0)
3936                         goto skip_walking;
3937                 btrfs_node_key(path.nodes[level], &found_key,
3938                                 path.slots[level]);
3939                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3940                                         sizeof(found_key)));
3941         }
3942
3943         while (1) {
3944                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3945                 if (wret < 0)
3946                         ret = wret;
3947                 if (wret != 0)
3948                         break;
3949
3950                 wret = walk_up_tree(root, &path, wc, &level);
3951                 if (wret < 0)
3952                         ret = wret;
3953                 if (wret != 0)
3954                         break;
3955         }
3956 skip_walking:
3957         btrfs_release_path(&path);
3958
3959         if (!cache_tree_empty(&corrupt_blocks)) {
3960                 struct cache_extent *cache;
3961                 struct btrfs_corrupt_block *corrupt;
3962
3963                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3964                        root->root_key.objectid);
3965                 cache = first_cache_extent(&corrupt_blocks);
3966                 while (cache) {
3967                         corrupt = container_of(cache,
3968                                                struct btrfs_corrupt_block,
3969                                                cache);
3970                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3971                                cache->start, corrupt->level,
3972                                corrupt->key.objectid, corrupt->key.type,
3973                                corrupt->key.offset);
3974                         cache = next_cache_extent(cache);
3975                 }
3976                 if (repair) {
3977                         printf("Try to repair the btree for root %llu\n",
3978                                root->root_key.objectid);
3979                         ret = repair_btree(root, &corrupt_blocks);
3980                         if (ret < 0)
3981                                 fprintf(stderr, "Failed to repair btree: %s\n",
3982                                         strerror(-ret));
3983                         if (!ret)
3984                                 printf("Btree for root %llu is fixed\n",
3985                                        root->root_key.objectid);
3986                 }
3987         }
3988
3989         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3990         if (err < 0)
3991                 ret = err;
3992
3993         if (root_node.current) {
3994                 root_node.current->checked = 1;
3995                 maybe_free_inode_rec(&root_node.inode_cache,
3996                                 root_node.current);
3997         }
3998
3999         err = check_inode_recs(root, &root_node.inode_cache);
4000         if (!ret)
4001                 ret = err;
4002
4003         free_corrupt_blocks_tree(&corrupt_blocks);
4004         root->fs_info->corrupt_blocks = NULL;
4005         free_orphan_data_extents(&root->orphan_data_extents);
4006         return ret;
4007 }
4008
4009 static int fs_root_objectid(u64 objectid)
4010 {
4011         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4012             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4013                 return 1;
4014         return is_fstree(objectid);
4015 }
4016
4017 static int check_fs_roots(struct btrfs_root *root,
4018                           struct cache_tree *root_cache)
4019 {
4020         struct btrfs_path path;
4021         struct btrfs_key key;
4022         struct walk_control wc;
4023         struct extent_buffer *leaf, *tree_node;
4024         struct btrfs_root *tmp_root;
4025         struct btrfs_root *tree_root = root->fs_info->tree_root;
4026         int ret;
4027         int err = 0;
4028
4029         if (ctx.progress_enabled) {
4030                 ctx.tp = TASK_FS_ROOTS;
4031                 task_start(ctx.info);
4032         }
4033
4034         /*
4035          * Just in case we made any changes to the extent tree that weren't
4036          * reflected into the free space cache yet.
4037          */
4038         if (repair)
4039                 reset_cached_block_groups(root->fs_info);
4040         memset(&wc, 0, sizeof(wc));
4041         cache_tree_init(&wc.shared);
4042         btrfs_init_path(&path);
4043
4044 again:
4045         key.offset = 0;
4046         key.objectid = 0;
4047         key.type = BTRFS_ROOT_ITEM_KEY;
4048         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4049         if (ret < 0) {
4050                 err = 1;
4051                 goto out;
4052         }
4053         tree_node = tree_root->node;
4054         while (1) {
4055                 if (tree_node != tree_root->node) {
4056                         free_root_recs_tree(root_cache);
4057                         btrfs_release_path(&path);
4058                         goto again;
4059                 }
4060                 leaf = path.nodes[0];
4061                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4062                         ret = btrfs_next_leaf(tree_root, &path);
4063                         if (ret) {
4064                                 if (ret < 0)
4065                                         err = 1;
4066                                 break;
4067                         }
4068                         leaf = path.nodes[0];
4069                 }
4070                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4071                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4072                     fs_root_objectid(key.objectid)) {
4073                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4074                                 tmp_root = btrfs_read_fs_root_no_cache(
4075                                                 root->fs_info, &key);
4076                         } else {
4077                                 key.offset = (u64)-1;
4078                                 tmp_root = btrfs_read_fs_root(
4079                                                 root->fs_info, &key);
4080                         }
4081                         if (IS_ERR(tmp_root)) {
4082                                 err = 1;
4083                                 goto next;
4084                         }
4085                         ret = check_fs_root(tmp_root, root_cache, &wc);
4086                         if (ret == -EAGAIN) {
4087                                 free_root_recs_tree(root_cache);
4088                                 btrfs_release_path(&path);
4089                                 goto again;
4090                         }
4091                         if (ret)
4092                                 err = 1;
4093                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4094                                 btrfs_free_fs_root(tmp_root);
4095                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4096                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4097                         process_root_ref(leaf, path.slots[0], &key,
4098                                          root_cache);
4099                 }
4100 next:
4101                 path.slots[0]++;
4102         }
4103 out:
4104         btrfs_release_path(&path);
4105         if (err)
4106                 free_extent_cache_tree(&wc.shared);
4107         if (!cache_tree_empty(&wc.shared))
4108                 fprintf(stderr, "warning line %d\n", __LINE__);
4109
4110         task_stop(ctx.info);
4111
4112         return err;
4113 }
4114
4115 /*
4116  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4117  * INODE_REF/INODE_EXTREF match.
4118  *
4119  * @root:       the root of the fs/file tree
4120  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4121  * @key:        the key of the DIR_ITEM/DIR_INDEX
4122  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4123  *              distinguish root_dir between normal dir/file
4124  * @name:       the name in the INODE_REF/INODE_EXTREF
4125  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4126  * @mode:       the st_mode of INODE_ITEM
4127  *
4128  * Return 0 if no error occurred.
4129  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4130  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4131  * dir/file.
4132  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4133  * not match for normal dir/file.
4134  */
4135 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4136                          struct btrfs_key *key, u64 index, char *name,
4137                          u32 namelen, u32 mode)
4138 {
4139         struct btrfs_path path;
4140         struct extent_buffer *node;
4141         struct btrfs_dir_item *di;
4142         struct btrfs_key location;
4143         char namebuf[BTRFS_NAME_LEN] = {0};
4144         u32 total;
4145         u32 cur = 0;
4146         u32 len;
4147         u32 name_len;
4148         u32 data_len;
4149         u8 filetype;
4150         int slot;
4151         int ret;
4152
4153         btrfs_init_path(&path);
4154         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4155         if (ret < 0) {
4156                 ret = DIR_ITEM_MISSING;
4157                 goto out;
4158         }
4159
4160         /* Process root dir and goto out*/
4161         if (index == 0) {
4162                 if (ret == 0) {
4163                         ret = ROOT_DIR_ERROR;
4164                         error(
4165                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4166                                 root->objectid,
4167                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4168                                         "REF" : "EXTREF",
4169                                 ref_key->objectid, ref_key->offset,
4170                                 key->type == BTRFS_DIR_ITEM_KEY ?
4171                                         "DIR_ITEM" : "DIR_INDEX");
4172                 } else {
4173                         ret = 0;
4174                 }
4175
4176                 goto out;
4177         }
4178
4179         /* Process normal file/dir */
4180         if (ret > 0) {
4181                 ret = DIR_ITEM_MISSING;
4182                 error(
4183                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4184                         root->objectid,
4185                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4186                         ref_key->objectid, ref_key->offset,
4187                         key->type == BTRFS_DIR_ITEM_KEY ?
4188                                 "DIR_ITEM" : "DIR_INDEX",
4189                         key->objectid, key->offset, namelen, name,
4190                         imode_to_type(mode));
4191                 goto out;
4192         }
4193
4194         /* Check whether inode_id/filetype/name match */
4195         node = path.nodes[0];
4196         slot = path.slots[0];
4197         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4198         total = btrfs_item_size_nr(node, slot);
4199         while (cur < total) {
4200                 ret = DIR_ITEM_MISMATCH;
4201                 name_len = btrfs_dir_name_len(node, di);
4202                 data_len = btrfs_dir_data_len(node, di);
4203
4204                 btrfs_dir_item_key_to_cpu(node, di, &location);
4205                 if (location.objectid != ref_key->objectid ||
4206                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4207                     location.offset != 0)
4208                         goto next;
4209
4210                 filetype = btrfs_dir_type(node, di);
4211                 if (imode_to_type(mode) != filetype)
4212                         goto next;
4213
4214                 if (name_len <= BTRFS_NAME_LEN) {
4215                         len = name_len;
4216                 } else {
4217                         len = BTRFS_NAME_LEN;
4218                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4219                         root->objectid,
4220                         key->type == BTRFS_DIR_ITEM_KEY ?
4221                         "DIR_ITEM" : "DIR_INDEX",
4222                         key->objectid, key->offset, name_len);
4223                 }
4224                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4225                 if (len != namelen || strncmp(namebuf, name, len))
4226                         goto next;
4227
4228                 ret = 0;
4229                 goto out;
4230 next:
4231                 len = sizeof(*di) + name_len + data_len;
4232                 di = (struct btrfs_dir_item *)((char *)di + len);
4233                 cur += len;
4234         }
4235         if (ret == DIR_ITEM_MISMATCH)
4236                 error(
4237                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4238                         root->objectid,
4239                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4240                         ref_key->objectid, ref_key->offset,
4241                         key->type == BTRFS_DIR_ITEM_KEY ?
4242                                 "DIR_ITEM" : "DIR_INDEX",
4243                         key->objectid, key->offset, namelen, name,
4244                         imode_to_type(mode));
4245 out:
4246         btrfs_release_path(&path);
4247         return ret;
4248 }
4249
4250 /*
4251  * Traverse the given INODE_REF and call find_dir_item() to find related
4252  * DIR_ITEM/DIR_INDEX.
4253  *
4254  * @root:       the root of the fs/file tree
4255  * @ref_key:    the key of the INODE_REF
4256  * @refs:       the count of INODE_REF
4257  * @mode:       the st_mode of INODE_ITEM
4258  *
4259  * Return 0 if no error occurred.
4260  */
4261 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4262                            struct extent_buffer *node, int slot, u64 *refs,
4263                            int mode)
4264 {
4265         struct btrfs_key key;
4266         struct btrfs_inode_ref *ref;
4267         char namebuf[BTRFS_NAME_LEN] = {0};
4268         u32 total;
4269         u32 cur = 0;
4270         u32 len;
4271         u32 name_len;
4272         u64 index;
4273         int ret, err = 0;
4274
4275         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4276         total = btrfs_item_size_nr(node, slot);
4277
4278 next:
4279         /* Update inode ref count */
4280         (*refs)++;
4281
4282         index = btrfs_inode_ref_index(node, ref);
4283         name_len = btrfs_inode_ref_name_len(node, ref);
4284         if (name_len <= BTRFS_NAME_LEN) {
4285                 len = name_len;
4286         } else {
4287                 len = BTRFS_NAME_LEN;
4288                 warning("root %llu INODE_REF[%llu %llu] name too long",
4289                         root->objectid, ref_key->objectid, ref_key->offset);
4290         }
4291
4292         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4293
4294         /* Check root dir ref name */
4295         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4296                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4297                       root->objectid, ref_key->objectid, ref_key->offset,
4298                       namebuf);
4299                 err |= ROOT_DIR_ERROR;
4300         }
4301
4302         /* Find related DIR_INDEX */
4303         key.objectid = ref_key->offset;
4304         key.type = BTRFS_DIR_INDEX_KEY;
4305         key.offset = index;
4306         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4307         err |= ret;
4308
4309         /* Find related dir_item */
4310         key.objectid = ref_key->offset;
4311         key.type = BTRFS_DIR_ITEM_KEY;
4312         key.offset = btrfs_name_hash(namebuf, len);
4313         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4314         err |= ret;
4315
4316         len = sizeof(*ref) + name_len;
4317         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4318         cur += len;
4319         if (cur < total)
4320                 goto next;
4321
4322         return err;
4323 }
4324
4325 /*
4326  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4327  * DIR_ITEM/DIR_INDEX.
4328  *
4329  * @root:       the root of the fs/file tree
4330  * @ref_key:    the key of the INODE_EXTREF
4331  * @refs:       the count of INODE_EXTREF
4332  * @mode:       the st_mode of INODE_ITEM
4333  *
4334  * Return 0 if no error occurred.
4335  */
4336 static int check_inode_extref(struct btrfs_root *root,
4337                               struct btrfs_key *ref_key,
4338                               struct extent_buffer *node, int slot, u64 *refs,
4339                               int mode)
4340 {
4341         struct btrfs_key key;
4342         struct btrfs_inode_extref *extref;
4343         char namebuf[BTRFS_NAME_LEN] = {0};
4344         u32 total;
4345         u32 cur = 0;
4346         u32 len;
4347         u32 name_len;
4348         u64 index;
4349         u64 parent;
4350         int ret;
4351         int err = 0;
4352
4353         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4354         total = btrfs_item_size_nr(node, slot);
4355
4356 next:
4357         /* update inode ref count */
4358         (*refs)++;
4359         name_len = btrfs_inode_extref_name_len(node, extref);
4360         index = btrfs_inode_extref_index(node, extref);
4361         parent = btrfs_inode_extref_parent(node, extref);
4362         if (name_len <= BTRFS_NAME_LEN) {
4363                 len = name_len;
4364         } else {
4365                 len = BTRFS_NAME_LEN;
4366                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4367                         root->objectid, ref_key->objectid, ref_key->offset);
4368         }
4369         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4370
4371         /* Check root dir ref name */
4372         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4373                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4374                       root->objectid, ref_key->objectid, ref_key->offset,
4375                       namebuf);
4376                 err |= ROOT_DIR_ERROR;
4377         }
4378
4379         /* find related dir_index */
4380         key.objectid = parent;
4381         key.type = BTRFS_DIR_INDEX_KEY;
4382         key.offset = index;
4383         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4384         err |= ret;
4385
4386         /* find related dir_item */
4387         key.objectid = parent;
4388         key.type = BTRFS_DIR_ITEM_KEY;
4389         key.offset = btrfs_name_hash(namebuf, len);
4390         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4391         err |= ret;
4392
4393         len = sizeof(*extref) + name_len;
4394         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4395         cur += len;
4396
4397         if (cur < total)
4398                 goto next;
4399
4400         return err;
4401 }
4402
4403 /*
4404  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4405  * DIR_ITEM/DIR_INDEX match.
4406  *
4407  * @root:       the root of the fs/file tree
4408  * @key:        the key of the INODE_REF/INODE_EXTREF
4409  * @name:       the name in the INODE_REF/INODE_EXTREF
4410  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4411  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4412  * to (u64)-1
4413  * @ext_ref:    the EXTENDED_IREF feature
4414  *
4415  * Return 0 if no error occurred.
4416  * Return >0 for error bitmap
4417  */
4418 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4419                           char *name, int namelen, u64 index,
4420                           unsigned int ext_ref)
4421 {
4422         struct btrfs_path path;
4423         struct btrfs_inode_ref *ref;
4424         struct btrfs_inode_extref *extref;
4425         struct extent_buffer *node;
4426         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4427         u32 total;
4428         u32 cur = 0;
4429         u32 len;
4430         u32 ref_namelen;
4431         u64 ref_index;
4432         u64 parent;
4433         u64 dir_id;
4434         int slot;
4435         int ret;
4436
4437         btrfs_init_path(&path);
4438         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4439         if (ret) {
4440                 ret = INODE_REF_MISSING;
4441                 goto extref;
4442         }
4443
4444         node = path.nodes[0];
4445         slot = path.slots[0];
4446
4447         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4448         total = btrfs_item_size_nr(node, slot);
4449
4450         /* Iterate all entry of INODE_REF */
4451         while (cur < total) {
4452                 ret = INODE_REF_MISSING;
4453
4454                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4455                 ref_index = btrfs_inode_ref_index(node, ref);
4456                 if (index != (u64)-1 && index != ref_index)
4457                         goto next_ref;
4458
4459                 if (ref_namelen <= BTRFS_NAME_LEN) {
4460                         len = ref_namelen;
4461                 } else {
4462                         len = BTRFS_NAME_LEN;
4463                         warning("root %llu INODE %s[%llu %llu] name too long",
4464                                 root->objectid,
4465                                 key->type == BTRFS_INODE_REF_KEY ?
4466                                         "REF" : "EXTREF",
4467                                 key->objectid, key->offset);
4468                 }
4469                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4470                                    len);
4471
4472                 if (len != namelen || strncmp(ref_namebuf, name, len))
4473                         goto next_ref;
4474
4475                 ret = 0;
4476                 goto out;
4477 next_ref:
4478                 len = sizeof(*ref) + ref_namelen;
4479                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4480                 cur += len;
4481         }
4482
4483 extref:
4484         /* Skip if not support EXTENDED_IREF feature */
4485         if (!ext_ref)
4486                 goto out;
4487
4488         btrfs_release_path(&path);
4489         btrfs_init_path(&path);
4490
4491         dir_id = key->offset;
4492         key->type = BTRFS_INODE_EXTREF_KEY;
4493         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4494
4495         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4496         if (ret) {
4497                 ret = INODE_REF_MISSING;
4498                 goto out;
4499         }
4500
4501         node = path.nodes[0];
4502         slot = path.slots[0];
4503
4504         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4505         cur = 0;
4506         total = btrfs_item_size_nr(node, slot);
4507
4508         /* Iterate all entry of INODE_EXTREF */
4509         while (cur < total) {
4510                 ret = INODE_REF_MISSING;
4511
4512                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4513                 ref_index = btrfs_inode_extref_index(node, extref);
4514                 parent = btrfs_inode_extref_parent(node, extref);
4515                 if (index != (u64)-1 && index != ref_index)
4516                         goto next_extref;
4517
4518                 if (parent != dir_id)
4519                         goto next_extref;
4520
4521                 if (ref_namelen <= BTRFS_NAME_LEN) {
4522                         len = ref_namelen;
4523                 } else {
4524                         len = BTRFS_NAME_LEN;
4525                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4526                                 root->objectid,
4527                                 key->type == BTRFS_INODE_REF_KEY ?
4528                                         "REF" : "EXTREF",
4529                                 key->objectid, key->offset);
4530                 }
4531                 read_extent_buffer(node, ref_namebuf,
4532                                    (unsigned long)(extref + 1), len);
4533
4534                 if (len != namelen || strncmp(ref_namebuf, name, len))
4535                         goto next_extref;
4536
4537                 ret = 0;
4538                 goto out;
4539
4540 next_extref:
4541                 len = sizeof(*extref) + ref_namelen;
4542                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4543                 cur += len;
4544
4545         }
4546 out:
4547         btrfs_release_path(&path);
4548         return ret;
4549 }
4550
4551 /*
4552  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4553  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4554  *
4555  * @root:       the root of the fs/file tree
4556  * @key:        the key of the INODE_REF/INODE_EXTREF
4557  * @size:       the st_size of the INODE_ITEM
4558  * @ext_ref:    the EXTENDED_IREF feature
4559  *
4560  * Return 0 if no error occurred.
4561  */
4562 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4563                           struct extent_buffer *node, int slot, u64 *size,
4564                           unsigned int ext_ref)
4565 {
4566         struct btrfs_dir_item *di;
4567         struct btrfs_inode_item *ii;
4568         struct btrfs_path path;
4569         struct btrfs_key location;
4570         char namebuf[BTRFS_NAME_LEN] = {0};
4571         u32 total;
4572         u32 cur = 0;
4573         u32 len;
4574         u32 name_len;
4575         u32 data_len;
4576         u8 filetype;
4577         u32 mode;
4578         u64 index;
4579         int ret;
4580         int err = 0;
4581
4582         /*
4583          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4584          * ignore index check.
4585          */
4586         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4587
4588         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4589         total = btrfs_item_size_nr(node, slot);
4590
4591         while (cur < total) {
4592                 data_len = btrfs_dir_data_len(node, di);
4593                 if (data_len)
4594                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4595                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4596                               "DIR_ITEM" : "DIR_INDEX",
4597                               key->objectid, key->offset, data_len);
4598
4599                 name_len = btrfs_dir_name_len(node, di);
4600                 if (name_len <= BTRFS_NAME_LEN) {
4601                         len = name_len;
4602                 } else {
4603                         len = BTRFS_NAME_LEN;
4604                         warning("root %llu %s[%llu %llu] name too long",
4605                                 root->objectid,
4606                                 key->type == BTRFS_DIR_ITEM_KEY ?
4607                                 "DIR_ITEM" : "DIR_INDEX",
4608                                 key->objectid, key->offset);
4609                 }
4610                 (*size) += name_len;
4611
4612                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4613                 filetype = btrfs_dir_type(node, di);
4614
4615                 btrfs_init_path(&path);
4616                 btrfs_dir_item_key_to_cpu(node, di, &location);
4617
4618                 /* Ignore related ROOT_ITEM check */
4619                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4620                         goto next;
4621
4622                 /* Check relative INODE_ITEM(existence/filetype) */
4623                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4624                 if (ret) {
4625                         err |= INODE_ITEM_MISSING;
4626                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4627                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4628                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4629                               key->offset, location.objectid, name_len,
4630                               namebuf, filetype);
4631                         goto next;
4632                 }
4633
4634                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4635                                     struct btrfs_inode_item);
4636                 mode = btrfs_inode_mode(path.nodes[0], ii);
4637
4638                 if (imode_to_type(mode) != filetype) {
4639                         err |= INODE_ITEM_MISMATCH;
4640                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4641                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643                               key->offset, name_len, namebuf, filetype);
4644                 }
4645
4646                 /* Check relative INODE_REF/INODE_EXTREF */
4647                 location.type = BTRFS_INODE_REF_KEY;
4648                 location.offset = key->objectid;
4649                 ret = find_inode_ref(root, &location, namebuf, len,
4650                                        index, ext_ref);
4651                 err |= ret;
4652                 if (ret & INODE_REF_MISSING)
4653                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4654                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4655                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4656                               key->offset, name_len, namebuf, filetype);
4657
4658 next:
4659                 btrfs_release_path(&path);
4660                 len = sizeof(*di) + name_len + data_len;
4661                 di = (struct btrfs_dir_item *)((char *)di + len);
4662                 cur += len;
4663
4664                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4665                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4666                               root->objectid, key->objectid, key->offset);
4667                         break;
4668                 }
4669         }
4670
4671         return err;
4672 }
4673
4674 /*
4675  * Check file extent datasum/hole, update the size of the file extents,
4676  * check and update the last offset of the file extent.
4677  *
4678  * @root:       the root of fs/file tree.
4679  * @fkey:       the key of the file extent.
4680  * @nodatasum:  INODE_NODATASUM feature.
4681  * @size:       the sum of all EXTENT_DATA items size for this inode.
4682  * @end:        the offset of the last extent.
4683  *
4684  * Return 0 if no error occurred.
4685  */
4686 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4687                              struct extent_buffer *node, int slot,
4688                              unsigned int nodatasum, u64 *size, u64 *end)
4689 {
4690         struct btrfs_file_extent_item *fi;
4691         u64 disk_bytenr;
4692         u64 disk_num_bytes;
4693         u64 extent_num_bytes;
4694         u64 found;
4695         unsigned int extent_type;
4696         unsigned int is_hole;
4697         int ret;
4698         int err = 0;
4699
4700         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4701
4702         extent_type = btrfs_file_extent_type(node, fi);
4703         /* Skip if file extent is inline */
4704         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4705                 struct btrfs_item *e = btrfs_item_nr(slot);
4706                 u32 item_inline_len;
4707
4708                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4709                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4710                 if (extent_num_bytes == 0 ||
4711                     extent_num_bytes != item_inline_len)
4712                         err |= FILE_EXTENT_ERROR;
4713                 *size += extent_num_bytes;
4714                 return err;
4715         }
4716
4717         /* Check extent type */
4718         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4719                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4720                 err |= FILE_EXTENT_ERROR;
4721                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4722                       root->objectid, fkey->objectid, fkey->offset);
4723                 return err;
4724         }
4725
4726         /* Check REG_EXTENT/PREALLOC_EXTENT */
4727         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4728         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4729         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4730         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4731
4732         /* Check EXTENT_DATA datasum */
4733         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4734         if (found > 0 && nodatasum) {
4735                 err |= ODD_CSUM_ITEM;
4736                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4737                       root->objectid, fkey->objectid, fkey->offset);
4738         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4739                    !is_hole &&
4740                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4741                 err |= CSUM_ITEM_MISSING;
4742                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4743                       root->objectid, fkey->objectid, fkey->offset);
4744         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4745                 err |= ODD_CSUM_ITEM;
4746                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4747                       root->objectid, fkey->objectid, fkey->offset);
4748         }
4749
4750         /* Check EXTENT_DATA hole */
4751         if (no_holes && is_hole) {
4752                 err |= FILE_EXTENT_ERROR;
4753                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4754                       root->objectid, fkey->objectid, fkey->offset);
4755         } else if (!no_holes && *end != fkey->offset) {
4756                 err |= FILE_EXTENT_ERROR;
4757                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4758                       root->objectid, fkey->objectid, fkey->offset);
4759         }
4760
4761         *end += extent_num_bytes;
4762         if (!is_hole)
4763                 *size += extent_num_bytes;
4764
4765         return err;
4766 }
4767
4768 /*
4769  * Check INODE_ITEM and related ITEMs (the same inode number)
4770  * 1. check link count
4771  * 2. check inode ref/extref
4772  * 3. check dir item/index
4773  *
4774  * @ext_ref:    the EXTENDED_IREF feature
4775  *
4776  * Return 0 if no error occurred.
4777  * Return >0 for error or hit the traversal is done(by error bitmap)
4778  */
4779 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4780                             unsigned int ext_ref)
4781 {
4782         struct extent_buffer *node;
4783         struct btrfs_inode_item *ii;
4784         struct btrfs_key key;
4785         u64 inode_id;
4786         u32 mode;
4787         u64 nlink;
4788         u64 nbytes;
4789         u64 isize;
4790         u64 size = 0;
4791         u64 refs = 0;
4792         u64 extent_end = 0;
4793         u64 extent_size = 0;
4794         unsigned int dir;
4795         unsigned int nodatasum;
4796         int slot;
4797         int ret;
4798         int err = 0;
4799
4800         node = path->nodes[0];
4801         slot = path->slots[0];
4802
4803         btrfs_item_key_to_cpu(node, &key, slot);
4804         inode_id = key.objectid;
4805
4806         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4807                 ret = btrfs_next_item(root, path);
4808                 if (ret > 0)
4809                         err |= LAST_ITEM;
4810                 return err;
4811         }
4812
4813         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4814         isize = btrfs_inode_size(node, ii);
4815         nbytes = btrfs_inode_nbytes(node, ii);
4816         mode = btrfs_inode_mode(node, ii);
4817         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4818         nlink = btrfs_inode_nlink(node, ii);
4819         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4820
4821         while (1) {
4822                 ret = btrfs_next_item(root, path);
4823                 if (ret < 0) {
4824                         /* out will fill 'err' rusing current statistics */
4825                         goto out;
4826                 } else if (ret > 0) {
4827                         err |= LAST_ITEM;
4828                         goto out;
4829                 }
4830
4831                 node = path->nodes[0];
4832                 slot = path->slots[0];
4833                 btrfs_item_key_to_cpu(node, &key, slot);
4834                 if (key.objectid != inode_id)
4835                         goto out;
4836
4837                 switch (key.type) {
4838                 case BTRFS_INODE_REF_KEY:
4839                         ret = check_inode_ref(root, &key, node, slot, &refs,
4840                                               mode);
4841                         err |= ret;
4842                         break;
4843                 case BTRFS_INODE_EXTREF_KEY:
4844                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4845                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4846                                         root->objectid, key.objectid,
4847                                         key.offset);
4848                         ret = check_inode_extref(root, &key, node, slot, &refs,
4849                                                  mode);
4850                         err |= ret;
4851                         break;
4852                 case BTRFS_DIR_ITEM_KEY:
4853                 case BTRFS_DIR_INDEX_KEY:
4854                         if (!dir) {
4855                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4856                                         root->objectid, inode_id,
4857                                         imode_to_type(mode), key.objectid,
4858                                         key.offset);
4859                         }
4860                         ret = check_dir_item(root, &key, node, slot, &size,
4861                                              ext_ref);
4862                         err |= ret;
4863                         break;
4864                 case BTRFS_EXTENT_DATA_KEY:
4865                         if (dir) {
4866                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4867                                         root->objectid, inode_id, key.objectid,
4868                                         key.offset);
4869                         }
4870                         ret = check_file_extent(root, &key, node, slot,
4871                                                 nodatasum, &extent_size,
4872                                                 &extent_end);
4873                         err |= ret;
4874                         break;
4875                 case BTRFS_XATTR_ITEM_KEY:
4876                         break;
4877                 default:
4878                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4879                               key.objectid, key.type, key.offset);
4880                 }
4881         }
4882
4883 out:
4884         /* verify INODE_ITEM nlink/isize/nbytes */
4885         if (dir) {
4886                 if (nlink != 1) {
4887                         err |= LINK_COUNT_ERROR;
4888                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4889                               root->objectid, inode_id, nlink);
4890                 }
4891
4892                 /*
4893                  * Just a warning, as dir inode nbytes is just an
4894                  * instructive value.
4895                  */
4896                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4897                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4898                                 root->objectid, inode_id, root->nodesize);
4899                 }
4900
4901                 if (isize != size) {
4902                         err |= ISIZE_ERROR;
4903                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4904                               root->objectid, inode_id, isize, size);
4905                 }
4906         } else {
4907                 if (nlink != refs) {
4908                         err |= LINK_COUNT_ERROR;
4909                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4910                               root->objectid, inode_id, nlink, refs);
4911                 } else if (!nlink) {
4912                         err |= ORPHAN_ITEM;
4913                 }
4914
4915                 if (!nbytes && !no_holes && extent_end < isize) {
4916                         err |= NBYTES_ERROR;
4917                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4918                               root->objectid, inode_id, isize);
4919                 }
4920
4921                 if (nbytes != extent_size) {
4922                         err |= NBYTES_ERROR;
4923                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4924                               root->objectid, inode_id, nbytes, extent_size);
4925                 }
4926         }
4927
4928         return err;
4929 }
4930
4931 /*
4932  * Iterate all item on the tree and call check_inode_item() to check.
4933  *
4934  * @root:       the root of the tree to be checked.
4935  * @ext_ref:    the EXTENDED_IREF feature
4936  *
4937  * Return 0 if no error found.
4938  * Return <0 for error.
4939  */
4940 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4941 {
4942         struct btrfs_path *path;
4943         struct node_refs nrefs;
4944         struct btrfs_root_item *root_item = &root->root_item;
4945         int ret, wret;
4946         int level;
4947
4948         path = btrfs_alloc_path();
4949         if (!path)
4950                 return -ENOMEM;
4951
4952         memset(&nrefs, 0, sizeof(nrefs));
4953         level = btrfs_header_level(root->node);
4954
4955         if (btrfs_root_refs(root_item) > 0 ||
4956             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4957                 path->nodes[level] = root->node;
4958                 path->slots[level] = 0;
4959                 extent_buffer_get(root->node);
4960         } else {
4961                 struct btrfs_key key;
4962
4963                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4964                 level = root_item->drop_level;
4965                 path->lowest_level = level;
4966                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4967                 if (ret < 0)
4968                         goto out;
4969                 ret = 0;
4970         }
4971
4972         while (1) {
4973                 wret = walk_down_tree_v2(root, path, &level, &nrefs, ext_ref);
4974                 if (wret < 0)
4975                         ret = wret;
4976                 if (wret != 0)
4977                         break;
4978
4979                 wret = walk_up_tree_v2(root, path, &level);
4980                 if (wret < 0)
4981                         ret = wret;
4982                 if (wret != 0)
4983                         break;
4984         }
4985
4986 out:
4987         btrfs_free_path(path);
4988         return ret;
4989 }
4990
4991 /*
4992  * Find the relative ref for root_ref and root_backref.
4993  *
4994  * @root:       the root of the root tree.
4995  * @ref_key:    the key of the root ref.
4996  *
4997  * Return 0 if no error occurred.
4998  */
4999 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5000                           struct extent_buffer *node, int slot)
5001 {
5002         struct btrfs_path *path;
5003         struct btrfs_key key;
5004         struct btrfs_root_ref *ref;
5005         struct btrfs_root_ref *backref;
5006         char ref_name[BTRFS_NAME_LEN] = {0};
5007         char backref_name[BTRFS_NAME_LEN] = {0};
5008         u64 ref_dirid;
5009         u64 ref_seq;
5010         u32 ref_namelen;
5011         u64 backref_dirid;
5012         u64 backref_seq;
5013         u32 backref_namelen;
5014         u32 len;
5015         int ret;
5016         int err = 0;
5017
5018         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5019         ref_dirid = btrfs_root_ref_dirid(node, ref);
5020         ref_seq = btrfs_root_ref_sequence(node, ref);
5021         ref_namelen = btrfs_root_ref_name_len(node, ref);
5022
5023         if (ref_namelen <= BTRFS_NAME_LEN) {
5024                 len = ref_namelen;
5025         } else {
5026                 len = BTRFS_NAME_LEN;
5027                 warning("%s[%llu %llu] ref_name too long",
5028                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5029                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5030                         ref_key->offset);
5031         }
5032         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5033
5034         /* Find relative root_ref */
5035         key.objectid = ref_key->offset;
5036         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5037         key.offset = ref_key->objectid;
5038
5039         path = btrfs_alloc_path();
5040         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5041         if (ret) {
5042                 err |= ROOT_REF_MISSING;
5043                 error("%s[%llu %llu] couldn't find relative ref",
5044                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5045                       "ROOT_REF" : "ROOT_BACKREF",
5046                       ref_key->objectid, ref_key->offset);
5047                 goto out;
5048         }
5049
5050         backref = btrfs_item_ptr(path->nodes[0], path->slots[0],
5051                                  struct btrfs_root_ref);
5052         backref_dirid = btrfs_root_ref_dirid(path->nodes[0], backref);
5053         backref_seq = btrfs_root_ref_sequence(path->nodes[0], backref);
5054         backref_namelen = btrfs_root_ref_name_len(path->nodes[0], backref);
5055
5056         if (backref_namelen <= BTRFS_NAME_LEN) {
5057                 len = backref_namelen;
5058         } else {
5059                 len = BTRFS_NAME_LEN;
5060                 warning("%s[%llu %llu] ref_name too long",
5061                         key.type == BTRFS_ROOT_REF_KEY ?
5062                         "ROOT_REF" : "ROOT_BACKREF",
5063                         key.objectid, key.offset);
5064         }
5065         read_extent_buffer(path->nodes[0], backref_name,
5066                            (unsigned long)(backref + 1), len);
5067
5068         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5069             ref_namelen != backref_namelen ||
5070             strncmp(ref_name, backref_name, len)) {
5071                 err |= ROOT_REF_MISMATCH;
5072                 error("%s[%llu %llu] mismatch relative ref",
5073                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5074                       "ROOT_REF" : "ROOT_BACKREF",
5075                       ref_key->objectid, ref_key->offset);
5076         }
5077 out:
5078         btrfs_free_path(path);
5079         return err;
5080 }
5081
5082 /*
5083  * Check all fs/file tree in low_memory mode.
5084  *
5085  * 1. for fs tree root item, call check_fs_root_v2()
5086  * 2. for fs tree root ref/backref, call check_root_ref()
5087  *
5088  * Return 0 if no error occurred.
5089  */
5090 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5091 {
5092         struct btrfs_root *tree_root = fs_info->tree_root;
5093         struct btrfs_root *cur_root = NULL;
5094         struct btrfs_path *path;
5095         struct btrfs_key key;
5096         struct extent_buffer *node;
5097         unsigned int ext_ref;
5098         int slot;
5099         int ret;
5100         int err = 0;
5101
5102         ext_ref = btrfs_fs_incompat(fs_info,
5103                                     BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF);
5104
5105         path = btrfs_alloc_path();
5106         if (!path)
5107                 return -ENOMEM;
5108
5109         key.objectid = BTRFS_FS_TREE_OBJECTID;
5110         key.offset = 0;
5111         key.type = BTRFS_ROOT_ITEM_KEY;
5112
5113         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
5114         if (ret < 0) {
5115                 err = ret;
5116                 goto out;
5117         } else if (ret > 0) {
5118                 err = -ENOENT;
5119                 goto out;
5120         }
5121
5122         while (1) {
5123                 node = path->nodes[0];
5124                 slot = path->slots[0];
5125                 btrfs_item_key_to_cpu(node, &key, slot);
5126                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5127                         goto out;
5128                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5129                     fs_root_objectid(key.objectid)) {
5130                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5131                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5132                                                                        &key);
5133                         } else {
5134                                 key.offset = (u64)-1;
5135                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5136                         }
5137
5138                         if (IS_ERR(cur_root)) {
5139                                 error("Fail to read fs/subvol tree: %lld",
5140                                       key.objectid);
5141                                 err = -EIO;
5142                                 goto next;
5143                         }
5144
5145                         ret = check_fs_root_v2(cur_root, ext_ref);
5146                         err |= ret;
5147
5148                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5149                                 btrfs_free_fs_root(cur_root);
5150                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5151                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5152                         ret = check_root_ref(tree_root, &key, node, slot);
5153                         err |= ret;
5154                 }
5155 next:
5156                 ret = btrfs_next_item(tree_root, path);
5157                 if (ret > 0)
5158                         goto out;
5159                 if (ret < 0) {
5160                         err = ret;
5161                         goto out;
5162                 }
5163         }
5164
5165 out:
5166         btrfs_free_path(path);
5167         return err;
5168 }
5169
5170 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5171 {
5172         struct list_head *cur = rec->backrefs.next;
5173         struct extent_backref *back;
5174         struct tree_backref *tback;
5175         struct data_backref *dback;
5176         u64 found = 0;
5177         int err = 0;
5178
5179         while(cur != &rec->backrefs) {
5180                 back = to_extent_backref(cur);
5181                 cur = cur->next;
5182                 if (!back->found_extent_tree) {
5183                         err = 1;
5184                         if (!print_errs)
5185                                 goto out;
5186                         if (back->is_data) {
5187                                 dback = to_data_backref(back);
5188                                 fprintf(stderr, "Backref %llu %s %llu"
5189                                         " owner %llu offset %llu num_refs %lu"
5190                                         " not found in extent tree\n",
5191                                         (unsigned long long)rec->start,
5192                                         back->full_backref ?
5193                                         "parent" : "root",
5194                                         back->full_backref ?
5195                                         (unsigned long long)dback->parent:
5196                                         (unsigned long long)dback->root,
5197                                         (unsigned long long)dback->owner,
5198                                         (unsigned long long)dback->offset,
5199                                         (unsigned long)dback->num_refs);
5200                         } else {
5201                                 tback = to_tree_backref(back);
5202                                 fprintf(stderr, "Backref %llu parent %llu"
5203                                         " root %llu not found in extent tree\n",
5204                                         (unsigned long long)rec->start,
5205                                         (unsigned long long)tback->parent,
5206                                         (unsigned long long)tback->root);
5207                         }
5208                 }
5209                 if (!back->is_data && !back->found_ref) {
5210                         err = 1;
5211                         if (!print_errs)
5212                                 goto out;
5213                         tback = to_tree_backref(back);
5214                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5215                                 (unsigned long long)rec->start,
5216                                 back->full_backref ? "parent" : "root",
5217                                 back->full_backref ?
5218                                 (unsigned long long)tback->parent :
5219                                 (unsigned long long)tback->root, back);
5220                 }
5221                 if (back->is_data) {
5222                         dback = to_data_backref(back);
5223                         if (dback->found_ref != dback->num_refs) {
5224                                 err = 1;
5225                                 if (!print_errs)
5226                                         goto out;
5227                                 fprintf(stderr, "Incorrect local backref count"
5228                                         " on %llu %s %llu owner %llu"
5229                                         " offset %llu found %u wanted %u back %p\n",
5230                                         (unsigned long long)rec->start,
5231                                         back->full_backref ?
5232                                         "parent" : "root",
5233                                         back->full_backref ?
5234                                         (unsigned long long)dback->parent:
5235                                         (unsigned long long)dback->root,
5236                                         (unsigned long long)dback->owner,
5237                                         (unsigned long long)dback->offset,
5238                                         dback->found_ref, dback->num_refs, back);
5239                         }
5240                         if (dback->disk_bytenr != rec->start) {
5241                                 err = 1;
5242                                 if (!print_errs)
5243                                         goto out;
5244                                 fprintf(stderr, "Backref disk bytenr does not"
5245                                         " match extent record, bytenr=%llu, "
5246                                         "ref bytenr=%llu\n",
5247                                         (unsigned long long)rec->start,
5248                                         (unsigned long long)dback->disk_bytenr);
5249                         }
5250
5251                         if (dback->bytes != rec->nr) {
5252                                 err = 1;
5253                                 if (!print_errs)
5254                                         goto out;
5255                                 fprintf(stderr, "Backref bytes do not match "
5256                                         "extent backref, bytenr=%llu, ref "
5257                                         "bytes=%llu, backref bytes=%llu\n",
5258                                         (unsigned long long)rec->start,
5259                                         (unsigned long long)rec->nr,
5260                                         (unsigned long long)dback->bytes);
5261                         }
5262                 }
5263                 if (!back->is_data) {
5264                         found += 1;
5265                 } else {
5266                         dback = to_data_backref(back);
5267                         found += dback->found_ref;
5268                 }
5269         }
5270         if (found != rec->refs) {
5271                 err = 1;
5272                 if (!print_errs)
5273                         goto out;
5274                 fprintf(stderr, "Incorrect global backref count "
5275                         "on %llu found %llu wanted %llu\n",
5276                         (unsigned long long)rec->start,
5277                         (unsigned long long)found,
5278                         (unsigned long long)rec->refs);
5279         }
5280 out:
5281         return err;
5282 }
5283
5284 static int free_all_extent_backrefs(struct extent_record *rec)
5285 {
5286         struct extent_backref *back;
5287         struct list_head *cur;
5288         while (!list_empty(&rec->backrefs)) {
5289                 cur = rec->backrefs.next;
5290                 back = to_extent_backref(cur);
5291                 list_del(cur);
5292                 free(back);
5293         }
5294         return 0;
5295 }
5296
5297 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5298                                      struct cache_tree *extent_cache)
5299 {
5300         struct cache_extent *cache;
5301         struct extent_record *rec;
5302
5303         while (1) {
5304                 cache = first_cache_extent(extent_cache);
5305                 if (!cache)
5306                         break;
5307                 rec = container_of(cache, struct extent_record, cache);
5308                 remove_cache_extent(extent_cache, cache);
5309                 free_all_extent_backrefs(rec);
5310                 free(rec);
5311         }
5312 }
5313
5314 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5315                                  struct extent_record *rec)
5316 {
5317         if (rec->content_checked && rec->owner_ref_checked &&
5318             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5319             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5320             !rec->bad_full_backref && !rec->crossing_stripes &&
5321             !rec->wrong_chunk_type) {
5322                 remove_cache_extent(extent_cache, &rec->cache);
5323                 free_all_extent_backrefs(rec);
5324                 list_del_init(&rec->list);
5325                 free(rec);
5326         }
5327         return 0;
5328 }
5329
5330 static int check_owner_ref(struct btrfs_root *root,
5331                             struct extent_record *rec,
5332                             struct extent_buffer *buf)
5333 {
5334         struct extent_backref *node;
5335         struct tree_backref *back;
5336         struct btrfs_root *ref_root;
5337         struct btrfs_key key;
5338         struct btrfs_path path;
5339         struct extent_buffer *parent;
5340         int level;
5341         int found = 0;
5342         int ret;
5343
5344         list_for_each_entry(node, &rec->backrefs, list) {
5345                 if (node->is_data)
5346                         continue;
5347                 if (!node->found_ref)
5348                         continue;
5349                 if (node->full_backref)
5350                         continue;
5351                 back = to_tree_backref(node);
5352                 if (btrfs_header_owner(buf) == back->root)
5353                         return 0;
5354         }
5355         BUG_ON(rec->is_root);
5356
5357         /* try to find the block by search corresponding fs tree */
5358         key.objectid = btrfs_header_owner(buf);
5359         key.type = BTRFS_ROOT_ITEM_KEY;
5360         key.offset = (u64)-1;
5361
5362         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5363         if (IS_ERR(ref_root))
5364                 return 1;
5365
5366         level = btrfs_header_level(buf);
5367         if (level == 0)
5368                 btrfs_item_key_to_cpu(buf, &key, 0);
5369         else
5370                 btrfs_node_key_to_cpu(buf, &key, 0);
5371
5372         btrfs_init_path(&path);
5373         path.lowest_level = level + 1;
5374         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5375         if (ret < 0)
5376                 return 0;
5377
5378         parent = path.nodes[level + 1];
5379         if (parent && buf->start == btrfs_node_blockptr(parent,
5380                                                         path.slots[level + 1]))
5381                 found = 1;
5382
5383         btrfs_release_path(&path);
5384         return found ? 0 : 1;
5385 }
5386
5387 static int is_extent_tree_record(struct extent_record *rec)
5388 {
5389         struct list_head *cur = rec->backrefs.next;
5390         struct extent_backref *node;
5391         struct tree_backref *back;
5392         int is_extent = 0;
5393
5394         while(cur != &rec->backrefs) {
5395                 node = to_extent_backref(cur);
5396                 cur = cur->next;
5397                 if (node->is_data)
5398                         return 0;
5399                 back = to_tree_backref(node);
5400                 if (node->full_backref)
5401                         return 0;
5402                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5403                         is_extent = 1;
5404         }
5405         return is_extent;
5406 }
5407
5408
5409 static int record_bad_block_io(struct btrfs_fs_info *info,
5410                                struct cache_tree *extent_cache,
5411                                u64 start, u64 len)
5412 {
5413         struct extent_record *rec;
5414         struct cache_extent *cache;
5415         struct btrfs_key key;
5416
5417         cache = lookup_cache_extent(extent_cache, start, len);
5418         if (!cache)
5419                 return 0;
5420
5421         rec = container_of(cache, struct extent_record, cache);
5422         if (!is_extent_tree_record(rec))
5423                 return 0;
5424
5425         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5426         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5427 }
5428
5429 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5430                        struct extent_buffer *buf, int slot)
5431 {
5432         if (btrfs_header_level(buf)) {
5433                 struct btrfs_key_ptr ptr1, ptr2;
5434
5435                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5436                                    sizeof(struct btrfs_key_ptr));
5437                 read_extent_buffer(buf, &ptr2,
5438                                    btrfs_node_key_ptr_offset(slot + 1),
5439                                    sizeof(struct btrfs_key_ptr));
5440                 write_extent_buffer(buf, &ptr1,
5441                                     btrfs_node_key_ptr_offset(slot + 1),
5442                                     sizeof(struct btrfs_key_ptr));
5443                 write_extent_buffer(buf, &ptr2,
5444                                     btrfs_node_key_ptr_offset(slot),
5445                                     sizeof(struct btrfs_key_ptr));
5446                 if (slot == 0) {
5447                         struct btrfs_disk_key key;
5448                         btrfs_node_key(buf, &key, 0);
5449                         btrfs_fixup_low_keys(root, path, &key,
5450                                              btrfs_header_level(buf) + 1);
5451                 }
5452         } else {
5453                 struct btrfs_item *item1, *item2;
5454                 struct btrfs_key k1, k2;
5455                 char *item1_data, *item2_data;
5456                 u32 item1_offset, item2_offset, item1_size, item2_size;
5457
5458                 item1 = btrfs_item_nr(slot);
5459                 item2 = btrfs_item_nr(slot + 1);
5460                 btrfs_item_key_to_cpu(buf, &k1, slot);
5461                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5462                 item1_offset = btrfs_item_offset(buf, item1);
5463                 item2_offset = btrfs_item_offset(buf, item2);
5464                 item1_size = btrfs_item_size(buf, item1);
5465                 item2_size = btrfs_item_size(buf, item2);
5466
5467                 item1_data = malloc(item1_size);
5468                 if (!item1_data)
5469                         return -ENOMEM;
5470                 item2_data = malloc(item2_size);
5471                 if (!item2_data) {
5472                         free(item1_data);
5473                         return -ENOMEM;
5474                 }
5475
5476                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5477                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5478
5479                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5480                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5481                 free(item1_data);
5482                 free(item2_data);
5483
5484                 btrfs_set_item_offset(buf, item1, item2_offset);
5485                 btrfs_set_item_offset(buf, item2, item1_offset);
5486                 btrfs_set_item_size(buf, item1, item2_size);
5487                 btrfs_set_item_size(buf, item2, item1_size);
5488
5489                 path->slots[0] = slot;
5490                 btrfs_set_item_key_unsafe(root, path, &k2);
5491                 path->slots[0] = slot + 1;
5492                 btrfs_set_item_key_unsafe(root, path, &k1);
5493         }
5494         return 0;
5495 }
5496
5497 static int fix_key_order(struct btrfs_trans_handle *trans,
5498                          struct btrfs_root *root,
5499                          struct btrfs_path *path)
5500 {
5501         struct extent_buffer *buf;
5502         struct btrfs_key k1, k2;
5503         int i;
5504         int level = path->lowest_level;
5505         int ret = -EIO;
5506
5507         buf = path->nodes[level];
5508         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5509                 if (level) {
5510                         btrfs_node_key_to_cpu(buf, &k1, i);
5511                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5512                 } else {
5513                         btrfs_item_key_to_cpu(buf, &k1, i);
5514                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5515                 }
5516                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5517                         continue;
5518                 ret = swap_values(root, path, buf, i);
5519                 if (ret)
5520                         break;
5521                 btrfs_mark_buffer_dirty(buf);
5522                 i = 0;
5523         }
5524         return ret;
5525 }
5526
5527 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5528                              struct btrfs_root *root,
5529                              struct btrfs_path *path,
5530                              struct extent_buffer *buf, int slot)
5531 {
5532         struct btrfs_key key;
5533         int nritems = btrfs_header_nritems(buf);
5534
5535         btrfs_item_key_to_cpu(buf, &key, slot);
5536
5537         /* These are all the keys we can deal with missing. */
5538         if (key.type != BTRFS_DIR_INDEX_KEY &&
5539             key.type != BTRFS_EXTENT_ITEM_KEY &&
5540             key.type != BTRFS_METADATA_ITEM_KEY &&
5541             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5542             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5543                 return -1;
5544
5545         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5546                (unsigned long long)key.objectid, key.type,
5547                (unsigned long long)key.offset, slot, buf->start);
5548         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5549                               btrfs_item_nr_offset(slot + 1),
5550                               sizeof(struct btrfs_item) *
5551                               (nritems - slot - 1));
5552         btrfs_set_header_nritems(buf, nritems - 1);
5553         if (slot == 0) {
5554                 struct btrfs_disk_key disk_key;
5555
5556                 btrfs_item_key(buf, &disk_key, 0);
5557                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5558         }
5559         btrfs_mark_buffer_dirty(buf);
5560         return 0;
5561 }
5562
5563 static int fix_item_offset(struct btrfs_trans_handle *trans,
5564                            struct btrfs_root *root,
5565                            struct btrfs_path *path)
5566 {
5567         struct extent_buffer *buf;
5568         int i;
5569         int ret = 0;
5570
5571         /* We should only get this for leaves */
5572         BUG_ON(path->lowest_level);
5573         buf = path->nodes[0];
5574 again:
5575         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5576                 unsigned int shift = 0, offset;
5577
5578                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5579                     BTRFS_LEAF_DATA_SIZE(root)) {
5580                         if (btrfs_item_end_nr(buf, i) >
5581                             BTRFS_LEAF_DATA_SIZE(root)) {
5582                                 ret = delete_bogus_item(trans, root, path,
5583                                                         buf, i);
5584                                 if (!ret)
5585                                         goto again;
5586                                 fprintf(stderr, "item is off the end of the "
5587                                         "leaf, can't fix\n");
5588                                 ret = -EIO;
5589                                 break;
5590                         }
5591                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5592                                 btrfs_item_end_nr(buf, i);
5593                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5594                            btrfs_item_offset_nr(buf, i - 1)) {
5595                         if (btrfs_item_end_nr(buf, i) >
5596                             btrfs_item_offset_nr(buf, i - 1)) {
5597                                 ret = delete_bogus_item(trans, root, path,
5598                                                         buf, i);
5599                                 if (!ret)
5600                                         goto again;
5601                                 fprintf(stderr, "items overlap, can't fix\n");
5602                                 ret = -EIO;
5603                                 break;
5604                         }
5605                         shift = btrfs_item_offset_nr(buf, i - 1) -
5606                                 btrfs_item_end_nr(buf, i);
5607                 }
5608                 if (!shift)
5609                         continue;
5610
5611                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5612                        i, shift, (unsigned long long)buf->start);
5613                 offset = btrfs_item_offset_nr(buf, i);
5614                 memmove_extent_buffer(buf,
5615                                       btrfs_leaf_data(buf) + offset + shift,
5616                                       btrfs_leaf_data(buf) + offset,
5617                                       btrfs_item_size_nr(buf, i));
5618                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5619                                       offset + shift);
5620                 btrfs_mark_buffer_dirty(buf);
5621         }
5622
5623         /*
5624          * We may have moved things, in which case we want to exit so we don't
5625          * write those changes out.  Once we have proper abort functionality in
5626          * progs this can be changed to something nicer.
5627          */
5628         BUG_ON(ret);
5629         return ret;
5630 }
5631
5632 /*
5633  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5634  * then just return -EIO.
5635  */
5636 static int try_to_fix_bad_block(struct btrfs_root *root,
5637                                 struct extent_buffer *buf,
5638                                 enum btrfs_tree_block_status status)
5639 {
5640         struct btrfs_trans_handle *trans;
5641         struct ulist *roots;
5642         struct ulist_node *node;
5643         struct btrfs_root *search_root;
5644         struct btrfs_path path;
5645         struct ulist_iterator iter;
5646         struct btrfs_key root_key, key;
5647         int ret;
5648
5649         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5650             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5651                 return -EIO;
5652
5653         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5654         if (ret)
5655                 return -EIO;
5656
5657         btrfs_init_path(&path);
5658         ULIST_ITER_INIT(&iter);
5659         while ((node = ulist_next(roots, &iter))) {
5660                 root_key.objectid = node->val;
5661                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5662                 root_key.offset = (u64)-1;
5663
5664                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5665                 if (IS_ERR(root)) {
5666                         ret = -EIO;
5667                         break;
5668                 }
5669
5670
5671                 trans = btrfs_start_transaction(search_root, 0);
5672                 if (IS_ERR(trans)) {
5673                         ret = PTR_ERR(trans);
5674                         break;
5675                 }
5676
5677                 path.lowest_level = btrfs_header_level(buf);
5678                 path.skip_check_block = 1;
5679                 if (path.lowest_level)
5680                         btrfs_node_key_to_cpu(buf, &key, 0);
5681                 else
5682                         btrfs_item_key_to_cpu(buf, &key, 0);
5683                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5684                 if (ret) {
5685                         ret = -EIO;
5686                         btrfs_commit_transaction(trans, search_root);
5687                         break;
5688                 }
5689                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5690                         ret = fix_key_order(trans, search_root, &path);
5691                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5692                         ret = fix_item_offset(trans, search_root, &path);
5693                 if (ret) {
5694                         btrfs_commit_transaction(trans, search_root);
5695                         break;
5696                 }
5697                 btrfs_release_path(&path);
5698                 btrfs_commit_transaction(trans, search_root);
5699         }
5700         ulist_free(roots);
5701         btrfs_release_path(&path);
5702         return ret;
5703 }
5704
5705 static int check_block(struct btrfs_root *root,
5706                        struct cache_tree *extent_cache,
5707                        struct extent_buffer *buf, u64 flags)
5708 {
5709         struct extent_record *rec;
5710         struct cache_extent *cache;
5711         struct btrfs_key key;
5712         enum btrfs_tree_block_status status;
5713         int ret = 0;
5714         int level;
5715
5716         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5717         if (!cache)
5718                 return 1;
5719         rec = container_of(cache, struct extent_record, cache);
5720         rec->generation = btrfs_header_generation(buf);
5721
5722         level = btrfs_header_level(buf);
5723         if (btrfs_header_nritems(buf) > 0) {
5724
5725                 if (level == 0)
5726                         btrfs_item_key_to_cpu(buf, &key, 0);
5727                 else
5728                         btrfs_node_key_to_cpu(buf, &key, 0);
5729
5730                 rec->info_objectid = key.objectid;
5731         }
5732         rec->info_level = level;
5733
5734         if (btrfs_is_leaf(buf))
5735                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5736         else
5737                 status = btrfs_check_node(root, &rec->parent_key, buf);
5738
5739         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5740                 if (repair)
5741                         status = try_to_fix_bad_block(root, buf, status);
5742                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5743                         ret = -EIO;
5744                         fprintf(stderr, "bad block %llu\n",
5745                                 (unsigned long long)buf->start);
5746                 } else {
5747                         /*
5748                          * Signal to callers we need to start the scan over
5749                          * again since we'll have cowed blocks.
5750                          */
5751                         ret = -EAGAIN;
5752                 }
5753         } else {
5754                 rec->content_checked = 1;
5755                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5756                         rec->owner_ref_checked = 1;
5757                 else {
5758                         ret = check_owner_ref(root, rec, buf);
5759                         if (!ret)
5760                                 rec->owner_ref_checked = 1;
5761                 }
5762         }
5763         if (!ret)
5764                 maybe_free_extent_rec(extent_cache, rec);
5765         return ret;
5766 }
5767
5768 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5769                                                 u64 parent, u64 root)
5770 {
5771         struct list_head *cur = rec->backrefs.next;
5772         struct extent_backref *node;
5773         struct tree_backref *back;
5774
5775         while(cur != &rec->backrefs) {
5776                 node = to_extent_backref(cur);
5777                 cur = cur->next;
5778                 if (node->is_data)
5779                         continue;
5780                 back = to_tree_backref(node);
5781                 if (parent > 0) {
5782                         if (!node->full_backref)
5783                                 continue;
5784                         if (parent == back->parent)
5785                                 return back;
5786                 } else {
5787                         if (node->full_backref)
5788                                 continue;
5789                         if (back->root == root)
5790                                 return back;
5791                 }
5792         }
5793         return NULL;
5794 }
5795
5796 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5797                                                 u64 parent, u64 root)
5798 {
5799         struct tree_backref *ref = malloc(sizeof(*ref));
5800
5801         if (!ref)
5802                 return NULL;
5803         memset(&ref->node, 0, sizeof(ref->node));
5804         if (parent > 0) {
5805                 ref->parent = parent;
5806                 ref->node.full_backref = 1;
5807         } else {
5808                 ref->root = root;
5809                 ref->node.full_backref = 0;
5810         }
5811         list_add_tail(&ref->node.list, &rec->backrefs);
5812
5813         return ref;
5814 }
5815
5816 static struct data_backref *find_data_backref(struct extent_record *rec,
5817                                                 u64 parent, u64 root,
5818                                                 u64 owner, u64 offset,
5819                                                 int found_ref,
5820                                                 u64 disk_bytenr, u64 bytes)
5821 {
5822         struct list_head *cur = rec->backrefs.next;
5823         struct extent_backref *node;
5824         struct data_backref *back;
5825
5826         while(cur != &rec->backrefs) {
5827                 node = to_extent_backref(cur);
5828                 cur = cur->next;
5829                 if (!node->is_data)
5830                         continue;
5831                 back = to_data_backref(node);
5832                 if (parent > 0) {
5833                         if (!node->full_backref)
5834                                 continue;
5835                         if (parent == back->parent)
5836                                 return back;
5837                 } else {
5838                         if (node->full_backref)
5839                                 continue;
5840                         if (back->root == root && back->owner == owner &&
5841                             back->offset == offset) {
5842                                 if (found_ref && node->found_ref &&
5843                                     (back->bytes != bytes ||
5844                                     back->disk_bytenr != disk_bytenr))
5845                                         continue;
5846                                 return back;
5847                         }
5848                 }
5849         }
5850         return NULL;
5851 }
5852
5853 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5854                                                 u64 parent, u64 root,
5855                                                 u64 owner, u64 offset,
5856                                                 u64 max_size)
5857 {
5858         struct data_backref *ref = malloc(sizeof(*ref));
5859
5860         if (!ref)
5861                 return NULL;
5862         memset(&ref->node, 0, sizeof(ref->node));
5863         ref->node.is_data = 1;
5864
5865         if (parent > 0) {
5866                 ref->parent = parent;
5867                 ref->owner = 0;
5868                 ref->offset = 0;
5869                 ref->node.full_backref = 1;
5870         } else {
5871                 ref->root = root;
5872                 ref->owner = owner;
5873                 ref->offset = offset;
5874                 ref->node.full_backref = 0;
5875         }
5876         ref->bytes = max_size;
5877         ref->found_ref = 0;
5878         ref->num_refs = 0;
5879         list_add_tail(&ref->node.list, &rec->backrefs);
5880         if (max_size > rec->max_size)
5881                 rec->max_size = max_size;
5882         return ref;
5883 }
5884
5885 /* Check if the type of extent matches with its chunk */
5886 static void check_extent_type(struct extent_record *rec)
5887 {
5888         struct btrfs_block_group_cache *bg_cache;
5889
5890         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5891         if (!bg_cache)
5892                 return;
5893
5894         /* data extent, check chunk directly*/
5895         if (!rec->metadata) {
5896                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5897                         rec->wrong_chunk_type = 1;
5898                 return;
5899         }
5900
5901         /* metadata extent, check the obvious case first */
5902         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5903                                  BTRFS_BLOCK_GROUP_METADATA))) {
5904                 rec->wrong_chunk_type = 1;
5905                 return;
5906         }
5907
5908         /*
5909          * Check SYSTEM extent, as it's also marked as metadata, we can only
5910          * make sure it's a SYSTEM extent by its backref
5911          */
5912         if (!list_empty(&rec->backrefs)) {
5913                 struct extent_backref *node;
5914                 struct tree_backref *tback;
5915                 u64 bg_type;
5916
5917                 node = to_extent_backref(rec->backrefs.next);
5918                 if (node->is_data) {
5919                         /* tree block shouldn't have data backref */
5920                         rec->wrong_chunk_type = 1;
5921                         return;
5922                 }
5923                 tback = container_of(node, struct tree_backref, node);
5924
5925                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5926                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5927                 else
5928                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5929                 if (!(bg_cache->flags & bg_type))
5930                         rec->wrong_chunk_type = 1;
5931         }
5932 }
5933
5934 /*
5935  * Allocate a new extent record, fill default values from @tmpl and insert int
5936  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5937  * the cache, otherwise it fails.
5938  */
5939 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5940                 struct extent_record *tmpl)
5941 {
5942         struct extent_record *rec;
5943         int ret = 0;
5944
5945         rec = malloc(sizeof(*rec));
5946         if (!rec)
5947                 return -ENOMEM;
5948         rec->start = tmpl->start;
5949         rec->max_size = tmpl->max_size;
5950         rec->nr = max(tmpl->nr, tmpl->max_size);
5951         rec->found_rec = tmpl->found_rec;
5952         rec->content_checked = tmpl->content_checked;
5953         rec->owner_ref_checked = tmpl->owner_ref_checked;
5954         rec->num_duplicates = 0;
5955         rec->metadata = tmpl->metadata;
5956         rec->flag_block_full_backref = FLAG_UNSET;
5957         rec->bad_full_backref = 0;
5958         rec->crossing_stripes = 0;
5959         rec->wrong_chunk_type = 0;
5960         rec->is_root = tmpl->is_root;
5961         rec->refs = tmpl->refs;
5962         rec->extent_item_refs = tmpl->extent_item_refs;
5963         rec->parent_generation = tmpl->parent_generation;
5964         INIT_LIST_HEAD(&rec->backrefs);
5965         INIT_LIST_HEAD(&rec->dups);
5966         INIT_LIST_HEAD(&rec->list);
5967         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5968         rec->cache.start = tmpl->start;
5969         rec->cache.size = tmpl->nr;
5970         ret = insert_cache_extent(extent_cache, &rec->cache);
5971         if (ret) {
5972                 free(rec);
5973                 return ret;
5974         }
5975         bytes_used += rec->nr;
5976
5977         if (tmpl->metadata)
5978                 rec->crossing_stripes = check_crossing_stripes(global_info,
5979                                 rec->start, global_info->tree_root->nodesize);
5980         check_extent_type(rec);
5981         return ret;
5982 }
5983
5984 /*
5985  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5986  * some are hints:
5987  * - refs              - if found, increase refs
5988  * - is_root           - if found, set
5989  * - content_checked   - if found, set
5990  * - owner_ref_checked - if found, set
5991  *
5992  * If not found, create a new one, initialize and insert.
5993  */
5994 static int add_extent_rec(struct cache_tree *extent_cache,
5995                 struct extent_record *tmpl)
5996 {
5997         struct extent_record *rec;
5998         struct cache_extent *cache;
5999         int ret = 0;
6000         int dup = 0;
6001
6002         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6003         if (cache) {
6004                 rec = container_of(cache, struct extent_record, cache);
6005                 if (tmpl->refs)
6006                         rec->refs++;
6007                 if (rec->nr == 1)
6008                         rec->nr = max(tmpl->nr, tmpl->max_size);
6009
6010                 /*
6011                  * We need to make sure to reset nr to whatever the extent
6012                  * record says was the real size, this way we can compare it to
6013                  * the backrefs.
6014                  */
6015                 if (tmpl->found_rec) {
6016                         if (tmpl->start != rec->start || rec->found_rec) {
6017                                 struct extent_record *tmp;
6018
6019                                 dup = 1;
6020                                 if (list_empty(&rec->list))
6021                                         list_add_tail(&rec->list,
6022                                                       &duplicate_extents);
6023
6024                                 /*
6025                                  * We have to do this song and dance in case we
6026                                  * find an extent record that falls inside of
6027                                  * our current extent record but does not have
6028                                  * the same objectid.
6029                                  */
6030                                 tmp = malloc(sizeof(*tmp));
6031                                 if (!tmp)
6032                                         return -ENOMEM;
6033                                 tmp->start = tmpl->start;
6034                                 tmp->max_size = tmpl->max_size;
6035                                 tmp->nr = tmpl->nr;
6036                                 tmp->found_rec = 1;
6037                                 tmp->metadata = tmpl->metadata;
6038                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6039                                 INIT_LIST_HEAD(&tmp->list);
6040                                 list_add_tail(&tmp->list, &rec->dups);
6041                                 rec->num_duplicates++;
6042                         } else {
6043                                 rec->nr = tmpl->nr;
6044                                 rec->found_rec = 1;
6045                         }
6046                 }
6047
6048                 if (tmpl->extent_item_refs && !dup) {
6049                         if (rec->extent_item_refs) {
6050                                 fprintf(stderr, "block %llu rec "
6051                                         "extent_item_refs %llu, passed %llu\n",
6052                                         (unsigned long long)tmpl->start,
6053                                         (unsigned long long)
6054                                                         rec->extent_item_refs,
6055                                         (unsigned long long)tmpl->extent_item_refs);
6056                         }
6057                         rec->extent_item_refs = tmpl->extent_item_refs;
6058                 }
6059                 if (tmpl->is_root)
6060                         rec->is_root = 1;
6061                 if (tmpl->content_checked)
6062                         rec->content_checked = 1;
6063                 if (tmpl->owner_ref_checked)
6064                         rec->owner_ref_checked = 1;
6065                 memcpy(&rec->parent_key, &tmpl->parent_key,
6066                                 sizeof(tmpl->parent_key));
6067                 if (tmpl->parent_generation)
6068                         rec->parent_generation = tmpl->parent_generation;
6069                 if (rec->max_size < tmpl->max_size)
6070                         rec->max_size = tmpl->max_size;
6071
6072                 /*
6073                  * A metadata extent can't cross stripe_len boundary, otherwise
6074                  * kernel scrub won't be able to handle it.
6075                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6076                  * it.
6077                  */
6078                 if (tmpl->metadata)
6079                         rec->crossing_stripes = check_crossing_stripes(
6080                                         global_info, rec->start,
6081                                         global_info->tree_root->nodesize);
6082                 check_extent_type(rec);
6083                 maybe_free_extent_rec(extent_cache, rec);
6084                 return ret;
6085         }
6086
6087         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6088
6089         return ret;
6090 }
6091
6092 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6093                             u64 parent, u64 root, int found_ref)
6094 {
6095         struct extent_record *rec;
6096         struct tree_backref *back;
6097         struct cache_extent *cache;
6098         int ret;
6099
6100         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6101         if (!cache) {
6102                 struct extent_record tmpl;
6103
6104                 memset(&tmpl, 0, sizeof(tmpl));
6105                 tmpl.start = bytenr;
6106                 tmpl.nr = 1;
6107                 tmpl.metadata = 1;
6108
6109                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6110                 if (ret)
6111                         return ret;
6112
6113                 /* really a bug in cache_extent implement now */
6114                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6115                 if (!cache)
6116                         return -ENOENT;
6117         }
6118
6119         rec = container_of(cache, struct extent_record, cache);
6120         if (rec->start != bytenr) {
6121                 /*
6122                  * Several cause, from unaligned bytenr to over lapping extents
6123                  */
6124                 return -EEXIST;
6125         }
6126
6127         back = find_tree_backref(rec, parent, root);
6128         if (!back) {
6129                 back = alloc_tree_backref(rec, parent, root);
6130                 if (!back)
6131                         return -ENOMEM;
6132         }
6133
6134         if (found_ref) {
6135                 if (back->node.found_ref) {
6136                         fprintf(stderr, "Extent back ref already exists "
6137                                 "for %llu parent %llu root %llu \n",
6138                                 (unsigned long long)bytenr,
6139                                 (unsigned long long)parent,
6140                                 (unsigned long long)root);
6141                 }
6142                 back->node.found_ref = 1;
6143         } else {
6144                 if (back->node.found_extent_tree) {
6145                         fprintf(stderr, "Extent back ref already exists "
6146                                 "for %llu parent %llu root %llu \n",
6147                                 (unsigned long long)bytenr,
6148                                 (unsigned long long)parent,
6149                                 (unsigned long long)root);
6150                 }
6151                 back->node.found_extent_tree = 1;
6152         }
6153         check_extent_type(rec);
6154         maybe_free_extent_rec(extent_cache, rec);
6155         return 0;
6156 }
6157
6158 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6159                             u64 parent, u64 root, u64 owner, u64 offset,
6160                             u32 num_refs, int found_ref, u64 max_size)
6161 {
6162         struct extent_record *rec;
6163         struct data_backref *back;
6164         struct cache_extent *cache;
6165         int ret;
6166
6167         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6168         if (!cache) {
6169                 struct extent_record tmpl;
6170
6171                 memset(&tmpl, 0, sizeof(tmpl));
6172                 tmpl.start = bytenr;
6173                 tmpl.nr = 1;
6174                 tmpl.max_size = max_size;
6175
6176                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6177                 if (ret)
6178                         return ret;
6179
6180                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6181                 if (!cache)
6182                         abort();
6183         }
6184
6185         rec = container_of(cache, struct extent_record, cache);
6186         if (rec->max_size < max_size)
6187                 rec->max_size = max_size;
6188
6189         /*
6190          * If found_ref is set then max_size is the real size and must match the
6191          * existing refs.  So if we have already found a ref then we need to
6192          * make sure that this ref matches the existing one, otherwise we need
6193          * to add a new backref so we can notice that the backrefs don't match
6194          * and we need to figure out who is telling the truth.  This is to
6195          * account for that awful fsync bug I introduced where we'd end up with
6196          * a btrfs_file_extent_item that would have its length include multiple
6197          * prealloc extents or point inside of a prealloc extent.
6198          */
6199         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6200                                  bytenr, max_size);
6201         if (!back) {
6202                 back = alloc_data_backref(rec, parent, root, owner, offset,
6203                                           max_size);
6204                 BUG_ON(!back);
6205         }
6206
6207         if (found_ref) {
6208                 BUG_ON(num_refs != 1);
6209                 if (back->node.found_ref)
6210                         BUG_ON(back->bytes != max_size);
6211                 back->node.found_ref = 1;
6212                 back->found_ref += 1;
6213                 back->bytes = max_size;
6214                 back->disk_bytenr = bytenr;
6215                 rec->refs += 1;
6216                 rec->content_checked = 1;
6217                 rec->owner_ref_checked = 1;
6218         } else {
6219                 if (back->node.found_extent_tree) {
6220                         fprintf(stderr, "Extent back ref already exists "
6221                                 "for %llu parent %llu root %llu "
6222                                 "owner %llu offset %llu num_refs %lu\n",
6223                                 (unsigned long long)bytenr,
6224                                 (unsigned long long)parent,
6225                                 (unsigned long long)root,
6226                                 (unsigned long long)owner,
6227                                 (unsigned long long)offset,
6228                                 (unsigned long)num_refs);
6229                 }
6230                 back->num_refs = num_refs;
6231                 back->node.found_extent_tree = 1;
6232         }
6233         maybe_free_extent_rec(extent_cache, rec);
6234         return 0;
6235 }
6236
6237 static int add_pending(struct cache_tree *pending,
6238                        struct cache_tree *seen, u64 bytenr, u32 size)
6239 {
6240         int ret;
6241         ret = add_cache_extent(seen, bytenr, size);
6242         if (ret)
6243                 return ret;
6244         add_cache_extent(pending, bytenr, size);
6245         return 0;
6246 }
6247
6248 static int pick_next_pending(struct cache_tree *pending,
6249                         struct cache_tree *reada,
6250                         struct cache_tree *nodes,
6251                         u64 last, struct block_info *bits, int bits_nr,
6252                         int *reada_bits)
6253 {
6254         unsigned long node_start = last;
6255         struct cache_extent *cache;
6256         int ret;
6257
6258         cache = search_cache_extent(reada, 0);
6259         if (cache) {
6260                 bits[0].start = cache->start;
6261                 bits[0].size = cache->size;
6262                 *reada_bits = 1;
6263                 return 1;
6264         }
6265         *reada_bits = 0;
6266         if (node_start > 32768)
6267                 node_start -= 32768;
6268
6269         cache = search_cache_extent(nodes, node_start);
6270         if (!cache)
6271                 cache = search_cache_extent(nodes, 0);
6272
6273         if (!cache) {
6274                  cache = search_cache_extent(pending, 0);
6275                  if (!cache)
6276                          return 0;
6277                  ret = 0;
6278                  do {
6279                          bits[ret].start = cache->start;
6280                          bits[ret].size = cache->size;
6281                          cache = next_cache_extent(cache);
6282                          ret++;
6283                  } while (cache && ret < bits_nr);
6284                  return ret;
6285         }
6286
6287         ret = 0;
6288         do {
6289                 bits[ret].start = cache->start;
6290                 bits[ret].size = cache->size;
6291                 cache = next_cache_extent(cache);
6292                 ret++;
6293         } while (cache && ret < bits_nr);
6294
6295         if (bits_nr - ret > 8) {
6296                 u64 lookup = bits[0].start + bits[0].size;
6297                 struct cache_extent *next;
6298                 next = search_cache_extent(pending, lookup);
6299                 while(next) {
6300                         if (next->start - lookup > 32768)
6301                                 break;
6302                         bits[ret].start = next->start;
6303                         bits[ret].size = next->size;
6304                         lookup = next->start + next->size;
6305                         ret++;
6306                         if (ret == bits_nr)
6307                                 break;
6308                         next = next_cache_extent(next);
6309                         if (!next)
6310                                 break;
6311                 }
6312         }
6313         return ret;
6314 }
6315
6316 static void free_chunk_record(struct cache_extent *cache)
6317 {
6318         struct chunk_record *rec;
6319
6320         rec = container_of(cache, struct chunk_record, cache);
6321         list_del_init(&rec->list);
6322         list_del_init(&rec->dextents);
6323         free(rec);
6324 }
6325
6326 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6327 {
6328         cache_tree_free_extents(chunk_cache, free_chunk_record);
6329 }
6330
6331 static void free_device_record(struct rb_node *node)
6332 {
6333         struct device_record *rec;
6334
6335         rec = container_of(node, struct device_record, node);
6336         free(rec);
6337 }
6338
6339 FREE_RB_BASED_TREE(device_cache, free_device_record);
6340
6341 int insert_block_group_record(struct block_group_tree *tree,
6342                               struct block_group_record *bg_rec)
6343 {
6344         int ret;
6345
6346         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6347         if (ret)
6348                 return ret;
6349
6350         list_add_tail(&bg_rec->list, &tree->block_groups);
6351         return 0;
6352 }
6353
6354 static void free_block_group_record(struct cache_extent *cache)
6355 {
6356         struct block_group_record *rec;
6357
6358         rec = container_of(cache, struct block_group_record, cache);
6359         list_del_init(&rec->list);
6360         free(rec);
6361 }
6362
6363 void free_block_group_tree(struct block_group_tree *tree)
6364 {
6365         cache_tree_free_extents(&tree->tree, free_block_group_record);
6366 }
6367
6368 int insert_device_extent_record(struct device_extent_tree *tree,
6369                                 struct device_extent_record *de_rec)
6370 {
6371         int ret;
6372
6373         /*
6374          * Device extent is a bit different from the other extents, because
6375          * the extents which belong to the different devices may have the
6376          * same start and size, so we need use the special extent cache
6377          * search/insert functions.
6378          */
6379         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6380         if (ret)
6381                 return ret;
6382
6383         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6384         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6385         return 0;
6386 }
6387
6388 static void free_device_extent_record(struct cache_extent *cache)
6389 {
6390         struct device_extent_record *rec;
6391
6392         rec = container_of(cache, struct device_extent_record, cache);
6393         if (!list_empty(&rec->chunk_list))
6394                 list_del_init(&rec->chunk_list);
6395         if (!list_empty(&rec->device_list))
6396                 list_del_init(&rec->device_list);
6397         free(rec);
6398 }
6399
6400 void free_device_extent_tree(struct device_extent_tree *tree)
6401 {
6402         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6403 }
6404
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The ref's objectid decides the flavour: values below
 * BTRFS_FIRST_FREE_OBJECTID are recorded as tree backrefs, everything else
 * as data backrefs carrying the item's ref count.  In both cases the item
 * key supplies the extent bytenr (objectid) and the parent (offset).
 *
 * Returns whatever add_tree_backref()/add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
6425
6426 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6427                                             struct btrfs_key *key,
6428                                             int slot)
6429 {
6430         struct btrfs_chunk *ptr;
6431         struct chunk_record *rec;
6432         int num_stripes, i;
6433
6434         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6435         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6436
6437         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6438         if (!rec) {
6439                 fprintf(stderr, "memory allocation failed\n");
6440                 exit(-1);
6441         }
6442
6443         INIT_LIST_HEAD(&rec->list);
6444         INIT_LIST_HEAD(&rec->dextents);
6445         rec->bg_rec = NULL;
6446
6447         rec->cache.start = key->offset;
6448         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6449
6450         rec->generation = btrfs_header_generation(leaf);
6451
6452         rec->objectid = key->objectid;
6453         rec->type = key->type;
6454         rec->offset = key->offset;
6455
6456         rec->length = rec->cache.size;
6457         rec->owner = btrfs_chunk_owner(leaf, ptr);
6458         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6459         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6460         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6461         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6462         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6463         rec->num_stripes = num_stripes;
6464         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6465
6466         for (i = 0; i < rec->num_stripes; ++i) {
6467                 rec->stripes[i].devid =
6468                         btrfs_stripe_devid_nr(leaf, ptr, i);
6469                 rec->stripes[i].offset =
6470                         btrfs_stripe_offset_nr(leaf, ptr, i);
6471                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6472                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6473                                 BTRFS_UUID_SIZE);
6474         }
6475
6476         return rec;
6477 }
6478
6479 static int process_chunk_item(struct cache_tree *chunk_cache,
6480                               struct btrfs_key *key, struct extent_buffer *eb,
6481                               int slot)
6482 {
6483         struct chunk_record *rec;
6484         struct btrfs_chunk *chunk;
6485         int ret = 0;
6486
6487         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6488         /*
6489          * Do extra check for this chunk item,
6490          *
6491          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6492          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6493          * and owner<->key_type check.
6494          */
6495         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6496                                       key->offset);
6497         if (ret < 0) {
6498                 error("chunk(%llu, %llu) is not valid, ignore it",
6499                       key->offset, btrfs_chunk_length(eb, chunk));
6500                 return 0;
6501         }
6502         rec = btrfs_new_chunk_record(eb, key, slot);
6503         ret = insert_cache_extent(chunk_cache, &rec->cache);
6504         if (ret) {
6505                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6506                         rec->offset, rec->length);
6507                 free(rec);
6508         }
6509
6510         return ret;
6511 }
6512
6513 static int process_device_item(struct rb_root *dev_cache,
6514                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6515 {
6516         struct btrfs_dev_item *ptr;
6517         struct device_record *rec;
6518         int ret = 0;
6519
6520         ptr = btrfs_item_ptr(eb,
6521                 slot, struct btrfs_dev_item);
6522
6523         rec = malloc(sizeof(*rec));
6524         if (!rec) {
6525                 fprintf(stderr, "memory allocation failed\n");
6526                 return -ENOMEM;
6527         }
6528
6529         rec->devid = key->offset;
6530         rec->generation = btrfs_header_generation(eb);
6531
6532         rec->objectid = key->objectid;
6533         rec->type = key->type;
6534         rec->offset = key->offset;
6535
6536         rec->devid = btrfs_device_id(eb, ptr);
6537         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6538         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6539
6540         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6541         if (ret) {
6542                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6543                 free(rec);
6544         }
6545
6546         return ret;
6547 }
6548
6549 struct block_group_record *
6550 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6551                              int slot)
6552 {
6553         struct btrfs_block_group_item *ptr;
6554         struct block_group_record *rec;
6555
6556         rec = calloc(1, sizeof(*rec));
6557         if (!rec) {
6558                 fprintf(stderr, "memory allocation failed\n");
6559                 exit(-1);
6560         }
6561
6562         rec->cache.start = key->objectid;
6563         rec->cache.size = key->offset;
6564
6565         rec->generation = btrfs_header_generation(leaf);
6566
6567         rec->objectid = key->objectid;
6568         rec->type = key->type;
6569         rec->offset = key->offset;
6570
6571         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6572         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6573
6574         INIT_LIST_HEAD(&rec->list);
6575
6576         return rec;
6577 }
6578
6579 static int process_block_group_item(struct block_group_tree *block_group_cache,
6580                                     struct btrfs_key *key,
6581                                     struct extent_buffer *eb, int slot)
6582 {
6583         struct block_group_record *rec;
6584         int ret = 0;
6585
6586         rec = btrfs_new_block_group_record(eb, key, slot);
6587         ret = insert_block_group_record(block_group_cache, rec);
6588         if (ret) {
6589                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6590                         rec->objectid, rec->offset);
6591                 free(rec);
6592         }
6593
6594         return ret;
6595 }
6596
6597 struct device_extent_record *
6598 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6599                                struct btrfs_key *key, int slot)
6600 {
6601         struct device_extent_record *rec;
6602         struct btrfs_dev_extent *ptr;
6603
6604         rec = calloc(1, sizeof(*rec));
6605         if (!rec) {
6606                 fprintf(stderr, "memory allocation failed\n");
6607                 exit(-1);
6608         }
6609
6610         rec->cache.objectid = key->objectid;
6611         rec->cache.start = key->offset;
6612
6613         rec->generation = btrfs_header_generation(leaf);
6614
6615         rec->objectid = key->objectid;
6616         rec->type = key->type;
6617         rec->offset = key->offset;
6618
6619         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6620         rec->chunk_objecteid =
6621                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6622         rec->chunk_offset =
6623                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6624         rec->length = btrfs_dev_extent_length(leaf, ptr);
6625         rec->cache.size = rec->length;
6626
6627         INIT_LIST_HEAD(&rec->chunk_list);
6628         INIT_LIST_HEAD(&rec->device_list);
6629
6630         return rec;
6631 }
6632
6633 static int
6634 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6635                            struct btrfs_key *key, struct extent_buffer *eb,
6636                            int slot)
6637 {
6638         struct device_extent_record *rec;
6639         int ret;
6640
6641         rec = btrfs_new_device_extent_record(eb, key, slot);
6642         ret = insert_device_extent_record(dev_extent_cache, rec);
6643         if (ret) {
6644                 fprintf(stderr,
6645                         "Device extent[%llu, %llu, %llu] existed.\n",
6646                         rec->objectid, rec->offset, rec->length);
6647                 free(rec);
6648         }
6649
6650         return ret;
6651 }
6652
6653 static int process_extent_item(struct btrfs_root *root,
6654                                struct cache_tree *extent_cache,
6655                                struct extent_buffer *eb, int slot)
6656 {
6657         struct btrfs_extent_item *ei;
6658         struct btrfs_extent_inline_ref *iref;
6659         struct btrfs_extent_data_ref *dref;
6660         struct btrfs_shared_data_ref *sref;
6661         struct btrfs_key key;
6662         struct extent_record tmpl;
6663         unsigned long end;
6664         unsigned long ptr;
6665         int ret;
6666         int type;
6667         u32 item_size = btrfs_item_size_nr(eb, slot);
6668         u64 refs = 0;
6669         u64 offset;
6670         u64 num_bytes;
6671         int metadata = 0;
6672
6673         btrfs_item_key_to_cpu(eb, &key, slot);
6674
6675         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6676                 metadata = 1;
6677                 num_bytes = root->nodesize;
6678         } else {
6679                 num_bytes = key.offset;
6680         }
6681
6682         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6683                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6684                       key.objectid, root->sectorsize);
6685                 return -EIO;
6686         }
6687         if (item_size < sizeof(*ei)) {
6688 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6689                 struct btrfs_extent_item_v0 *ei0;
6690                 BUG_ON(item_size != sizeof(*ei0));
6691                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6692                 refs = btrfs_extent_refs_v0(eb, ei0);
6693 #else
6694                 BUG();
6695 #endif
6696                 memset(&tmpl, 0, sizeof(tmpl));
6697                 tmpl.start = key.objectid;
6698                 tmpl.nr = num_bytes;
6699                 tmpl.extent_item_refs = refs;
6700                 tmpl.metadata = metadata;
6701                 tmpl.found_rec = 1;
6702                 tmpl.max_size = num_bytes;
6703
6704                 return add_extent_rec(extent_cache, &tmpl);
6705         }
6706
6707         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6708         refs = btrfs_extent_refs(eb, ei);
6709         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6710                 metadata = 1;
6711         else
6712                 metadata = 0;
6713         if (metadata && num_bytes != root->nodesize) {
6714                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6715                       num_bytes, root->nodesize);
6716                 return -EIO;
6717         }
6718         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6719                 error("ignore invalid data extent, length %llu is not aligned to %u",
6720                       num_bytes, root->sectorsize);
6721                 return -EIO;
6722         }
6723
6724         memset(&tmpl, 0, sizeof(tmpl));
6725         tmpl.start = key.objectid;
6726         tmpl.nr = num_bytes;
6727         tmpl.extent_item_refs = refs;
6728         tmpl.metadata = metadata;
6729         tmpl.found_rec = 1;
6730         tmpl.max_size = num_bytes;
6731         add_extent_rec(extent_cache, &tmpl);
6732
6733         ptr = (unsigned long)(ei + 1);
6734         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6735             key.type == BTRFS_EXTENT_ITEM_KEY)
6736                 ptr += sizeof(struct btrfs_tree_block_info);
6737
6738         end = (unsigned long)ei + item_size;
6739         while (ptr < end) {
6740                 iref = (struct btrfs_extent_inline_ref *)ptr;
6741                 type = btrfs_extent_inline_ref_type(eb, iref);
6742                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6743                 switch (type) {
6744                 case BTRFS_TREE_BLOCK_REF_KEY:
6745                         ret = add_tree_backref(extent_cache, key.objectid,
6746                                         0, offset, 0);
6747                         if (ret < 0)
6748                                 error("add_tree_backref failed: %s",
6749                                       strerror(-ret));
6750                         break;
6751                 case BTRFS_SHARED_BLOCK_REF_KEY:
6752                         ret = add_tree_backref(extent_cache, key.objectid,
6753                                         offset, 0, 0);
6754                         if (ret < 0)
6755                                 error("add_tree_backref failed: %s",
6756                                       strerror(-ret));
6757                         break;
6758                 case BTRFS_EXTENT_DATA_REF_KEY:
6759                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6760                         add_data_backref(extent_cache, key.objectid, 0,
6761                                         btrfs_extent_data_ref_root(eb, dref),
6762                                         btrfs_extent_data_ref_objectid(eb,
6763                                                                        dref),
6764                                         btrfs_extent_data_ref_offset(eb, dref),
6765                                         btrfs_extent_data_ref_count(eb, dref),
6766                                         0, num_bytes);
6767                         break;
6768                 case BTRFS_SHARED_DATA_REF_KEY:
6769                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6770                         add_data_backref(extent_cache, key.objectid, offset,
6771                                         0, 0, 0,
6772                                         btrfs_shared_data_ref_count(eb, sref),
6773                                         0, num_bytes);
6774                         break;
6775                 default:
6776                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6777                                 key.objectid, key.type, num_bytes);
6778                         goto out;
6779                 }
6780                 ptr += btrfs_extent_inline_ref_size(type);
6781         }
6782         WARN_ON(ptr > end);
6783 out:
6784         return 0;
6785 }
6786
6787 static int check_cache_range(struct btrfs_root *root,
6788                              struct btrfs_block_group_cache *cache,
6789                              u64 offset, u64 bytes)
6790 {
6791         struct btrfs_free_space *entry;
6792         u64 *logical;
6793         u64 bytenr;
6794         int stripe_len;
6795         int i, nr, ret;
6796
6797         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6798                 bytenr = btrfs_sb_offset(i);
6799                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6800                                        cache->key.objectid, bytenr, 0,
6801                                        &logical, &nr, &stripe_len);
6802                 if (ret)
6803                         return ret;
6804
6805                 while (nr--) {
6806                         if (logical[nr] + stripe_len <= offset)
6807                                 continue;
6808                         if (offset + bytes <= logical[nr])
6809                                 continue;
6810                         if (logical[nr] == offset) {
6811                                 if (stripe_len >= bytes) {
6812                                         free(logical);
6813                                         return 0;
6814                                 }
6815                                 bytes -= stripe_len;
6816                                 offset += stripe_len;
6817                         } else if (logical[nr] < offset) {
6818                                 if (logical[nr] + stripe_len >=
6819                                     offset + bytes) {
6820                                         free(logical);
6821                                         return 0;
6822                                 }
6823                                 bytes = (offset + bytes) -
6824                                         (logical[nr] + stripe_len);
6825                                 offset = logical[nr] + stripe_len;
6826                         } else {
6827                                 /*
6828                                  * Could be tricky, the super may land in the
6829                                  * middle of the area we're checking.  First
6830                                  * check the easiest case, it's at the end.
6831                                  */
6832                                 if (logical[nr] + stripe_len >=
6833                                     bytes + offset) {
6834                                         bytes = logical[nr] - offset;
6835                                         continue;
6836                                 }
6837
6838                                 /* Check the left side */
6839                                 ret = check_cache_range(root, cache,
6840                                                         offset,
6841                                                         logical[nr] - offset);
6842                                 if (ret) {
6843                                         free(logical);
6844                                         return ret;
6845                                 }
6846
6847                                 /* Now we continue with the right side */
6848                                 bytes = (offset + bytes) -
6849                                         (logical[nr] + stripe_len);
6850                                 offset = logical[nr] + stripe_len;
6851                         }
6852                 }
6853
6854                 free(logical);
6855         }
6856
6857         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6858         if (!entry) {
6859                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6860                         offset, offset+bytes);
6861                 return -EINVAL;
6862         }
6863
6864         if (entry->offset != offset) {
6865                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6866                         entry->offset);
6867                 return -EINVAL;
6868         }
6869
6870         if (entry->bytes != bytes) {
6871                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6872                         bytes, entry->bytes, offset);
6873                 return -EINVAL;
6874         }
6875
6876         unlink_free_space(cache->free_space_ctl, entry);
6877         free(entry);
6878         return 0;
6879 }
6880
6881 static int verify_space_cache(struct btrfs_root *root,
6882                               struct btrfs_block_group_cache *cache)
6883 {
6884         struct btrfs_path path;
6885         struct extent_buffer *leaf;
6886         struct btrfs_key key;
6887         u64 last;
6888         int ret = 0;
6889
6890         root = root->fs_info->extent_root;
6891
6892         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6893
6894         btrfs_init_path(&path);
6895         key.objectid = last;
6896         key.offset = 0;
6897         key.type = BTRFS_EXTENT_ITEM_KEY;
6898         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6899         if (ret < 0)
6900                 goto out;
6901         ret = 0;
6902         while (1) {
6903                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6904                         ret = btrfs_next_leaf(root, &path);
6905                         if (ret < 0)
6906                                 goto out;
6907                         if (ret > 0) {
6908                                 ret = 0;
6909                                 break;
6910                         }
6911                 }
6912                 leaf = path.nodes[0];
6913                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6914                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6915                         break;
6916                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6917                     key.type != BTRFS_METADATA_ITEM_KEY) {
6918                         path.slots[0]++;
6919                         continue;
6920                 }
6921
6922                 if (last == key.objectid) {
6923                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6924                                 last = key.objectid + key.offset;
6925                         else
6926                                 last = key.objectid + root->nodesize;
6927                         path.slots[0]++;
6928                         continue;
6929                 }
6930
6931                 ret = check_cache_range(root, cache, last,
6932                                         key.objectid - last);
6933                 if (ret)
6934                         break;
6935                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6936                         last = key.objectid + key.offset;
6937                 else
6938                         last = key.objectid + root->nodesize;
6939                 path.slots[0]++;
6940         }
6941
6942         if (last < cache->key.objectid + cache->key.offset)
6943                 ret = check_cache_range(root, cache, last,
6944                                         cache->key.objectid +
6945                                         cache->key.offset - last);
6946
6947 out:
6948         btrfs_release_path(&path);
6949
6950         if (!ret &&
6951             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6952                 fprintf(stderr, "There are still entries left in the space "
6953                         "cache\n");
6954                 ret = -EINVAL;
6955         }
6956
6957         return ret;
6958 }
6959
6960 static int check_space_cache(struct btrfs_root *root)
6961 {
6962         struct btrfs_block_group_cache *cache;
6963         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6964         int ret;
6965         int error = 0;
6966
6967         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6968             btrfs_super_generation(root->fs_info->super_copy) !=
6969             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6970                 printf("cache and super generation don't match, space cache "
6971                        "will be invalidated\n");
6972                 return 0;
6973         }
6974
6975         if (ctx.progress_enabled) {
6976                 ctx.tp = TASK_FREE_SPACE;
6977                 task_start(ctx.info);
6978         }
6979
6980         while (1) {
6981                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6982                 if (!cache)
6983                         break;
6984
6985                 start = cache->key.objectid + cache->key.offset;
6986                 if (!cache->free_space_ctl) {
6987                         if (btrfs_init_free_space_ctl(cache,
6988                                                       root->sectorsize)) {
6989                                 ret = -ENOMEM;
6990                                 break;
6991                         }
6992                 } else {
6993                         btrfs_remove_free_space_cache(cache);
6994                 }
6995
6996                 if (btrfs_fs_compat_ro(root->fs_info,
6997                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6998                         ret = exclude_super_stripes(root, cache);
6999                         if (ret) {
7000                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7001                                         strerror(-ret));
7002                                 error++;
7003                                 continue;
7004                         }
7005                         ret = load_free_space_tree(root->fs_info, cache);
7006                         free_excluded_extents(root, cache);
7007                         if (ret < 0) {
7008                                 fprintf(stderr, "could not load free space tree: %s\n",
7009                                         strerror(-ret));
7010                                 error++;
7011                                 continue;
7012                         }
7013                         error += ret;
7014                 } else {
7015                         ret = load_free_space_cache(root->fs_info, cache);
7016                         if (!ret)
7017                                 continue;
7018                 }
7019
7020                 ret = verify_space_cache(root, cache);
7021                 if (ret) {
7022                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7023                                 cache->key.objectid);
7024                         error++;
7025                 }
7026         }
7027
7028         task_stop(ctx.info);
7029
7030         return error ? -EINVAL : 0;
7031 }
7032
7033 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7034                         u64 num_bytes, unsigned long leaf_offset,
7035                         struct extent_buffer *eb) {
7036
7037         u64 offset = 0;
7038         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7039         char *data;
7040         unsigned long csum_offset;
7041         u32 csum;
7042         u32 csum_expected;
7043         u64 read_len;
7044         u64 data_checked = 0;
7045         u64 tmp;
7046         int ret = 0;
7047         int mirror;
7048         int num_copies;
7049
7050         if (num_bytes % root->sectorsize)
7051                 return -EINVAL;
7052
7053         data = malloc(num_bytes);
7054         if (!data)
7055                 return -ENOMEM;
7056
7057         while (offset < num_bytes) {
7058                 mirror = 0;
7059 again:
7060                 read_len = num_bytes - offset;
7061                 /* read as much space once a time */
7062                 ret = read_extent_data(root, data + offset,
7063                                 bytenr + offset, &read_len, mirror);
7064                 if (ret)
7065                         goto out;
7066                 data_checked = 0;
7067                 /* verify every 4k data's checksum */
7068                 while (data_checked < read_len) {
7069                         csum = ~(u32)0;
7070                         tmp = offset + data_checked;
7071
7072                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
7073                                                csum, root->sectorsize);
7074                         btrfs_csum_final(csum, (u8 *)&csum);
7075
7076                         csum_offset = leaf_offset +
7077                                  tmp / root->sectorsize * csum_size;
7078                         read_extent_buffer(eb, (char *)&csum_expected,
7079                                            csum_offset, csum_size);
7080                         /* try another mirror */
7081                         if (csum != csum_expected) {
7082                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7083                                                 mirror, bytenr + tmp,
7084                                                 csum, csum_expected);
7085                                 num_copies = btrfs_num_copies(
7086                                                 &root->fs_info->mapping_tree,
7087                                                 bytenr, num_bytes);
7088                                 if (mirror < num_copies - 1) {
7089                                         mirror += 1;
7090                                         goto again;
7091                                 }
7092                         }
7093                         data_checked += root->sectorsize;
7094                 }
7095                 offset += read_len;
7096         }
7097 out:
7098         free(data);
7099         return ret;
7100 }
7101
7102 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7103                                u64 num_bytes)
7104 {
7105         struct btrfs_path path;
7106         struct extent_buffer *leaf;
7107         struct btrfs_key key;
7108         int ret;
7109
7110         btrfs_init_path(&path);
7111         key.objectid = bytenr;
7112         key.type = BTRFS_EXTENT_ITEM_KEY;
7113         key.offset = (u64)-1;
7114
7115 again:
7116         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7117                                 0, 0);
7118         if (ret < 0) {
7119                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7120                 btrfs_release_path(&path);
7121                 return ret;
7122         } else if (ret) {
7123                 if (path.slots[0] > 0) {
7124                         path.slots[0]--;
7125                 } else {
7126                         ret = btrfs_prev_leaf(root, &path);
7127                         if (ret < 0) {
7128                                 goto out;
7129                         } else if (ret > 0) {
7130                                 ret = 0;
7131                                 goto out;
7132                         }
7133                 }
7134         }
7135
7136         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7137
7138         /*
7139          * Block group items come before extent items if they have the same
7140          * bytenr, so walk back one more just in case.  Dear future traveller,
7141          * first congrats on mastering time travel.  Now if it's not too much
7142          * trouble could you go back to 2006 and tell Chris to make the
7143          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7144          * EXTENT_ITEM_KEY please?
7145          */
7146         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7147                 if (path.slots[0] > 0) {
7148                         path.slots[0]--;
7149                 } else {
7150                         ret = btrfs_prev_leaf(root, &path);
7151                         if (ret < 0) {
7152                                 goto out;
7153                         } else if (ret > 0) {
7154                                 ret = 0;
7155                                 goto out;
7156                         }
7157                 }
7158                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7159         }
7160
7161         while (num_bytes) {
7162                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7163                         ret = btrfs_next_leaf(root, &path);
7164                         if (ret < 0) {
7165                                 fprintf(stderr, "Error going to next leaf "
7166                                         "%d\n", ret);
7167                                 btrfs_release_path(&path);
7168                                 return ret;
7169                         } else if (ret) {
7170                                 break;
7171                         }
7172                 }
7173                 leaf = path.nodes[0];
7174                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7175                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7176                         path.slots[0]++;
7177                         continue;
7178                 }
7179                 if (key.objectid + key.offset < bytenr) {
7180                         path.slots[0]++;
7181                         continue;
7182                 }
7183                 if (key.objectid > bytenr + num_bytes)
7184                         break;
7185
7186                 if (key.objectid == bytenr) {
7187                         if (key.offset >= num_bytes) {
7188                                 num_bytes = 0;
7189                                 break;
7190                         }
7191                         num_bytes -= key.offset;
7192                         bytenr += key.offset;
7193                 } else if (key.objectid < bytenr) {
7194                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7195                                 num_bytes = 0;
7196                                 break;
7197                         }
7198                         num_bytes = (bytenr + num_bytes) -
7199                                 (key.objectid + key.offset);
7200                         bytenr = key.objectid + key.offset;
7201                 } else {
7202                         if (key.objectid + key.offset < bytenr + num_bytes) {
7203                                 u64 new_start = key.objectid + key.offset;
7204                                 u64 new_bytes = bytenr + num_bytes - new_start;
7205
7206                                 /*
7207                                  * Weird case, the extent is in the middle of
7208                                  * our range, we'll have to search one side
7209                                  * and then the other.  Not sure if this happens
7210                                  * in real life, but no harm in coding it up
7211                                  * anyway just in case.
7212                                  */
7213                                 btrfs_release_path(&path);
7214                                 ret = check_extent_exists(root, new_start,
7215                                                           new_bytes);
7216                                 if (ret) {
7217                                         fprintf(stderr, "Right section didn't "
7218                                                 "have a record\n");
7219                                         break;
7220                                 }
7221                                 num_bytes = key.objectid - bytenr;
7222                                 goto again;
7223                         }
7224                         num_bytes = key.objectid - bytenr;
7225                 }
7226                 path.slots[0]++;
7227         }
7228         ret = 0;
7229
7230 out:
7231         if (num_bytes && !ret) {
7232                 fprintf(stderr, "There are no extents for csum range "
7233                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7234                 ret = 1;
7235         }
7236
7237         btrfs_release_path(&path);
7238         return ret;
7239 }
7240
7241 static int check_csums(struct btrfs_root *root)
7242 {
7243         struct btrfs_path path;
7244         struct extent_buffer *leaf;
7245         struct btrfs_key key;
7246         u64 offset = 0, num_bytes = 0;
7247         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7248         int errors = 0;
7249         int ret;
7250         u64 data_len;
7251         unsigned long leaf_offset;
7252
7253         root = root->fs_info->csum_root;
7254         if (!extent_buffer_uptodate(root->node)) {
7255                 fprintf(stderr, "No valid csum tree found\n");
7256                 return -ENOENT;
7257         }
7258
7259         btrfs_init_path(&path);
7260         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7261         key.type = BTRFS_EXTENT_CSUM_KEY;
7262         key.offset = 0;
7263         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7264         if (ret < 0) {
7265                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7266                 btrfs_release_path(&path);
7267                 return ret;
7268         }
7269
7270         if (ret > 0 && path.slots[0])
7271                 path.slots[0]--;
7272         ret = 0;
7273
7274         while (1) {
7275                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7276                         ret = btrfs_next_leaf(root, &path);
7277                         if (ret < 0) {
7278                                 fprintf(stderr, "Error going to next leaf "
7279                                         "%d\n", ret);
7280                                 break;
7281                         }
7282                         if (ret)
7283                                 break;
7284                 }
7285                 leaf = path.nodes[0];
7286
7287                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7288                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7289                         path.slots[0]++;
7290                         continue;
7291                 }
7292
7293                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7294                               csum_size) * root->sectorsize;
7295                 if (!check_data_csum)
7296                         goto skip_csum_check;
7297                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7298                 ret = check_extent_csums(root, key.offset, data_len,
7299                                          leaf_offset, leaf);
7300                 if (ret)
7301                         break;
7302 skip_csum_check:
7303                 if (!num_bytes) {
7304                         offset = key.offset;
7305                 } else if (key.offset != offset + num_bytes) {
7306                         ret = check_extent_exists(root, offset, num_bytes);
7307                         if (ret) {
7308                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7309                                         "there is no extent record\n",
7310                                         offset, offset+num_bytes);
7311                                 errors++;
7312                         }
7313                         offset = key.offset;
7314                         num_bytes = 0;
7315                 }
7316                 num_bytes += data_len;
7317                 path.slots[0]++;
7318         }
7319
7320         btrfs_release_path(&path);
7321         return errors;
7322 }
7323
7324 static int is_dropped_key(struct btrfs_key *key,
7325                           struct btrfs_key *drop_key) {
7326         if (key->objectid < drop_key->objectid)
7327                 return 1;
7328         else if (key->objectid == drop_key->objectid) {
7329                 if (key->type < drop_key->type)
7330                         return 1;
7331                 else if (key->type == drop_key->type) {
7332                         if (key->offset < drop_key->offset)
7333                                 return 1;
7334                 }
7335         }
7336         return 0;
7337 }
7338
7339 /*
7340  * Here are the rules for FULL_BACKREF.
7341  *
7342  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7343  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7344  *      FULL_BACKREF set.
7345  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7346  *    if it happened after the relocation occurred since we'll have dropped the
7347  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7348  *    have no real way to know for sure.
7349  *
7350  * We process the blocks one root at a time, and we start from the lowest root
7351  * objectid and go to the highest.  So we can just lookup the owner backref for
7352  * the record and if we don't find it then we know it doesn't exist and we have
7353  * a FULL BACKREF.
7354  *
7355  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7356  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7357  * be set or not and then we can check later once we've gathered all the refs.
7358  */
7359 static int calc_extent_flag(struct btrfs_root *root,
7360                            struct cache_tree *extent_cache,
7361                            struct extent_buffer *buf,
7362                            struct root_item_record *ri,
7363                            u64 *flags)
7364 {
7365         struct extent_record *rec;
7366         struct cache_extent *cache;
7367         struct tree_backref *tback;
7368         u64 owner = 0;
7369
7370         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7371         /* we have added this extent before */
7372         if (!cache)
7373                 return -ENOENT;
7374
7375         rec = container_of(cache, struct extent_record, cache);
7376
7377         /*
7378          * Except file/reloc tree, we can not have
7379          * FULL BACKREF MODE
7380          */
7381         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7382                 goto normal;
7383         /*
7384          * root node
7385          */
7386         if (buf->start == ri->bytenr)
7387                 goto normal;
7388
7389         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7390                 goto full_backref;
7391
7392         owner = btrfs_header_owner(buf);
7393         if (owner == ri->objectid)
7394                 goto normal;
7395
7396         tback = find_tree_backref(rec, 0, owner);
7397         if (!tback)
7398                 goto full_backref;
7399 normal:
7400         *flags = 0;
7401         if (rec->flag_block_full_backref != FLAG_UNSET &&
7402             rec->flag_block_full_backref != 0)
7403                 rec->bad_full_backref = 1;
7404         return 0;
7405 full_backref:
7406         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7407         if (rec->flag_block_full_backref != FLAG_UNSET &&
7408             rec->flag_block_full_backref != 1)
7409                 rec->bad_full_backref = 1;
7410         return 0;
7411 }
7412
7413 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7414 {
7415         fprintf(stderr, "Invalid key type(");
7416         print_key_type(stderr, 0, key_type);
7417         fprintf(stderr, ") found in root(");
7418         print_objectid(stderr, rootid, 0);
7419         fprintf(stderr, ")\n");
7420 }
7421
7422 /*
7423  * Check if the key is valid with its extent buffer.
7424  *
7425  * This is a early check in case invalid key exists in a extent buffer
7426  * This is not comprehensive yet, but should prevent wrong key/item passed
7427  * further
7428  */
7429 static int check_type_with_root(u64 rootid, u8 key_type)
7430 {
7431         switch (key_type) {
7432         /* Only valid in chunk tree */
7433         case BTRFS_DEV_ITEM_KEY:
7434         case BTRFS_CHUNK_ITEM_KEY:
7435                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7436                         goto err;
7437                 break;
7438         /* valid in csum and log tree */
7439         case BTRFS_CSUM_TREE_OBJECTID:
7440                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7441                       is_fstree(rootid)))
7442                         goto err;
7443                 break;
7444         case BTRFS_EXTENT_ITEM_KEY:
7445         case BTRFS_METADATA_ITEM_KEY:
7446         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7447                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7448                         goto err;
7449                 break;
7450         case BTRFS_ROOT_ITEM_KEY:
7451                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7452                         goto err;
7453                 break;
7454         case BTRFS_DEV_EXTENT_KEY:
7455                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7456                         goto err;
7457                 break;
7458         }
7459         return 0;
7460 err:
7461         report_mismatch_key_root(key_type, rootid);
7462         return -EINVAL;
7463 }
7464
7465 static int run_next_block(struct btrfs_root *root,
7466                           struct block_info *bits,
7467                           int bits_nr,
7468                           u64 *last,
7469                           struct cache_tree *pending,
7470                           struct cache_tree *seen,
7471                           struct cache_tree *reada,
7472                           struct cache_tree *nodes,
7473                           struct cache_tree *extent_cache,
7474                           struct cache_tree *chunk_cache,
7475                           struct rb_root *dev_cache,
7476                           struct block_group_tree *block_group_cache,
7477                           struct device_extent_tree *dev_extent_cache,
7478                           struct root_item_record *ri)
7479 {
7480         struct extent_buffer *buf;
7481         struct extent_record *rec = NULL;
7482         u64 bytenr;
7483         u32 size;
7484         u64 parent;
7485         u64 owner;
7486         u64 flags;
7487         u64 ptr;
7488         u64 gen = 0;
7489         int ret = 0;
7490         int i;
7491         int nritems;
7492         struct btrfs_key key;
7493         struct cache_extent *cache;
7494         int reada_bits;
7495
7496         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7497                                     bits_nr, &reada_bits);
7498         if (nritems == 0)
7499                 return 1;
7500
7501         if (!reada_bits) {
7502                 for(i = 0; i < nritems; i++) {
7503                         ret = add_cache_extent(reada, bits[i].start,
7504                                                bits[i].size);
7505                         if (ret == -EEXIST)
7506                                 continue;
7507
7508                         /* fixme, get the parent transid */
7509                         readahead_tree_block(root, bits[i].start,
7510                                              bits[i].size, 0);
7511                 }
7512         }
7513         *last = bits[0].start;
7514         bytenr = bits[0].start;
7515         size = bits[0].size;
7516
7517         cache = lookup_cache_extent(pending, bytenr, size);
7518         if (cache) {
7519                 remove_cache_extent(pending, cache);
7520                 free(cache);
7521         }
7522         cache = lookup_cache_extent(reada, bytenr, size);
7523         if (cache) {
7524                 remove_cache_extent(reada, cache);
7525                 free(cache);
7526         }
7527         cache = lookup_cache_extent(nodes, bytenr, size);
7528         if (cache) {
7529                 remove_cache_extent(nodes, cache);
7530                 free(cache);
7531         }
7532         cache = lookup_cache_extent(extent_cache, bytenr, size);
7533         if (cache) {
7534                 rec = container_of(cache, struct extent_record, cache);
7535                 gen = rec->parent_generation;
7536         }
7537
7538         /* fixme, get the real parent transid */
7539         buf = read_tree_block(root, bytenr, size, gen);
7540         if (!extent_buffer_uptodate(buf)) {
7541                 record_bad_block_io(root->fs_info,
7542                                     extent_cache, bytenr, size);
7543                 goto out;
7544         }
7545
7546         nritems = btrfs_header_nritems(buf);
7547
7548         flags = 0;
7549         if (!init_extent_tree) {
7550                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7551                                        btrfs_header_level(buf), 1, NULL,
7552                                        &flags);
7553                 if (ret < 0) {
7554                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7555                         if (ret < 0) {
7556                                 fprintf(stderr, "Couldn't calc extent flags\n");
7557                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7558                         }
7559                 }
7560         } else {
7561                 flags = 0;
7562                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7563                 if (ret < 0) {
7564                         fprintf(stderr, "Couldn't calc extent flags\n");
7565                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7566                 }
7567         }
7568
7569         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7570                 if (ri != NULL &&
7571                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7572                     ri->objectid == btrfs_header_owner(buf)) {
7573                         /*
7574                          * Ok we got to this block from it's original owner and
7575                          * we have FULL_BACKREF set.  Relocation can leave
7576                          * converted blocks over so this is altogether possible,
7577                          * however it's not possible if the generation > the
7578                          * last snapshot, so check for this case.
7579                          */
7580                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7581                             btrfs_header_generation(buf) > ri->last_snapshot) {
7582                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7583                                 rec->bad_full_backref = 1;
7584                         }
7585                 }
7586         } else {
7587                 if (ri != NULL &&
7588                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7589                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7590                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7591                         rec->bad_full_backref = 1;
7592                 }
7593         }
7594
7595         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7596                 rec->flag_block_full_backref = 1;
7597                 parent = bytenr;
7598                 owner = 0;
7599         } else {
7600                 rec->flag_block_full_backref = 0;
7601                 parent = 0;
7602                 owner = btrfs_header_owner(buf);
7603         }
7604
7605         ret = check_block(root, extent_cache, buf, flags);
7606         if (ret)
7607                 goto out;
7608
7609         if (btrfs_is_leaf(buf)) {
7610                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7611                 for (i = 0; i < nritems; i++) {
7612                         struct btrfs_file_extent_item *fi;
7613                         btrfs_item_key_to_cpu(buf, &key, i);
7614                         /*
7615                          * Check key type against the leaf owner.
7616                          * Could filter quite a lot of early error if
7617                          * owner is correct
7618                          */
7619                         if (check_type_with_root(btrfs_header_owner(buf),
7620                                                  key.type)) {
7621                                 fprintf(stderr, "ignoring invalid key\n");
7622                                 continue;
7623                         }
7624                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7625                                 process_extent_item(root, extent_cache, buf,
7626                                                     i);
7627                                 continue;
7628                         }
7629                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7630                                 process_extent_item(root, extent_cache, buf,
7631                                                     i);
7632                                 continue;
7633                         }
7634                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7635                                 total_csum_bytes +=
7636                                         btrfs_item_size_nr(buf, i);
7637                                 continue;
7638                         }
7639                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7640                                 process_chunk_item(chunk_cache, &key, buf, i);
7641                                 continue;
7642                         }
7643                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7644                                 process_device_item(dev_cache, &key, buf, i);
7645                                 continue;
7646                         }
7647                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7648                                 process_block_group_item(block_group_cache,
7649                                         &key, buf, i);
7650                                 continue;
7651                         }
7652                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7653                                 process_device_extent_item(dev_extent_cache,
7654                                         &key, buf, i);
7655                                 continue;
7656
7657                         }
7658                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7659 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7660                                 process_extent_ref_v0(extent_cache, buf, i);
7661 #else
7662                                 BUG();
7663 #endif
7664                                 continue;
7665                         }
7666
7667                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7668                                 ret = add_tree_backref(extent_cache,
7669                                                 key.objectid, 0, key.offset, 0);
7670                                 if (ret < 0)
7671                                         error("add_tree_backref failed: %s",
7672                                               strerror(-ret));
7673                                 continue;
7674                         }
7675                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7676                                 ret = add_tree_backref(extent_cache,
7677                                                 key.objectid, key.offset, 0, 0);
7678                                 if (ret < 0)
7679                                         error("add_tree_backref failed: %s",
7680                                               strerror(-ret));
7681                                 continue;
7682                         }
7683                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7684                                 struct btrfs_extent_data_ref *ref;
7685                                 ref = btrfs_item_ptr(buf, i,
7686                                                 struct btrfs_extent_data_ref);
7687                                 add_data_backref(extent_cache,
7688                                         key.objectid, 0,
7689                                         btrfs_extent_data_ref_root(buf, ref),
7690                                         btrfs_extent_data_ref_objectid(buf,
7691                                                                        ref),
7692                                         btrfs_extent_data_ref_offset(buf, ref),
7693                                         btrfs_extent_data_ref_count(buf, ref),
7694                                         0, root->sectorsize);
7695                                 continue;
7696                         }
7697                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7698                                 struct btrfs_shared_data_ref *ref;
7699                                 ref = btrfs_item_ptr(buf, i,
7700                                                 struct btrfs_shared_data_ref);
7701                                 add_data_backref(extent_cache,
7702                                         key.objectid, key.offset, 0, 0, 0,
7703                                         btrfs_shared_data_ref_count(buf, ref),
7704                                         0, root->sectorsize);
7705                                 continue;
7706                         }
7707                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7708                                 struct bad_item *bad;
7709
7710                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7711                                         continue;
7712                                 if (!owner)
7713                                         continue;
7714                                 bad = malloc(sizeof(struct bad_item));
7715                                 if (!bad)
7716                                         continue;
7717                                 INIT_LIST_HEAD(&bad->list);
7718                                 memcpy(&bad->key, &key,
7719                                        sizeof(struct btrfs_key));
7720                                 bad->root_id = owner;
7721                                 list_add_tail(&bad->list, &delete_items);
7722                                 continue;
7723                         }
7724                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7725                                 continue;
7726                         fi = btrfs_item_ptr(buf, i,
7727                                             struct btrfs_file_extent_item);
7728                         if (btrfs_file_extent_type(buf, fi) ==
7729                             BTRFS_FILE_EXTENT_INLINE)
7730                                 continue;
7731                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7732                                 continue;
7733
7734                         data_bytes_allocated +=
7735                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7736                         if (data_bytes_allocated < root->sectorsize) {
7737                                 abort();
7738                         }
7739                         data_bytes_referenced +=
7740                                 btrfs_file_extent_num_bytes(buf, fi);
7741                         add_data_backref(extent_cache,
7742                                 btrfs_file_extent_disk_bytenr(buf, fi),
7743                                 parent, owner, key.objectid, key.offset -
7744                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7745                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7746                 }
7747         } else {
7748                 int level;
7749                 struct btrfs_key first_key;
7750
7751                 first_key.objectid = 0;
7752
7753                 if (nritems > 0)
7754                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7755                 level = btrfs_header_level(buf);
7756                 for (i = 0; i < nritems; i++) {
7757                         struct extent_record tmpl;
7758
7759                         ptr = btrfs_node_blockptr(buf, i);
7760                         size = root->nodesize;
7761                         btrfs_node_key_to_cpu(buf, &key, i);
7762                         if (ri != NULL) {
7763                                 if ((level == ri->drop_level)
7764                                     && is_dropped_key(&key, &ri->drop_key)) {
7765                                         continue;
7766                                 }
7767                         }
7768
7769                         memset(&tmpl, 0, sizeof(tmpl));
7770                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7771                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7772                         tmpl.start = ptr;
7773                         tmpl.nr = size;
7774                         tmpl.refs = 1;
7775                         tmpl.metadata = 1;
7776                         tmpl.max_size = size;
7777                         ret = add_extent_rec(extent_cache, &tmpl);
7778                         if (ret < 0)
7779                                 goto out;
7780
7781                         ret = add_tree_backref(extent_cache, ptr, parent,
7782                                         owner, 1);
7783                         if (ret < 0) {
7784                                 error("add_tree_backref failed: %s",
7785                                       strerror(-ret));
7786                                 continue;
7787                         }
7788
7789                         if (level > 1) {
7790                                 add_pending(nodes, seen, ptr, size);
7791                         } else {
7792                                 add_pending(pending, seen, ptr, size);
7793                         }
7794                 }
7795                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7796                                       nritems) * sizeof(struct btrfs_key_ptr);
7797         }
7798         total_btree_bytes += buf->len;
7799         if (fs_root_objectid(btrfs_header_owner(buf)))
7800                 total_fs_tree_bytes += buf->len;
7801         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7802                 total_extent_tree_bytes += buf->len;
7803         if (!found_old_backref &&
7804             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7805             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7806             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7807                 found_old_backref = 1;
7808 out:
7809         free_extent_buffer(buf);
7810         return ret;
7811 }
7812
7813 static int add_root_to_pending(struct extent_buffer *buf,
7814                                struct cache_tree *extent_cache,
7815                                struct cache_tree *pending,
7816                                struct cache_tree *seen,
7817                                struct cache_tree *nodes,
7818                                u64 objectid)
7819 {
7820         struct extent_record tmpl;
7821         int ret;
7822
7823         if (btrfs_header_level(buf) > 0)
7824                 add_pending(nodes, seen, buf->start, buf->len);
7825         else
7826                 add_pending(pending, seen, buf->start, buf->len);
7827
7828         memset(&tmpl, 0, sizeof(tmpl));
7829         tmpl.start = buf->start;
7830         tmpl.nr = buf->len;
7831         tmpl.is_root = 1;
7832         tmpl.refs = 1;
7833         tmpl.metadata = 1;
7834         tmpl.max_size = buf->len;
7835         add_extent_rec(extent_cache, &tmpl);
7836
7837         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7838             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7839                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7840                                 0, 1);
7841         else
7842                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7843                                 1);
7844         return ret;
7845 }
7846
7847 /* as we fix the tree, we might be deleting blocks that
7848  * we're tracking for repair.  This hook makes sure we
7849  * remove any backrefs for blocks as we are fixing them.
7850  */
7851 static int free_extent_hook(struct btrfs_trans_handle *trans,
7852                             struct btrfs_root *root,
7853                             u64 bytenr, u64 num_bytes, u64 parent,
7854                             u64 root_objectid, u64 owner, u64 offset,
7855                             int refs_to_drop)
7856 {
7857         struct extent_record *rec;
7858         struct cache_extent *cache;
7859         int is_data;
7860         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7861
7862         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7863         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7864         if (!cache)
7865                 return 0;
7866
7867         rec = container_of(cache, struct extent_record, cache);
7868         if (is_data) {
7869                 struct data_backref *back;
7870                 back = find_data_backref(rec, parent, root_objectid, owner,
7871                                          offset, 1, bytenr, num_bytes);
7872                 if (!back)
7873                         goto out;
7874                 if (back->node.found_ref) {
7875                         back->found_ref -= refs_to_drop;
7876                         if (rec->refs)
7877                                 rec->refs -= refs_to_drop;
7878                 }
7879                 if (back->node.found_extent_tree) {
7880                         back->num_refs -= refs_to_drop;
7881                         if (rec->extent_item_refs)
7882                                 rec->extent_item_refs -= refs_to_drop;
7883                 }
7884                 if (back->found_ref == 0)
7885                         back->node.found_ref = 0;
7886                 if (back->num_refs == 0)
7887                         back->node.found_extent_tree = 0;
7888
7889                 if (!back->node.found_extent_tree && back->node.found_ref) {
7890                         list_del(&back->node.list);
7891                         free(back);
7892                 }
7893         } else {
7894                 struct tree_backref *back;
7895                 back = find_tree_backref(rec, parent, root_objectid);
7896                 if (!back)
7897                         goto out;
7898                 if (back->node.found_ref) {
7899                         if (rec->refs)
7900                                 rec->refs--;
7901                         back->node.found_ref = 0;
7902                 }
7903                 if (back->node.found_extent_tree) {
7904                         if (rec->extent_item_refs)
7905                                 rec->extent_item_refs--;
7906                         back->node.found_extent_tree = 0;
7907                 }
7908                 if (!back->node.found_extent_tree && back->node.found_ref) {
7909                         list_del(&back->node.list);
7910                         free(back);
7911                 }
7912         }
7913         maybe_free_extent_rec(extent_cache, rec);
7914 out:
7915         return 0;
7916 }
7917
7918 static int delete_extent_records(struct btrfs_trans_handle *trans,
7919                                  struct btrfs_root *root,
7920                                  struct btrfs_path *path,
7921                                  u64 bytenr, u64 new_len)
7922 {
7923         struct btrfs_key key;
7924         struct btrfs_key found_key;
7925         struct extent_buffer *leaf;
7926         int ret;
7927         int slot;
7928
7929
7930         key.objectid = bytenr;
7931         key.type = (u8)-1;
7932         key.offset = (u64)-1;
7933
7934         while(1) {
7935                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7936                                         &key, path, 0, 1);
7937                 if (ret < 0)
7938                         break;
7939
7940                 if (ret > 0) {
7941                         ret = 0;
7942                         if (path->slots[0] == 0)
7943                                 break;
7944                         path->slots[0]--;
7945                 }
7946                 ret = 0;
7947
7948                 leaf = path->nodes[0];
7949                 slot = path->slots[0];
7950
7951                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7952                 if (found_key.objectid != bytenr)
7953                         break;
7954
7955                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7956                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7957                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7958                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7959                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7960                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7961                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7962                         btrfs_release_path(path);
7963                         if (found_key.type == 0) {
7964                                 if (found_key.offset == 0)
7965                                         break;
7966                                 key.offset = found_key.offset - 1;
7967                                 key.type = found_key.type;
7968                         }
7969                         key.type = found_key.type - 1;
7970                         key.offset = (u64)-1;
7971                         continue;
7972                 }
7973
7974                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7975                         found_key.objectid, found_key.type, found_key.offset);
7976
7977                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7978                 if (ret)
7979                         break;
7980                 btrfs_release_path(path);
7981
7982                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7983                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7984                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7985                                 found_key.offset : root->nodesize;
7986
7987                         ret = btrfs_update_block_group(trans, root, bytenr,
7988                                                        bytes, 0, 0);
7989                         if (ret)
7990                                 break;
7991                 }
7992         }
7993
7994         btrfs_release_path(path);
7995         return ret;
7996 }
7997
7998 /*
7999  * for a single backref, this will allocate a new extent
8000  * and add the backref to it.
8001  */
8002 static int record_extent(struct btrfs_trans_handle *trans,
8003                          struct btrfs_fs_info *info,
8004                          struct btrfs_path *path,
8005                          struct extent_record *rec,
8006                          struct extent_backref *back,
8007                          int allocated, u64 flags)
8008 {
8009         int ret;
8010         struct btrfs_root *extent_root = info->extent_root;
8011         struct extent_buffer *leaf;
8012         struct btrfs_key ins_key;
8013         struct btrfs_extent_item *ei;
8014         struct data_backref *dback;
8015         struct btrfs_tree_block_info *bi;
8016
8017         if (!back->is_data)
8018                 rec->max_size = max_t(u64, rec->max_size,
8019                                     info->extent_root->nodesize);
8020
8021         if (!allocated) {
8022                 u32 item_size = sizeof(*ei);
8023
8024                 if (!back->is_data)
8025                         item_size += sizeof(*bi);
8026
8027                 ins_key.objectid = rec->start;
8028                 ins_key.offset = rec->max_size;
8029                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8030
8031                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8032                                         &ins_key, item_size);
8033                 if (ret)
8034                         goto fail;
8035
8036                 leaf = path->nodes[0];
8037                 ei = btrfs_item_ptr(leaf, path->slots[0],
8038                                     struct btrfs_extent_item);
8039
8040                 btrfs_set_extent_refs(leaf, ei, 0);
8041                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8042
8043                 if (back->is_data) {
8044                         btrfs_set_extent_flags(leaf, ei,
8045                                                BTRFS_EXTENT_FLAG_DATA);
8046                 } else {
8047                         struct btrfs_disk_key copy_key;;
8048
8049                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8050                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8051                                              sizeof(*bi));
8052
8053                         btrfs_set_disk_key_objectid(&copy_key,
8054                                                     rec->info_objectid);
8055                         btrfs_set_disk_key_type(&copy_key, 0);
8056                         btrfs_set_disk_key_offset(&copy_key, 0);
8057
8058                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8059                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8060
8061                         btrfs_set_extent_flags(leaf, ei,
8062                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8063                 }
8064
8065                 btrfs_mark_buffer_dirty(leaf);
8066                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8067                                                rec->max_size, 1, 0);
8068                 if (ret)
8069                         goto fail;
8070                 btrfs_release_path(path);
8071         }
8072
8073         if (back->is_data) {
8074                 u64 parent;
8075                 int i;
8076
8077                 dback = to_data_backref(back);
8078                 if (back->full_backref)
8079                         parent = dback->parent;
8080                 else
8081                         parent = 0;
8082
8083                 for (i = 0; i < dback->found_ref; i++) {
8084                         /* if parent != 0, we're doing a full backref
8085                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8086                          * just makes the backref allocator create a data
8087                          * backref
8088                          */
8089                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8090                                                    rec->start, rec->max_size,
8091                                                    parent,
8092                                                    dback->root,
8093                                                    parent ?
8094                                                    BTRFS_FIRST_FREE_OBJECTID :
8095                                                    dback->owner,
8096                                                    dback->offset);
8097                         if (ret)
8098                                 break;
8099                 }
8100                 fprintf(stderr, "adding new data backref"
8101                                 " on %llu %s %llu owner %llu"
8102                                 " offset %llu found %d\n",
8103                                 (unsigned long long)rec->start,
8104                                 back->full_backref ?
8105                                 "parent" : "root",
8106                                 back->full_backref ?
8107                                 (unsigned long long)parent :
8108                                 (unsigned long long)dback->root,
8109                                 (unsigned long long)dback->owner,
8110                                 (unsigned long long)dback->offset,
8111                                 dback->found_ref);
8112         } else {
8113                 u64 parent;
8114                 struct tree_backref *tback;
8115
8116                 tback = to_tree_backref(back);
8117                 if (back->full_backref)
8118                         parent = tback->parent;
8119                 else
8120                         parent = 0;
8121
8122                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8123                                            rec->start, rec->max_size,
8124                                            parent, tback->root, 0, 0);
8125                 fprintf(stderr, "adding new tree backref on "
8126                         "start %llu len %llu parent %llu root %llu\n",
8127                         rec->start, rec->max_size, parent, tback->root);
8128         }
8129 fail:
8130         btrfs_release_path(path);
8131         return ret;
8132 }
8133
8134 static struct extent_entry *find_entry(struct list_head *entries,
8135                                        u64 bytenr, u64 bytes)
8136 {
8137         struct extent_entry *entry = NULL;
8138
8139         list_for_each_entry(entry, entries, list) {
8140                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8141                         return entry;
8142         }
8143
8144         return NULL;
8145 }
8146
8147 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8148 {
8149         struct extent_entry *entry, *best = NULL, *prev = NULL;
8150
8151         list_for_each_entry(entry, entries, list) {
8152                 /*
8153                  * If there are as many broken entries as entries then we know
8154                  * not to trust this particular entry.
8155                  */
8156                 if (entry->broken == entry->count)
8157                         continue;
8158
8159                 /*
8160                  * Special case, when there are only two entries and 'best' is
8161                  * the first one
8162                  */
8163                 if (!prev) {
8164                         best = entry;
8165                         prev = entry;
8166                         continue;
8167                 }
8168
8169                 /*
8170                  * If our current entry == best then we can't be sure our best
8171                  * is really the best, so we need to keep searching.
8172                  */
8173                 if (best && best->count == entry->count) {
8174                         prev = entry;
8175                         best = NULL;
8176                         continue;
8177                 }
8178
8179                 /* Prev == entry, not good enough, have to keep searching */
8180                 if (!prev->broken && prev->count == entry->count)
8181                         continue;
8182
8183                 if (!best)
8184                         best = (prev->count > entry->count) ? prev : entry;
8185                 else if (best->count < entry->count)
8186                         best = entry;
8187                 prev = entry;
8188         }
8189
8190         return best;
8191 }
8192
8193 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8194                       struct data_backref *dback, struct extent_entry *entry)
8195 {
8196         struct btrfs_trans_handle *trans;
8197         struct btrfs_root *root;
8198         struct btrfs_file_extent_item *fi;
8199         struct extent_buffer *leaf;
8200         struct btrfs_key key;
8201         u64 bytenr, bytes;
8202         int ret, err;
8203
8204         key.objectid = dback->root;
8205         key.type = BTRFS_ROOT_ITEM_KEY;
8206         key.offset = (u64)-1;
8207         root = btrfs_read_fs_root(info, &key);
8208         if (IS_ERR(root)) {
8209                 fprintf(stderr, "Couldn't find root for our ref\n");
8210                 return -EINVAL;
8211         }
8212
8213         /*
8214          * The backref points to the original offset of the extent if it was
8215          * split, so we need to search down to the offset we have and then walk
8216          * forward until we find the backref we're looking for.
8217          */
8218         key.objectid = dback->owner;
8219         key.type = BTRFS_EXTENT_DATA_KEY;
8220         key.offset = dback->offset;
8221         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8222         if (ret < 0) {
8223                 fprintf(stderr, "Error looking up ref %d\n", ret);
8224                 return ret;
8225         }
8226
8227         while (1) {
8228                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8229                         ret = btrfs_next_leaf(root, path);
8230                         if (ret) {
8231                                 fprintf(stderr, "Couldn't find our ref, next\n");
8232                                 return -EINVAL;
8233                         }
8234                 }
8235                 leaf = path->nodes[0];
8236                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8237                 if (key.objectid != dback->owner ||
8238                     key.type != BTRFS_EXTENT_DATA_KEY) {
8239                         fprintf(stderr, "Couldn't find our ref, search\n");
8240                         return -EINVAL;
8241                 }
8242                 fi = btrfs_item_ptr(leaf, path->slots[0],
8243                                     struct btrfs_file_extent_item);
8244                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8245                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8246
8247                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8248                         break;
8249                 path->slots[0]++;
8250         }
8251
8252         btrfs_release_path(path);
8253
8254         trans = btrfs_start_transaction(root, 1);
8255         if (IS_ERR(trans))
8256                 return PTR_ERR(trans);
8257
8258         /*
8259          * Ok we have the key of the file extent we want to fix, now we can cow
8260          * down to the thing and fix it.
8261          */
8262         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8263         if (ret < 0) {
8264                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8265                         key.objectid, key.type, key.offset, ret);
8266                 goto out;
8267         }
8268         if (ret > 0) {
8269                 fprintf(stderr, "Well that's odd, we just found this key "
8270                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8271                         key.offset);
8272                 ret = -EINVAL;
8273                 goto out;
8274         }
8275         leaf = path->nodes[0];
8276         fi = btrfs_item_ptr(leaf, path->slots[0],
8277                             struct btrfs_file_extent_item);
8278
8279         if (btrfs_file_extent_compression(leaf, fi) &&
8280             dback->disk_bytenr != entry->bytenr) {
8281                 fprintf(stderr, "Ref doesn't match the record start and is "
8282                         "compressed, please take a btrfs-image of this file "
8283                         "system and send it to a btrfs developer so they can "
8284                         "complete this functionality for bytenr %Lu\n",
8285                         dback->disk_bytenr);
8286                 ret = -EINVAL;
8287                 goto out;
8288         }
8289
8290         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8291                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8292         } else if (dback->disk_bytenr > entry->bytenr) {
8293                 u64 off_diff, offset;
8294
8295                 off_diff = dback->disk_bytenr - entry->bytenr;
8296                 offset = btrfs_file_extent_offset(leaf, fi);
8297                 if (dback->disk_bytenr + offset +
8298                     btrfs_file_extent_num_bytes(leaf, fi) >
8299                     entry->bytenr + entry->bytes) {
8300                         fprintf(stderr, "Ref is past the entry end, please "
8301                                 "take a btrfs-image of this file system and "
8302                                 "send it to a btrfs developer, ref %Lu\n",
8303                                 dback->disk_bytenr);
8304                         ret = -EINVAL;
8305                         goto out;
8306                 }
8307                 offset += off_diff;
8308                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8309                 btrfs_set_file_extent_offset(leaf, fi, offset);
8310         } else if (dback->disk_bytenr < entry->bytenr) {
8311                 u64 offset;
8312
8313                 offset = btrfs_file_extent_offset(leaf, fi);
8314                 if (dback->disk_bytenr + offset < entry->bytenr) {
8315                         fprintf(stderr, "Ref is before the entry start, please"
8316                                 " take a btrfs-image of this file system and "
8317                                 "send it to a btrfs developer, ref %Lu\n",
8318                                 dback->disk_bytenr);
8319                         ret = -EINVAL;
8320                         goto out;
8321                 }
8322
8323                 offset += dback->disk_bytenr;
8324                 offset -= entry->bytenr;
8325                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8326                 btrfs_set_file_extent_offset(leaf, fi, offset);
8327         }
8328
8329         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8330
8331         /*
8332          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8333          * only do this if we aren't using compression, otherwise it's a
8334          * trickier case.
8335          */
8336         if (!btrfs_file_extent_compression(leaf, fi))
8337                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8338         else
8339                 printf("ram bytes may be wrong?\n");
8340         btrfs_mark_buffer_dirty(leaf);
8341 out:
8342         err = btrfs_commit_transaction(trans, root);
8343         btrfs_release_path(path);
8344         return ret ? ret : err;
8345 }
8346
8347 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8348                            struct extent_record *rec)
8349 {
8350         struct extent_backref *back;
8351         struct data_backref *dback;
8352         struct extent_entry *entry, *best = NULL;
8353         LIST_HEAD(entries);
8354         int nr_entries = 0;
8355         int broken_entries = 0;
8356         int ret = 0;
8357         short mismatch = 0;
8358
8359         /*
8360          * Metadata is easy and the backrefs should always agree on bytenr and
8361          * size, if not we've got bigger issues.
8362          */
8363         if (rec->metadata)
8364                 return 0;
8365
8366         list_for_each_entry(back, &rec->backrefs, list) {
8367                 if (back->full_backref || !back->is_data)
8368                         continue;
8369
8370                 dback = to_data_backref(back);
8371
8372                 /*
8373                  * We only pay attention to backrefs that we found a real
8374                  * backref for.
8375                  */
8376                 if (dback->found_ref == 0)
8377                         continue;
8378
8379                 /*
8380                  * For now we only catch when the bytes don't match, not the
8381                  * bytenr.  We can easily do this at the same time, but I want
8382                  * to have a fs image to test on before we just add repair
8383                  * functionality willy-nilly so we know we won't screw up the
8384                  * repair.
8385                  */
8386
8387                 entry = find_entry(&entries, dback->disk_bytenr,
8388                                    dback->bytes);
8389                 if (!entry) {
8390                         entry = malloc(sizeof(struct extent_entry));
8391                         if (!entry) {
8392                                 ret = -ENOMEM;
8393                                 goto out;
8394                         }
8395                         memset(entry, 0, sizeof(*entry));
8396                         entry->bytenr = dback->disk_bytenr;
8397                         entry->bytes = dback->bytes;
8398                         list_add_tail(&entry->list, &entries);
8399                         nr_entries++;
8400                 }
8401
8402                 /*
8403                  * If we only have on entry we may think the entries agree when
8404                  * in reality they don't so we have to do some extra checking.
8405                  */
8406                 if (dback->disk_bytenr != rec->start ||
8407                     dback->bytes != rec->nr || back->broken)
8408                         mismatch = 1;
8409
8410                 if (back->broken) {
8411                         entry->broken++;
8412                         broken_entries++;
8413                 }
8414
8415                 entry->count++;
8416         }
8417
8418         /* Yay all the backrefs agree, carry on good sir */
8419         if (nr_entries <= 1 && !mismatch)
8420                 goto out;
8421
8422         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8423                 "%Lu\n", rec->start);
8424
8425         /*
8426          * First we want to see if the backrefs can agree amongst themselves who
8427          * is right, so figure out which one of the entries has the highest
8428          * count.
8429          */
8430         best = find_most_right_entry(&entries);
8431
8432         /*
8433          * Ok so we may have an even split between what the backrefs think, so
8434          * this is where we use the extent ref to see what it thinks.
8435          */
8436         if (!best) {
8437                 entry = find_entry(&entries, rec->start, rec->nr);
8438                 if (!entry && (!broken_entries || !rec->found_rec)) {
8439                         fprintf(stderr, "Backrefs don't agree with each other "
8440                                 "and extent record doesn't agree with anybody,"
8441                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8442                                 rec->start, rec->nr);
8443                         ret = -EINVAL;
8444                         goto out;
8445                 } else if (!entry) {
8446                         /*
8447                          * Ok our backrefs were broken, we'll assume this is the
8448                          * correct value and add an entry for this range.
8449                          */
8450                         entry = malloc(sizeof(struct extent_entry));
8451                         if (!entry) {
8452                                 ret = -ENOMEM;
8453                                 goto out;
8454                         }
8455                         memset(entry, 0, sizeof(*entry));
8456                         entry->bytenr = rec->start;
8457                         entry->bytes = rec->nr;
8458                         list_add_tail(&entry->list, &entries);
8459                         nr_entries++;
8460                 }
8461                 entry->count++;
8462                 best = find_most_right_entry(&entries);
8463                 if (!best) {
8464                         fprintf(stderr, "Backrefs and extent record evenly "
8465                                 "split on who is right, this is going to "
8466                                 "require user input to fix bytenr %Lu bytes "
8467                                 "%Lu\n", rec->start, rec->nr);
8468                         ret = -EINVAL;
8469                         goto out;
8470                 }
8471         }
8472
8473         /*
8474          * I don't think this can happen currently as we'll abort() if we catch
8475          * this case higher up, but in case somebody removes that we still can't
8476          * deal with it properly here yet, so just bail out of that's the case.
8477          */
8478         if (best->bytenr != rec->start) {
8479                 fprintf(stderr, "Extent start and backref starts don't match, "
8480                         "please use btrfs-image on this file system and send "
8481                         "it to a btrfs developer so they can make fsck fix "
8482                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8483                         rec->start, rec->nr);
8484                 ret = -EINVAL;
8485                 goto out;
8486         }
8487
8488         /*
8489          * Ok great we all agreed on an extent record, let's go find the real
8490          * references and fix up the ones that don't match.
8491          */
8492         list_for_each_entry(back, &rec->backrefs, list) {
8493                 if (back->full_backref || !back->is_data)
8494                         continue;
8495
8496                 dback = to_data_backref(back);
8497
8498                 /*
8499                  * Still ignoring backrefs that don't have a real ref attached
8500                  * to them.
8501                  */
8502                 if (dback->found_ref == 0)
8503                         continue;
8504
8505                 if (dback->bytes == best->bytes &&
8506                     dback->disk_bytenr == best->bytenr)
8507                         continue;
8508
8509                 ret = repair_ref(info, path, dback, best);
8510                 if (ret)
8511                         goto out;
8512         }
8513
8514         /*
8515          * Ok we messed with the actual refs, which means we need to drop our
8516          * entire cache and go back and rescan.  I know this is a huge pain and
8517          * adds a lot of extra work, but it's the only way to be safe.  Once all
8518          * the backrefs agree we may not need to do anything to the extent
8519          * record itself.
8520          */
8521         ret = -EAGAIN;
8522 out:
8523         while (!list_empty(&entries)) {
8524                 entry = list_entry(entries.next, struct extent_entry, list);
8525                 list_del_init(&entry->list);
8526                 free(entry);
8527         }
8528         return ret;
8529 }
8530
8531 static int process_duplicates(struct btrfs_root *root,
8532                               struct cache_tree *extent_cache,
8533                               struct extent_record *rec)
8534 {
8535         struct extent_record *good, *tmp;
8536         struct cache_extent *cache;
8537         int ret;
8538
8539         /*
8540          * If we found a extent record for this extent then return, or if we
8541          * have more than one duplicate we are likely going to need to delete
8542          * something.
8543          */
8544         if (rec->found_rec || rec->num_duplicates > 1)
8545                 return 0;
8546
8547         /* Shouldn't happen but just in case */
8548         BUG_ON(!rec->num_duplicates);
8549
8550         /*
8551          * So this happens if we end up with a backref that doesn't match the
8552          * actual extent entry.  So either the backref is bad or the extent
8553          * entry is bad.  Either way we want to have the extent_record actually
8554          * reflect what we found in the extent_tree, so we need to take the
8555          * duplicate out and use that as the extent_record since the only way we
8556          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8557          */
8558         remove_cache_extent(extent_cache, &rec->cache);
8559
8560         good = to_extent_record(rec->dups.next);
8561         list_del_init(&good->list);
8562         INIT_LIST_HEAD(&good->backrefs);
8563         INIT_LIST_HEAD(&good->dups);
8564         good->cache.start = good->start;
8565         good->cache.size = good->nr;
8566         good->content_checked = 0;
8567         good->owner_ref_checked = 0;
8568         good->num_duplicates = 0;
8569         good->refs = rec->refs;
8570         list_splice_init(&rec->backrefs, &good->backrefs);
8571         while (1) {
8572                 cache = lookup_cache_extent(extent_cache, good->start,
8573                                             good->nr);
8574                 if (!cache)
8575                         break;
8576                 tmp = container_of(cache, struct extent_record, cache);
8577
8578                 /*
8579                  * If we find another overlapping extent and it's found_rec is
8580                  * set then it's a duplicate and we need to try and delete
8581                  * something.
8582                  */
8583                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8584                         if (list_empty(&good->list))
8585                                 list_add_tail(&good->list,
8586                                               &duplicate_extents);
8587                         good->num_duplicates += tmp->num_duplicates + 1;
8588                         list_splice_init(&tmp->dups, &good->dups);
8589                         list_del_init(&tmp->list);
8590                         list_add_tail(&tmp->list, &good->dups);
8591                         remove_cache_extent(extent_cache, &tmp->cache);
8592                         continue;
8593                 }
8594
8595                 /*
8596                  * Ok we have another non extent item backed extent rec, so lets
8597                  * just add it to this extent and carry on like we did above.
8598                  */
8599                 good->refs += tmp->refs;
8600                 list_splice_init(&tmp->backrefs, &good->backrefs);
8601                 remove_cache_extent(extent_cache, &tmp->cache);
8602                 free(tmp);
8603         }
8604         ret = insert_cache_extent(extent_cache, &good->cache);
8605         BUG_ON(ret);
8606         free(rec);
8607         return good->num_duplicates ? 0 : 1;
8608 }
8609
8610 static int delete_duplicate_records(struct btrfs_root *root,
8611                                     struct extent_record *rec)
8612 {
8613         struct btrfs_trans_handle *trans;
8614         LIST_HEAD(delete_list);
8615         struct btrfs_path path;
8616         struct extent_record *tmp, *good, *n;
8617         int nr_del = 0;
8618         int ret = 0, err;
8619         struct btrfs_key key;
8620
8621         btrfs_init_path(&path);
8622
8623         good = rec;
8624         /* Find the record that covers all of the duplicates. */
8625         list_for_each_entry(tmp, &rec->dups, list) {
8626                 if (good->start < tmp->start)
8627                         continue;
8628                 if (good->nr > tmp->nr)
8629                         continue;
8630
8631                 if (tmp->start + tmp->nr < good->start + good->nr) {
8632                         fprintf(stderr, "Ok we have overlapping extents that "
8633                                 "aren't completely covered by each other, this "
8634                                 "is going to require more careful thought.  "
8635                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8636                                 tmp->start, tmp->nr, good->start, good->nr);
8637                         abort();
8638                 }
8639                 good = tmp;
8640         }
8641
8642         if (good != rec)
8643                 list_add_tail(&rec->list, &delete_list);
8644
8645         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8646                 if (tmp == good)
8647                         continue;
8648                 list_move_tail(&tmp->list, &delete_list);
8649         }
8650
8651         root = root->fs_info->extent_root;
8652         trans = btrfs_start_transaction(root, 1);
8653         if (IS_ERR(trans)) {
8654                 ret = PTR_ERR(trans);
8655                 goto out;
8656         }
8657
8658         list_for_each_entry(tmp, &delete_list, list) {
8659                 if (tmp->found_rec == 0)
8660                         continue;
8661                 key.objectid = tmp->start;
8662                 key.type = BTRFS_EXTENT_ITEM_KEY;
8663                 key.offset = tmp->nr;
8664
8665                 /* Shouldn't happen but just in case */
8666                 if (tmp->metadata) {
8667                         fprintf(stderr, "Well this shouldn't happen, extent "
8668                                 "record overlaps but is metadata? "
8669                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8670                         abort();
8671                 }
8672
8673                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8674                 if (ret) {
8675                         if (ret > 0)
8676                                 ret = -EINVAL;
8677                         break;
8678                 }
8679                 ret = btrfs_del_item(trans, root, &path);
8680                 if (ret)
8681                         break;
8682                 btrfs_release_path(&path);
8683                 nr_del++;
8684         }
8685         err = btrfs_commit_transaction(trans, root);
8686         if (err && !ret)
8687                 ret = err;
8688 out:
8689         while (!list_empty(&delete_list)) {
8690                 tmp = to_extent_record(delete_list.next);
8691                 list_del_init(&tmp->list);
8692                 if (tmp == rec)
8693                         continue;
8694                 free(tmp);
8695         }
8696
8697         while (!list_empty(&rec->dups)) {
8698                 tmp = to_extent_record(rec->dups.next);
8699                 list_del_init(&tmp->list);
8700                 free(tmp);
8701         }
8702
8703         btrfs_release_path(&path);
8704
8705         if (!ret && !nr_del)
8706                 rec->num_duplicates = 0;
8707
8708         return ret ? ret : nr_del;
8709 }
8710
8711 static int find_possible_backrefs(struct btrfs_fs_info *info,
8712                                   struct btrfs_path *path,
8713                                   struct cache_tree *extent_cache,
8714                                   struct extent_record *rec)
8715 {
8716         struct btrfs_root *root;
8717         struct extent_backref *back;
8718         struct data_backref *dback;
8719         struct cache_extent *cache;
8720         struct btrfs_file_extent_item *fi;
8721         struct btrfs_key key;
8722         u64 bytenr, bytes;
8723         int ret;
8724
8725         list_for_each_entry(back, &rec->backrefs, list) {
8726                 /* Don't care about full backrefs (poor unloved backrefs) */
8727                 if (back->full_backref || !back->is_data)
8728                         continue;
8729
8730                 dback = to_data_backref(back);
8731
8732                 /* We found this one, we don't need to do a lookup */
8733                 if (dback->found_ref)
8734                         continue;
8735
8736                 key.objectid = dback->root;
8737                 key.type = BTRFS_ROOT_ITEM_KEY;
8738                 key.offset = (u64)-1;
8739
8740                 root = btrfs_read_fs_root(info, &key);
8741
8742                 /* No root, definitely a bad ref, skip */
8743                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8744                         continue;
8745                 /* Other err, exit */
8746                 if (IS_ERR(root))
8747                         return PTR_ERR(root);
8748
8749                 key.objectid = dback->owner;
8750                 key.type = BTRFS_EXTENT_DATA_KEY;
8751                 key.offset = dback->offset;
8752                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8753                 if (ret) {
8754                         btrfs_release_path(path);
8755                         if (ret < 0)
8756                                 return ret;
8757                         /* Didn't find it, we can carry on */
8758                         ret = 0;
8759                         continue;
8760                 }
8761
8762                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8763                                     struct btrfs_file_extent_item);
8764                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8765                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8766                 btrfs_release_path(path);
8767                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8768                 if (cache) {
8769                         struct extent_record *tmp;
8770                         tmp = container_of(cache, struct extent_record, cache);
8771
8772                         /*
8773                          * If we found an extent record for the bytenr for this
8774                          * particular backref then we can't add it to our
8775                          * current extent record.  We only want to add backrefs
8776                          * that don't have a corresponding extent item in the
8777                          * extent tree since they likely belong to this record
8778                          * and we need to fix it if it doesn't match bytenrs.
8779                          */
8780                         if  (tmp->found_rec)
8781                                 continue;
8782                 }
8783
8784                 dback->found_ref += 1;
8785                 dback->disk_bytenr = bytenr;
8786                 dback->bytes = bytes;
8787
8788                 /*
8789                  * Set this so the verify backref code knows not to trust the
8790                  * values in this backref.
8791                  */
8792                 back->broken = 1;
8793         }
8794
8795         return 0;
8796 }
8797
8798 /*
8799  * Record orphan data ref into corresponding root.
8800  *
8801  * Return 0 if the extent item contains data ref and recorded.
8802  * Return 1 if the extent item contains no useful data ref
8803  *   On that case, it may contains only shared_dataref or metadata backref
8804  *   or the file extent exists(this should be handled by the extent bytenr
8805  *   recovery routine)
8806  * Return <0 if something goes wrong.
8807  */
8808 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8809                                       struct extent_record *rec)
8810 {
8811         struct btrfs_key key;
8812         struct btrfs_root *dest_root;
8813         struct extent_backref *back;
8814         struct data_backref *dback;
8815         struct orphan_data_extent *orphan;
8816         struct btrfs_path path;
8817         int recorded_data_ref = 0;
8818         int ret = 0;
8819
8820         if (rec->metadata)
8821                 return 1;
8822         btrfs_init_path(&path);
8823         list_for_each_entry(back, &rec->backrefs, list) {
8824                 if (back->full_backref || !back->is_data ||
8825                     !back->found_extent_tree)
8826                         continue;
8827                 dback = to_data_backref(back);
8828                 if (dback->found_ref)
8829                         continue;
8830                 key.objectid = dback->root;
8831                 key.type = BTRFS_ROOT_ITEM_KEY;
8832                 key.offset = (u64)-1;
8833
8834                 dest_root = btrfs_read_fs_root(fs_info, &key);
8835
8836                 /* For non-exist root we just skip it */
8837                 if (IS_ERR(dest_root) || !dest_root)
8838                         continue;
8839
8840                 key.objectid = dback->owner;
8841                 key.type = BTRFS_EXTENT_DATA_KEY;
8842                 key.offset = dback->offset;
8843
8844                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8845                 btrfs_release_path(&path);
8846                 /*
8847                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8848                  * we need to record it for inode/file extent rebuild.
8849                  * For ret > 0, we record it only for file extent rebuild.
8850                  * For ret == 0, the file extent exists but only bytenr
8851                  * mismatch, let the original bytenr fix routine to handle,
8852                  * don't record it.
8853                  */
8854                 if (ret == 0)
8855                         continue;
8856                 ret = 0;
8857                 orphan = malloc(sizeof(*orphan));
8858                 if (!orphan) {
8859                         ret = -ENOMEM;
8860                         goto out;
8861                 }
8862                 INIT_LIST_HEAD(&orphan->list);
8863                 orphan->root = dback->root;
8864                 orphan->objectid = dback->owner;
8865                 orphan->offset = dback->offset;
8866                 orphan->disk_bytenr = rec->cache.start;
8867                 orphan->disk_len = rec->cache.size;
8868                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8869                 recorded_data_ref = 1;
8870         }
8871 out:
8872         btrfs_release_path(&path);
8873         if (!ret)
8874                 return !recorded_data_ref;
8875         else
8876                 return ret;
8877 }
8878
8879 /*
8880  * when an incorrect extent item is found, this will delete
8881  * all of the existing entries for it and recreate them
8882  * based on what the tree scan found.
8883  */
8884 static int fixup_extent_refs(struct btrfs_fs_info *info,
8885                              struct cache_tree *extent_cache,
8886                              struct extent_record *rec)
8887 {
8888         struct btrfs_trans_handle *trans = NULL;
8889         int ret;
8890         struct btrfs_path path;
8891         struct list_head *cur = rec->backrefs.next;
8892         struct cache_extent *cache;
8893         struct extent_backref *back;
8894         int allocated = 0;
8895         u64 flags = 0;
8896
8897         if (rec->flag_block_full_backref)
8898                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8899
8900         btrfs_init_path(&path);
8901         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8902                 /*
8903                  * Sometimes the backrefs themselves are so broken they don't
8904                  * get attached to any meaningful rec, so first go back and
8905                  * check any of our backrefs that we couldn't find and throw
8906                  * them into the list if we find the backref so that
8907                  * verify_backrefs can figure out what to do.
8908                  */
8909                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8910                 if (ret < 0)
8911                         goto out;
8912         }
8913
8914         /* step one, make sure all of the backrefs agree */
8915         ret = verify_backrefs(info, &path, rec);
8916         if (ret < 0)
8917                 goto out;
8918
8919         trans = btrfs_start_transaction(info->extent_root, 1);
8920         if (IS_ERR(trans)) {
8921                 ret = PTR_ERR(trans);
8922                 goto out;
8923         }
8924
8925         /* step two, delete all the existing records */
8926         ret = delete_extent_records(trans, info->extent_root, &path,
8927                                     rec->start, rec->max_size);
8928
8929         if (ret < 0)
8930                 goto out;
8931
8932         /* was this block corrupt?  If so, don't add references to it */
8933         cache = lookup_cache_extent(info->corrupt_blocks,
8934                                     rec->start, rec->max_size);
8935         if (cache) {
8936                 ret = 0;
8937                 goto out;
8938         }
8939
8940         /* step three, recreate all the refs we did find */
8941         while(cur != &rec->backrefs) {
8942                 back = to_extent_backref(cur);
8943                 cur = cur->next;
8944
8945                 /*
8946                  * if we didn't find any references, don't create a
8947                  * new extent record
8948                  */
8949                 if (!back->found_ref)
8950                         continue;
8951
8952                 rec->bad_full_backref = 0;
8953                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8954                 allocated = 1;
8955
8956                 if (ret)
8957                         goto out;
8958         }
8959 out:
8960         if (trans) {
8961                 int err = btrfs_commit_transaction(trans, info->extent_root);
8962                 if (!ret)
8963                         ret = err;
8964         }
8965
8966         btrfs_release_path(&path);
8967         return ret;
8968 }
8969
8970 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8971                               struct extent_record *rec)
8972 {
8973         struct btrfs_trans_handle *trans;
8974         struct btrfs_root *root = fs_info->extent_root;
8975         struct btrfs_path path;
8976         struct btrfs_extent_item *ei;
8977         struct btrfs_key key;
8978         u64 flags;
8979         int ret = 0;
8980
8981         key.objectid = rec->start;
8982         if (rec->metadata) {
8983                 key.type = BTRFS_METADATA_ITEM_KEY;
8984                 key.offset = rec->info_level;
8985         } else {
8986                 key.type = BTRFS_EXTENT_ITEM_KEY;
8987                 key.offset = rec->max_size;
8988         }
8989
8990         trans = btrfs_start_transaction(root, 0);
8991         if (IS_ERR(trans))
8992                 return PTR_ERR(trans);
8993
8994         btrfs_init_path(&path);
8995         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8996         if (ret < 0) {
8997                 btrfs_release_path(&path);
8998                 btrfs_commit_transaction(trans, root);
8999                 return ret;
9000         } else if (ret) {
9001                 fprintf(stderr, "Didn't find extent for %llu\n",
9002                         (unsigned long long)rec->start);
9003                 btrfs_release_path(&path);
9004                 btrfs_commit_transaction(trans, root);
9005                 return -ENOENT;
9006         }
9007
9008         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9009                             struct btrfs_extent_item);
9010         flags = btrfs_extent_flags(path.nodes[0], ei);
9011         if (rec->flag_block_full_backref) {
9012                 fprintf(stderr, "setting full backref on %llu\n",
9013                         (unsigned long long)key.objectid);
9014                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9015         } else {
9016                 fprintf(stderr, "clearing full backref on %llu\n",
9017                         (unsigned long long)key.objectid);
9018                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9019         }
9020         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9021         btrfs_mark_buffer_dirty(path.nodes[0]);
9022         btrfs_release_path(&path);
9023         return btrfs_commit_transaction(trans, root);
9024 }
9025
9026 /* right now we only prune from the extent allocation tree */
9027 static int prune_one_block(struct btrfs_trans_handle *trans,
9028                            struct btrfs_fs_info *info,
9029                            struct btrfs_corrupt_block *corrupt)
9030 {
9031         int ret;
9032         struct btrfs_path path;
9033         struct extent_buffer *eb;
9034         u64 found;
9035         int slot;
9036         int nritems;
9037         int level = corrupt->level + 1;
9038
9039         btrfs_init_path(&path);
9040 again:
9041         /* we want to stop at the parent to our busted block */
9042         path.lowest_level = level;
9043
9044         ret = btrfs_search_slot(trans, info->extent_root,
9045                                 &corrupt->key, &path, -1, 1);
9046
9047         if (ret < 0)
9048                 goto out;
9049
9050         eb = path.nodes[level];
9051         if (!eb) {
9052                 ret = -ENOENT;
9053                 goto out;
9054         }
9055
9056         /*
9057          * hopefully the search gave us the block we want to prune,
9058          * lets try that first
9059          */
9060         slot = path.slots[level];
9061         found =  btrfs_node_blockptr(eb, slot);
9062         if (found == corrupt->cache.start)
9063                 goto del_ptr;
9064
9065         nritems = btrfs_header_nritems(eb);
9066
9067         /* the search failed, lets scan this node and hope we find it */
9068         for (slot = 0; slot < nritems; slot++) {
9069                 found =  btrfs_node_blockptr(eb, slot);
9070                 if (found == corrupt->cache.start)
9071                         goto del_ptr;
9072         }
9073         /*
9074          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9075          * to this block
9076          */
9077         if (eb == info->extent_root->node) {
9078                 ret = -ENOENT;
9079                 goto out;
9080         } else {
9081                 level++;
9082                 btrfs_release_path(&path);
9083                 goto again;
9084         }
9085
9086 del_ptr:
9087         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9088         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
9089
9090 out:
9091         btrfs_release_path(&path);
9092         return ret;
9093 }
9094
9095 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9096 {
9097         struct btrfs_trans_handle *trans = NULL;
9098         struct cache_extent *cache;
9099         struct btrfs_corrupt_block *corrupt;
9100
9101         while (1) {
9102                 cache = search_cache_extent(info->corrupt_blocks, 0);
9103                 if (!cache)
9104                         break;
9105                 if (!trans) {
9106                         trans = btrfs_start_transaction(info->extent_root, 1);
9107                         if (IS_ERR(trans))
9108                                 return PTR_ERR(trans);
9109                 }
9110                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9111                 prune_one_block(trans, info, corrupt);
9112                 remove_cache_extent(info->corrupt_blocks, cache);
9113         }
9114         if (trans)
9115                 return btrfs_commit_transaction(trans, info->extent_root);
9116         return 0;
9117 }
9118
9119 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9120 {
9121         struct btrfs_block_group_cache *cache;
9122         u64 start, end;
9123         int ret;
9124
9125         while (1) {
9126                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9127                                             &start, &end, EXTENT_DIRTY);
9128                 if (ret)
9129                         break;
9130                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9131                                    GFP_NOFS);
9132         }
9133
9134         start = 0;
9135         while (1) {
9136                 cache = btrfs_lookup_first_block_group(fs_info, start);
9137                 if (!cache)
9138                         break;
9139                 if (cache->cached)
9140                         cache->cached = 0;
9141                 start = cache->key.objectid + cache->key.offset;
9142         }
9143 }
9144
9145 static int check_extent_refs(struct btrfs_root *root,
9146                              struct cache_tree *extent_cache)
9147 {
9148         struct extent_record *rec;
9149         struct cache_extent *cache;
9150         int err = 0;
9151         int ret = 0;
9152         int fixed = 0;
9153         int had_dups = 0;
9154         int recorded = 0;
9155
9156         if (repair) {
9157                 /*
9158                  * if we're doing a repair, we have to make sure
9159                  * we don't allocate from the problem extents.
9160                  * In the worst case, this will be all the
9161                  * extents in the FS
9162                  */
9163                 cache = search_cache_extent(extent_cache, 0);
9164                 while(cache) {
9165                         rec = container_of(cache, struct extent_record, cache);
9166                         set_extent_dirty(root->fs_info->excluded_extents,
9167                                          rec->start,
9168                                          rec->start + rec->max_size - 1,
9169                                          GFP_NOFS);
9170                         cache = next_cache_extent(cache);
9171                 }
9172
9173                 /* pin down all the corrupted blocks too */
9174                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9175                 while(cache) {
9176                         set_extent_dirty(root->fs_info->excluded_extents,
9177                                          cache->start,
9178                                          cache->start + cache->size - 1,
9179                                          GFP_NOFS);
9180                         cache = next_cache_extent(cache);
9181                 }
9182                 prune_corrupt_blocks(root->fs_info);
9183                 reset_cached_block_groups(root->fs_info);
9184         }
9185
9186         reset_cached_block_groups(root->fs_info);
9187
9188         /*
9189          * We need to delete any duplicate entries we find first otherwise we
9190          * could mess up the extent tree when we have backrefs that actually
9191          * belong to a different extent item and not the weird duplicate one.
9192          */
9193         while (repair && !list_empty(&duplicate_extents)) {
9194                 rec = to_extent_record(duplicate_extents.next);
9195                 list_del_init(&rec->list);
9196
9197                 /* Sometimes we can find a backref before we find an actual
9198                  * extent, so we need to process it a little bit to see if there
9199                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9200                  * if this is a backref screwup.  If we need to delete stuff
9201                  * process_duplicates() will return 0, otherwise it will return
9202                  * 1 and we
9203                  */
9204                 if (process_duplicates(root, extent_cache, rec))
9205                         continue;
9206                 ret = delete_duplicate_records(root, rec);
9207                 if (ret < 0)
9208                         return ret;
9209                 /*
9210                  * delete_duplicate_records will return the number of entries
9211                  * deleted, so if it's greater than 0 then we know we actually
9212                  * did something and we need to remove.
9213                  */
9214                 if (ret)
9215                         had_dups = 1;
9216         }
9217
9218         if (had_dups)
9219                 return -EAGAIN;
9220
9221         while(1) {
9222                 int cur_err = 0;
9223
9224                 fixed = 0;
9225                 recorded = 0;
9226                 cache = search_cache_extent(extent_cache, 0);
9227                 if (!cache)
9228                         break;
9229                 rec = container_of(cache, struct extent_record, cache);
9230                 if (rec->num_duplicates) {
9231                         fprintf(stderr, "extent item %llu has multiple extent "
9232                                 "items\n", (unsigned long long)rec->start);
9233                         err = 1;
9234                         cur_err = 1;
9235                 }
9236
9237                 if (rec->refs != rec->extent_item_refs) {
9238                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9239                                 (unsigned long long)rec->start,
9240                                 (unsigned long long)rec->nr);
9241                         fprintf(stderr, "extent item %llu, found %llu\n",
9242                                 (unsigned long long)rec->extent_item_refs,
9243                                 (unsigned long long)rec->refs);
9244                         ret = record_orphan_data_extents(root->fs_info, rec);
9245                         if (ret < 0)
9246                                 goto repair_abort;
9247                         if (ret == 0) {
9248                                 recorded = 1;
9249                         } else {
9250                                 /*
9251                                  * we can't use the extent to repair file
9252                                  * extent, let the fallback method handle it.
9253                                  */
9254                                 if (!fixed && repair) {
9255                                         ret = fixup_extent_refs(
9256                                                         root->fs_info,
9257                                                         extent_cache, rec);
9258                                         if (ret)
9259                                                 goto repair_abort;
9260                                         fixed = 1;
9261                                 }
9262                         }
9263                         err = 1;
9264                         cur_err = 1;
9265                 }
9266                 if (all_backpointers_checked(rec, 1)) {
9267                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9268                                 (unsigned long long)rec->start,
9269                                 (unsigned long long)rec->nr);
9270
9271                         if (!fixed && !recorded && repair) {
9272                                 ret = fixup_extent_refs(root->fs_info,
9273                                                         extent_cache, rec);
9274                                 if (ret)
9275                                         goto repair_abort;
9276                                 fixed = 1;
9277                         }
9278                         cur_err = 1;
9279                         err = 1;
9280                 }
9281                 if (!rec->owner_ref_checked) {
9282                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9283                                 (unsigned long long)rec->start,
9284                                 (unsigned long long)rec->nr);
9285                         if (!fixed && !recorded && repair) {
9286                                 ret = fixup_extent_refs(root->fs_info,
9287                                                         extent_cache, rec);
9288                                 if (ret)
9289                                         goto repair_abort;
9290                                 fixed = 1;
9291                         }
9292                         err = 1;
9293                         cur_err = 1;
9294                 }
9295                 if (rec->bad_full_backref) {
9296                         fprintf(stderr, "bad full backref, on [%llu]\n",
9297                                 (unsigned long long)rec->start);
9298                         if (repair) {
9299                                 ret = fixup_extent_flags(root->fs_info, rec);
9300                                 if (ret)
9301                                         goto repair_abort;
9302                                 fixed = 1;
9303                         }
9304                         err = 1;
9305                         cur_err = 1;
9306                 }
9307                 /*
9308                  * Although it's not a extent ref's problem, we reuse this
9309                  * routine for error reporting.
9310                  * No repair function yet.
9311                  */
9312                 if (rec->crossing_stripes) {
9313                         fprintf(stderr,
9314                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9315                                 rec->start, rec->start + rec->max_size);
9316                         err = 1;
9317                         cur_err = 1;
9318                 }
9319
9320                 if (rec->wrong_chunk_type) {
9321                         fprintf(stderr,
9322                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9323                                 rec->start, rec->start + rec->max_size);
9324                         err = 1;
9325                         cur_err = 1;
9326                 }
9327
9328                 remove_cache_extent(extent_cache, cache);
9329                 free_all_extent_backrefs(rec);
9330                 if (!init_extent_tree && repair && (!cur_err || fixed))
9331                         clear_extent_dirty(root->fs_info->excluded_extents,
9332                                            rec->start,
9333                                            rec->start + rec->max_size - 1,
9334                                            GFP_NOFS);
9335                 free(rec);
9336         }
9337 repair_abort:
9338         if (repair) {
9339                 if (ret && ret != -EAGAIN) {
9340                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9341                         exit(1);
9342                 } else if (!ret) {
9343                         struct btrfs_trans_handle *trans;
9344
9345                         root = root->fs_info->extent_root;
9346                         trans = btrfs_start_transaction(root, 1);
9347                         if (IS_ERR(trans)) {
9348                                 ret = PTR_ERR(trans);
9349                                 goto repair_abort;
9350                         }
9351
9352                         btrfs_fix_block_accounting(trans, root);
9353                         ret = btrfs_commit_transaction(trans, root);
9354                         if (ret)
9355                                 goto repair_abort;
9356                 }
9357                 if (err)
9358                         fprintf(stderr, "repaired damaged extent references\n");
9359                 return ret;
9360         }
9361         return err;
9362 }
9363
9364 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9365 {
9366         u64 stripe_size;
9367
9368         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9369                 stripe_size = length;
9370                 stripe_size /= num_stripes;
9371         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9372                 stripe_size = length * 2;
9373                 stripe_size /= num_stripes;
9374         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9375                 stripe_size = length;
9376                 stripe_size /= (num_stripes - 1);
9377         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9378                 stripe_size = length;
9379                 stripe_size /= (num_stripes - 2);
9380         } else {
9381                 stripe_size = length;
9382         }
9383         return stripe_size;
9384 }
9385
9386 /*
9387  * Check the chunk with its block group/dev list ref:
9388  * Return 0 if all refs seems valid.
9389  * Return 1 if part of refs seems valid, need later check for rebuild ref
9390  * like missing block group and needs to search extent tree to rebuild them.
9391  * Return -1 if essential refs are missing and unable to rebuild.
9392  */
9393 static int check_chunk_refs(struct chunk_record *chunk_rec,
9394                             struct block_group_tree *block_group_cache,
9395                             struct device_extent_tree *dev_extent_cache,
9396                             int silent)
9397 {
9398         struct cache_extent *block_group_item;
9399         struct block_group_record *block_group_rec;
9400         struct cache_extent *dev_extent_item;
9401         struct device_extent_record *dev_extent_rec;
9402         u64 devid;
9403         u64 offset;
9404         u64 length;
9405         int metadump_v2 = 0;
9406         int i;
9407         int ret = 0;
9408
9409         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9410                                                chunk_rec->offset,
9411                                                chunk_rec->length);
9412         if (block_group_item) {
9413                 block_group_rec = container_of(block_group_item,
9414                                                struct block_group_record,
9415                                                cache);
9416                 if (chunk_rec->length != block_group_rec->offset ||
9417                     chunk_rec->offset != block_group_rec->objectid ||
9418                     (!metadump_v2 &&
9419                      chunk_rec->type_flags != block_group_rec->flags)) {
9420                         if (!silent)
9421                                 fprintf(stderr,
9422                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9423                                         chunk_rec->objectid,
9424                                         chunk_rec->type,
9425                                         chunk_rec->offset,
9426                                         chunk_rec->length,
9427                                         chunk_rec->offset,
9428                                         chunk_rec->type_flags,
9429                                         block_group_rec->objectid,
9430                                         block_group_rec->type,
9431                                         block_group_rec->offset,
9432                                         block_group_rec->offset,
9433                                         block_group_rec->objectid,
9434                                         block_group_rec->flags);
9435                         ret = -1;
9436                 } else {
9437                         list_del_init(&block_group_rec->list);
9438                         chunk_rec->bg_rec = block_group_rec;
9439                 }
9440         } else {
9441                 if (!silent)
9442                         fprintf(stderr,
9443                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9444                                 chunk_rec->objectid,
9445                                 chunk_rec->type,
9446                                 chunk_rec->offset,
9447                                 chunk_rec->length,
9448                                 chunk_rec->offset,
9449                                 chunk_rec->type_flags);
9450                 ret = 1;
9451         }
9452
9453         if (metadump_v2)
9454                 return ret;
9455
9456         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9457                                     chunk_rec->num_stripes);
9458         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9459                 devid = chunk_rec->stripes[i].devid;
9460                 offset = chunk_rec->stripes[i].offset;
9461                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9462                                                        devid, offset, length);
9463                 if (dev_extent_item) {
9464                         dev_extent_rec = container_of(dev_extent_item,
9465                                                 struct device_extent_record,
9466                                                 cache);
9467                         if (dev_extent_rec->objectid != devid ||
9468                             dev_extent_rec->offset != offset ||
9469                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9470                             dev_extent_rec->length != length) {
9471                                 if (!silent)
9472                                         fprintf(stderr,
9473                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9474                                                 chunk_rec->objectid,
9475                                                 chunk_rec->type,
9476                                                 chunk_rec->offset,
9477                                                 chunk_rec->stripes[i].devid,
9478                                                 chunk_rec->stripes[i].offset,
9479                                                 dev_extent_rec->objectid,
9480                                                 dev_extent_rec->offset,
9481                                                 dev_extent_rec->length);
9482                                 ret = -1;
9483                         } else {
9484                                 list_move(&dev_extent_rec->chunk_list,
9485                                           &chunk_rec->dextents);
9486                         }
9487                 } else {
9488                         if (!silent)
9489                                 fprintf(stderr,
9490                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9491                                         chunk_rec->objectid,
9492                                         chunk_rec->type,
9493                                         chunk_rec->offset,
9494                                         chunk_rec->stripes[i].devid,
9495                                         chunk_rec->stripes[i].offset);
9496                         ret = -1;
9497                 }
9498         }
9499         return ret;
9500 }
9501
9502 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9503 int check_chunks(struct cache_tree *chunk_cache,
9504                  struct block_group_tree *block_group_cache,
9505                  struct device_extent_tree *dev_extent_cache,
9506                  struct list_head *good, struct list_head *bad,
9507                  struct list_head *rebuild, int silent)
9508 {
9509         struct cache_extent *chunk_item;
9510         struct chunk_record *chunk_rec;
9511         struct block_group_record *bg_rec;
9512         struct device_extent_record *dext_rec;
9513         int err;
9514         int ret = 0;
9515
9516         chunk_item = first_cache_extent(chunk_cache);
9517         while (chunk_item) {
9518                 chunk_rec = container_of(chunk_item, struct chunk_record,
9519                                          cache);
9520                 err = check_chunk_refs(chunk_rec, block_group_cache,
9521                                        dev_extent_cache, silent);
9522                 if (err < 0)
9523                         ret = err;
9524                 if (err == 0 && good)
9525                         list_add_tail(&chunk_rec->list, good);
9526                 if (err > 0 && rebuild)
9527                         list_add_tail(&chunk_rec->list, rebuild);
9528                 if (err < 0 && bad)
9529                         list_add_tail(&chunk_rec->list, bad);
9530                 chunk_item = next_cache_extent(chunk_item);
9531         }
9532
9533         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9534                 if (!silent)
9535                         fprintf(stderr,
9536                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9537                                 bg_rec->objectid,
9538                                 bg_rec->offset,
9539                                 bg_rec->flags);
9540                 if (!ret)
9541                         ret = 1;
9542         }
9543
9544         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9545                             chunk_list) {
9546                 if (!silent)
9547                         fprintf(stderr,
9548                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9549                                 dext_rec->objectid,
9550                                 dext_rec->offset,
9551                                 dext_rec->length);
9552                 if (!ret)
9553                         ret = 1;
9554         }
9555         return ret;
9556 }
9557
9558
9559 static int check_device_used(struct device_record *dev_rec,
9560                              struct device_extent_tree *dext_cache)
9561 {
9562         struct cache_extent *cache;
9563         struct device_extent_record *dev_extent_rec;
9564         u64 total_byte = 0;
9565
9566         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9567         while (cache) {
9568                 dev_extent_rec = container_of(cache,
9569                                               struct device_extent_record,
9570                                               cache);
9571                 if (dev_extent_rec->objectid != dev_rec->devid)
9572                         break;
9573
9574                 list_del_init(&dev_extent_rec->device_list);
9575                 total_byte += dev_extent_rec->length;
9576                 cache = next_cache_extent(cache);
9577         }
9578
9579         if (total_byte != dev_rec->byte_used) {
9580                 fprintf(stderr,
9581                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9582                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9583                         dev_rec->type, dev_rec->offset);
9584                 return -1;
9585         } else {
9586                 return 0;
9587         }
9588 }
9589
9590 /* check btrfs_dev_item -> btrfs_dev_extent */
9591 static int check_devices(struct rb_root *dev_cache,
9592                          struct device_extent_tree *dev_extent_cache)
9593 {
9594         struct rb_node *dev_node;
9595         struct device_record *dev_rec;
9596         struct device_extent_record *dext_rec;
9597         int err;
9598         int ret = 0;
9599
9600         dev_node = rb_first(dev_cache);
9601         while (dev_node) {
9602                 dev_rec = container_of(dev_node, struct device_record, node);
9603                 err = check_device_used(dev_rec, dev_extent_cache);
9604                 if (err)
9605                         ret = err;
9606
9607                 dev_node = rb_next(dev_node);
9608         }
9609         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9610                             device_list) {
9611                 fprintf(stderr,
9612                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9613                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9614                 if (!ret)
9615                         ret = 1;
9616         }
9617         return ret;
9618 }
9619
9620 static int add_root_item_to_list(struct list_head *head,
9621                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9622                                   u8 level, u8 drop_level,
9623                                   int level_size, struct btrfs_key *drop_key)
9624 {
9625
9626         struct root_item_record *ri_rec;
9627         ri_rec = malloc(sizeof(*ri_rec));
9628         if (!ri_rec)
9629                 return -ENOMEM;
9630         ri_rec->bytenr = bytenr;
9631         ri_rec->objectid = objectid;
9632         ri_rec->level = level;
9633         ri_rec->level_size = level_size;
9634         ri_rec->drop_level = drop_level;
9635         ri_rec->last_snapshot = last_snapshot;
9636         if (drop_key)
9637                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9638         list_add_tail(&ri_rec->list, head);
9639
9640         return 0;
9641 }
9642
9643 static void free_root_item_list(struct list_head *list)
9644 {
9645         struct root_item_record *ri_rec;
9646
9647         while (!list_empty(list)) {
9648                 ri_rec = list_first_entry(list, struct root_item_record,
9649                                           list);
9650                 list_del_init(&ri_rec->list);
9651                 free(ri_rec);
9652         }
9653 }
9654
9655 static int deal_root_from_list(struct list_head *list,
9656                                struct btrfs_root *root,
9657                                struct block_info *bits,
9658                                int bits_nr,
9659                                struct cache_tree *pending,
9660                                struct cache_tree *seen,
9661                                struct cache_tree *reada,
9662                                struct cache_tree *nodes,
9663                                struct cache_tree *extent_cache,
9664                                struct cache_tree *chunk_cache,
9665                                struct rb_root *dev_cache,
9666                                struct block_group_tree *block_group_cache,
9667                                struct device_extent_tree *dev_extent_cache)
9668 {
9669         int ret = 0;
9670         u64 last;
9671
9672         while (!list_empty(list)) {
9673                 struct root_item_record *rec;
9674                 struct extent_buffer *buf;
9675                 rec = list_entry(list->next,
9676                                  struct root_item_record, list);
9677                 last = 0;
9678                 buf = read_tree_block(root->fs_info->tree_root,
9679                                       rec->bytenr, rec->level_size, 0);
9680                 if (!extent_buffer_uptodate(buf)) {
9681                         free_extent_buffer(buf);
9682                         ret = -EIO;
9683                         break;
9684                 }
9685                 ret = add_root_to_pending(buf, extent_cache, pending,
9686                                     seen, nodes, rec->objectid);
9687                 if (ret < 0)
9688                         break;
9689                 /*
9690                  * To rebuild extent tree, we need deal with snapshot
9691                  * one by one, otherwise we deal with node firstly which
9692                  * can maximize readahead.
9693                  */
9694                 while (1) {
9695                         ret = run_next_block(root, bits, bits_nr, &last,
9696                                              pending, seen, reada, nodes,
9697                                              extent_cache, chunk_cache,
9698                                              dev_cache, block_group_cache,
9699                                              dev_extent_cache, rec);
9700                         if (ret != 0)
9701                                 break;
9702                 }
9703                 free_extent_buffer(buf);
9704                 list_del(&rec->list);
9705                 free(rec);
9706                 if (ret < 0)
9707                         break;
9708         }
9709         while (ret >= 0) {
9710                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9711                                      reada, nodes, extent_cache, chunk_cache,
9712                                      dev_cache, block_group_cache,
9713                                      dev_extent_cache, NULL);
9714                 if (ret != 0) {
9715                         if (ret > 0)
9716                                 ret = 0;
9717                         break;
9718                 }
9719         }
9720         return ret;
9721 }
9722
9723 static int check_chunks_and_extents(struct btrfs_root *root)
9724 {
9725         struct rb_root dev_cache;
9726         struct cache_tree chunk_cache;
9727         struct block_group_tree block_group_cache;
9728         struct device_extent_tree dev_extent_cache;
9729         struct cache_tree extent_cache;
9730         struct cache_tree seen;
9731         struct cache_tree pending;
9732         struct cache_tree reada;
9733         struct cache_tree nodes;
9734         struct extent_io_tree excluded_extents;
9735         struct cache_tree corrupt_blocks;
9736         struct btrfs_path path;
9737         struct btrfs_key key;
9738         struct btrfs_key found_key;
9739         int ret, err = 0;
9740         struct block_info *bits;
9741         int bits_nr;
9742         struct extent_buffer *leaf;
9743         int slot;
9744         struct btrfs_root_item ri;
9745         struct list_head dropping_trees;
9746         struct list_head normal_trees;
9747         struct btrfs_root *root1;
9748         u64 objectid;
9749         u32 level_size;
9750         u8 level;
9751
9752         dev_cache = RB_ROOT;
9753         cache_tree_init(&chunk_cache);
9754         block_group_tree_init(&block_group_cache);
9755         device_extent_tree_init(&dev_extent_cache);
9756
9757         cache_tree_init(&extent_cache);
9758         cache_tree_init(&seen);
9759         cache_tree_init(&pending);
9760         cache_tree_init(&nodes);
9761         cache_tree_init(&reada);
9762         cache_tree_init(&corrupt_blocks);
9763         extent_io_tree_init(&excluded_extents);
9764         INIT_LIST_HEAD(&dropping_trees);
9765         INIT_LIST_HEAD(&normal_trees);
9766
9767         if (repair) {
9768                 root->fs_info->excluded_extents = &excluded_extents;
9769                 root->fs_info->fsck_extent_cache = &extent_cache;
9770                 root->fs_info->free_extent_hook = free_extent_hook;
9771                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9772         }
9773
9774         bits_nr = 1024;
9775         bits = malloc(bits_nr * sizeof(struct block_info));
9776         if (!bits) {
9777                 perror("malloc");
9778                 exit(1);
9779         }
9780
9781         if (ctx.progress_enabled) {
9782                 ctx.tp = TASK_EXTENTS;
9783                 task_start(ctx.info);
9784         }
9785
9786 again:
9787         root1 = root->fs_info->tree_root;
9788         level = btrfs_header_level(root1->node);
9789         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9790                                     root1->node->start, 0, level, 0,
9791                                     root1->nodesize, NULL);
9792         if (ret < 0)
9793                 goto out;
9794         root1 = root->fs_info->chunk_root;
9795         level = btrfs_header_level(root1->node);
9796         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9797                                     root1->node->start, 0, level, 0,
9798                                     root1->nodesize, NULL);
9799         if (ret < 0)
9800                 goto out;
9801         btrfs_init_path(&path);
9802         key.offset = 0;
9803         key.objectid = 0;
9804         key.type = BTRFS_ROOT_ITEM_KEY;
9805         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9806                                         &key, &path, 0, 0);
9807         if (ret < 0)
9808                 goto out;
9809         while(1) {
9810                 leaf = path.nodes[0];
9811                 slot = path.slots[0];
9812                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9813                         ret = btrfs_next_leaf(root, &path);
9814                         if (ret != 0)
9815                                 break;
9816                         leaf = path.nodes[0];
9817                         slot = path.slots[0];
9818                 }
9819                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9820                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9821                         unsigned long offset;
9822                         u64 last_snapshot;
9823
9824                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9825                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9826                         last_snapshot = btrfs_root_last_snapshot(&ri);
9827                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9828                                 level = btrfs_root_level(&ri);
9829                                 level_size = root->nodesize;
9830                                 ret = add_root_item_to_list(&normal_trees,
9831                                                 found_key.objectid,
9832                                                 btrfs_root_bytenr(&ri),
9833                                                 last_snapshot, level,
9834                                                 0, level_size, NULL);
9835                                 if (ret < 0)
9836                                         goto out;
9837                         } else {
9838                                 level = btrfs_root_level(&ri);
9839                                 level_size = root->nodesize;
9840                                 objectid = found_key.objectid;
9841                                 btrfs_disk_key_to_cpu(&found_key,
9842                                                       &ri.drop_progress);
9843                                 ret = add_root_item_to_list(&dropping_trees,
9844                                                 objectid,
9845                                                 btrfs_root_bytenr(&ri),
9846                                                 last_snapshot, level,
9847                                                 ri.drop_level,
9848                                                 level_size, &found_key);
9849                                 if (ret < 0)
9850                                         goto out;
9851                         }
9852                 }
9853                 path.slots[0]++;
9854         }
9855         btrfs_release_path(&path);
9856
9857         /*
9858          * check_block can return -EAGAIN if it fixes something, please keep
9859          * this in mind when dealing with return values from these functions, if
9860          * we get -EAGAIN we want to fall through and restart the loop.
9861          */
9862         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9863                                   &seen, &reada, &nodes, &extent_cache,
9864                                   &chunk_cache, &dev_cache, &block_group_cache,
9865                                   &dev_extent_cache);
9866         if (ret < 0) {
9867                 if (ret == -EAGAIN)
9868                         goto loop;
9869                 goto out;
9870         }
9871         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9872                                   &pending, &seen, &reada, &nodes,
9873                                   &extent_cache, &chunk_cache, &dev_cache,
9874                                   &block_group_cache, &dev_extent_cache);
9875         if (ret < 0) {
9876                 if (ret == -EAGAIN)
9877                         goto loop;
9878                 goto out;
9879         }
9880
9881         ret = check_chunks(&chunk_cache, &block_group_cache,
9882                            &dev_extent_cache, NULL, NULL, NULL, 0);
9883         if (ret) {
9884                 if (ret == -EAGAIN)
9885                         goto loop;
9886                 err = ret;
9887         }
9888
9889         ret = check_extent_refs(root, &extent_cache);
9890         if (ret < 0) {
9891                 if (ret == -EAGAIN)
9892                         goto loop;
9893                 goto out;
9894         }
9895
9896         ret = check_devices(&dev_cache, &dev_extent_cache);
9897         if (ret && err)
9898                 ret = err;
9899
9900 out:
9901         task_stop(ctx.info);
9902         if (repair) {
9903                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9904                 extent_io_tree_cleanup(&excluded_extents);
9905                 root->fs_info->fsck_extent_cache = NULL;
9906                 root->fs_info->free_extent_hook = NULL;
9907                 root->fs_info->corrupt_blocks = NULL;
9908                 root->fs_info->excluded_extents = NULL;
9909         }
9910         free(bits);
9911         free_chunk_cache_tree(&chunk_cache);
9912         free_device_cache_tree(&dev_cache);
9913         free_block_group_tree(&block_group_cache);
9914         free_device_extent_tree(&dev_extent_cache);
9915         free_extent_cache_tree(&seen);
9916         free_extent_cache_tree(&pending);
9917         free_extent_cache_tree(&reada);
9918         free_extent_cache_tree(&nodes);
9919         return ret;
9920 loop:
9921         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9922         free_extent_cache_tree(&seen);
9923         free_extent_cache_tree(&pending);
9924         free_extent_cache_tree(&reada);
9925         free_extent_cache_tree(&nodes);
9926         free_chunk_cache_tree(&chunk_cache);
9927         free_block_group_tree(&block_group_cache);
9928         free_device_cache_tree(&dev_cache);
9929         free_device_extent_tree(&dev_extent_cache);
9930         free_extent_record_cache(root->fs_info, &extent_cache);
9931         free_root_item_list(&normal_trees);
9932         free_root_item_list(&dropping_trees);
9933         extent_io_tree_cleanup(&excluded_extents);
9934         goto again;
9935 }
9936
9937 /*
9938  * Check backrefs of a tree block given by @bytenr or @eb.
9939  *
9940  * @root:       the root containing the @bytenr or @eb
9941  * @eb:         tree block extent buffer, can be NULL
9942  * @bytenr:     bytenr of the tree block to search
9943  * @level:      tree level of the tree block
9944  * @owner:      owner of the tree block
9945  *
9946  * Return >0 for any error found and output error message
9947  * Return 0 for no error found
9948  */
9949 static int check_tree_block_ref(struct btrfs_root *root,
9950                                 struct extent_buffer *eb, u64 bytenr,
9951                                 int level, u64 owner)
9952 {
9953         struct btrfs_key key;
9954         struct btrfs_root *extent_root = root->fs_info->extent_root;
9955         struct btrfs_path path;
9956         struct btrfs_extent_item *ei;
9957         struct btrfs_extent_inline_ref *iref;
9958         struct extent_buffer *leaf;
9959         unsigned long end;
9960         unsigned long ptr;
9961         int slot;
9962         int skinny_level;
9963         int type;
9964         u32 nodesize = root->nodesize;
9965         u32 item_size;
9966         u64 offset;
9967         int found_ref = 0;
9968         int err = 0;
9969         int ret;
9970
9971         btrfs_init_path(&path);
9972         key.objectid = bytenr;
9973         if (btrfs_fs_incompat(root->fs_info,
9974                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9975                 key.type = BTRFS_METADATA_ITEM_KEY;
9976         else
9977                 key.type = BTRFS_EXTENT_ITEM_KEY;
9978         key.offset = (u64)-1;
9979
9980         /* Search for the backref in extent tree */
9981         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9982         if (ret < 0) {
9983                 err |= BACKREF_MISSING;
9984                 goto out;
9985         }
9986         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9987         if (ret) {
9988                 err |= BACKREF_MISSING;
9989                 goto out;
9990         }
9991
9992         leaf = path.nodes[0];
9993         slot = path.slots[0];
9994         btrfs_item_key_to_cpu(leaf, &key, slot);
9995
9996         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9997
9998         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9999                 skinny_level = (int)key.offset;
10000                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10001         } else {
10002                 struct btrfs_tree_block_info *info;
10003
10004                 info = (struct btrfs_tree_block_info *)(ei + 1);
10005                 skinny_level = btrfs_tree_block_level(leaf, info);
10006                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10007         }
10008
10009         if (eb) {
10010                 u64 header_gen;
10011                 u64 extent_gen;
10012
10013                 if (!(btrfs_extent_flags(leaf, ei) &
10014                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10015                         error(
10016                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10017                                 key.objectid, nodesize,
10018                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10019                         err = BACKREF_MISMATCH;
10020                 }
10021                 header_gen = btrfs_header_generation(eb);
10022                 extent_gen = btrfs_extent_generation(leaf, ei);
10023                 if (header_gen != extent_gen) {
10024                         error(
10025         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10026                                 key.objectid, nodesize, header_gen,
10027                                 extent_gen);
10028                         err = BACKREF_MISMATCH;
10029                 }
10030                 if (level != skinny_level) {
10031                         error(
10032                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10033                                 key.objectid, nodesize, level, skinny_level);
10034                         err = BACKREF_MISMATCH;
10035                 }
10036                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10037                         error(
10038                         "extent[%llu %u] is referred by other roots than %llu",
10039                                 key.objectid, nodesize, root->objectid);
10040                         err = BACKREF_MISMATCH;
10041                 }
10042         }
10043
10044         /*
10045          * Iterate the extent/metadata item to find the exact backref
10046          */
10047         item_size = btrfs_item_size_nr(leaf, slot);
10048         ptr = (unsigned long)iref;
10049         end = (unsigned long)ei + item_size;
10050         while (ptr < end) {
10051                 iref = (struct btrfs_extent_inline_ref *)ptr;
10052                 type = btrfs_extent_inline_ref_type(leaf, iref);
10053                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10054
10055                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10056                         (offset == root->objectid || offset == owner)) {
10057                         found_ref = 1;
10058                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10059                         /* Check if the backref points to valid referencer */
10060                         found_ref = !check_tree_block_ref(root, NULL, offset,
10061                                                           level + 1, owner);
10062                 }
10063
10064                 if (found_ref)
10065                         break;
10066                 ptr += btrfs_extent_inline_ref_size(type);
10067         }
10068
10069         /*
10070          * Inlined extent item doesn't have what we need, check
10071          * TREE_BLOCK_REF_KEY
10072          */
10073         if (!found_ref) {
10074                 btrfs_release_path(&path);
10075                 key.objectid = bytenr;
10076                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10077                 key.offset = root->objectid;
10078
10079                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10080                 if (!ret)
10081                         found_ref = 1;
10082         }
10083         if (!found_ref)
10084                 err |= BACKREF_MISSING;
10085 out:
10086         btrfs_release_path(&path);
10087         if (eb && (err & BACKREF_MISSING))
10088                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10089                         bytenr, nodesize, owner, level);
10090         return err;
10091 }
10092
10093 /*
10094  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10095  *
10096  * Return >0 any error found and output error message
10097  * Return 0 for no error found
10098  */
10099 static int check_extent_data_item(struct btrfs_root *root,
10100                                   struct extent_buffer *eb, int slot)
10101 {
10102         struct btrfs_file_extent_item *fi;
10103         struct btrfs_path path;
10104         struct btrfs_root *extent_root = root->fs_info->extent_root;
10105         struct btrfs_key fi_key;
10106         struct btrfs_key dbref_key;
10107         struct extent_buffer *leaf;
10108         struct btrfs_extent_item *ei;
10109         struct btrfs_extent_inline_ref *iref;
10110         struct btrfs_extent_data_ref *dref;
10111         u64 owner;
10112         u64 file_extent_gen;
10113         u64 disk_bytenr;
10114         u64 disk_num_bytes;
10115         u64 extent_num_bytes;
10116         u64 extent_flags;
10117         u64 extent_gen;
10118         u32 item_size;
10119         unsigned long end;
10120         unsigned long ptr;
10121         int type;
10122         u64 ref_root;
10123         int found_dbackref = 0;
10124         int err = 0;
10125         int ret;
10126
10127         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10128         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10129         file_extent_gen = btrfs_file_extent_generation(eb, fi);
10130
10131         /* Nothing to check for hole and inline data extents */
10132         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10133             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10134                 return 0;
10135
10136         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10137         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10138         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10139
10140         /* Check unaligned disk_num_bytes and num_bytes */
10141         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10142                 error(
10143 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10144                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10145                         root->sectorsize);
10146                 err |= BYTES_UNALIGNED;
10147         } else {
10148                 data_bytes_allocated += disk_num_bytes;
10149         }
10150         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10151                 error(
10152 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10153                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10154                         root->sectorsize);
10155                 err |= BYTES_UNALIGNED;
10156         } else {
10157                 data_bytes_referenced += extent_num_bytes;
10158         }
10159         owner = btrfs_header_owner(eb);
10160
10161         /* Check the extent item of the file extent in extent tree */
10162         btrfs_init_path(&path);
10163         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10164         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10165         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10166
10167         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10168         if (ret) {
10169                 err |= BACKREF_MISSING;
10170                 goto error;
10171         }
10172
10173         leaf = path.nodes[0];
10174         slot = path.slots[0];
10175         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10176
10177         extent_flags = btrfs_extent_flags(leaf, ei);
10178         extent_gen = btrfs_extent_generation(leaf, ei);
10179
10180         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10181                 error(
10182                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10183                     disk_bytenr, disk_num_bytes,
10184                     BTRFS_EXTENT_FLAG_DATA);
10185                 err |= BACKREF_MISMATCH;
10186         }
10187
10188         if (file_extent_gen < extent_gen) {
10189                 error(
10190 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
10191                         disk_bytenr, disk_num_bytes, file_extent_gen,
10192                         extent_gen);
10193                 err |= BACKREF_MISMATCH;
10194         }
10195
10196         /* Check data backref inside that extent item */
10197         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10198         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10199         ptr = (unsigned long)iref;
10200         end = (unsigned long)ei + item_size;
10201         while (ptr < end) {
10202                 iref = (struct btrfs_extent_inline_ref *)ptr;
10203                 type = btrfs_extent_inline_ref_type(leaf, iref);
10204                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10205
10206                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10207                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10208                         if (ref_root == owner || ref_root == root->objectid)
10209                                 found_dbackref = 1;
10210                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10211                         found_dbackref = !check_tree_block_ref(root, NULL,
10212                                 btrfs_extent_inline_ref_offset(leaf, iref),
10213                                 0, owner);
10214                 }
10215
10216                 if (found_dbackref)
10217                         break;
10218                 ptr += btrfs_extent_inline_ref_size(type);
10219         }
10220
10221         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10222         if (!found_dbackref) {
10223                 btrfs_release_path(&path);
10224
10225                 btrfs_init_path(&path);
10226                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10227                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10228                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10229                                 fi_key.objectid, fi_key.offset);
10230
10231                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10232                                         &dbref_key, &path, 0, 0);
10233                 if (!ret)
10234                         found_dbackref = 1;
10235         }
10236
10237         if (!found_dbackref)
10238                 err |= BACKREF_MISSING;
10239 error:
10240         btrfs_release_path(&path);
10241         if (err & BACKREF_MISSING) {
10242                 error("data extent[%llu %llu] backref lost",
10243                       disk_bytenr, disk_num_bytes);
10244         }
10245         return err;
10246 }
10247
10248 /*
10249  * Get real tree block level for the case like shared block
10250  * Return >= 0 as tree level
10251  * Return <0 for error
10252  */
10253 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10254 {
10255         struct extent_buffer *eb;
10256         struct btrfs_path path;
10257         struct btrfs_key key;
10258         struct btrfs_extent_item *ei;
10259         u64 flags;
10260         u64 transid;
10261         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10262         u8 backref_level;
10263         u8 header_level;
10264         int ret;
10265
10266         /* Search extent tree for extent generation and level */
10267         key.objectid = bytenr;
10268         key.type = BTRFS_METADATA_ITEM_KEY;
10269         key.offset = (u64)-1;
10270
10271         btrfs_init_path(&path);
10272         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10273         if (ret < 0)
10274                 goto release_out;
10275         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10276         if (ret < 0)
10277                 goto release_out;
10278         if (ret > 0) {
10279                 ret = -ENOENT;
10280                 goto release_out;
10281         }
10282
10283         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10284         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10285                             struct btrfs_extent_item);
10286         flags = btrfs_extent_flags(path.nodes[0], ei);
10287         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10288                 ret = -ENOENT;
10289                 goto release_out;
10290         }
10291
10292         /* Get transid for later read_tree_block() check */
10293         transid = btrfs_extent_generation(path.nodes[0], ei);
10294
10295         /* Get backref level as one source */
10296         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10297                 backref_level = key.offset;
10298         } else {
10299                 struct btrfs_tree_block_info *info;
10300
10301                 info = (struct btrfs_tree_block_info *)(ei + 1);
10302                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10303         }
10304         btrfs_release_path(&path);
10305
10306         /* Get level from tree block as an alternative source */
10307         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10308         if (!extent_buffer_uptodate(eb)) {
10309                 free_extent_buffer(eb);
10310                 return -EIO;
10311         }
10312         header_level = btrfs_header_level(eb);
10313         free_extent_buffer(eb);
10314
10315         if (header_level != backref_level)
10316                 return -EIO;
10317         return header_level;
10318
10319 release_out:
10320         btrfs_release_path(&path);
10321         return ret;
10322 }
10323
10324 /*
10325  * Check if a tree block backref is valid (points to a valid tree block)
10326  * if level == -1, level will be resolved
10327  * Return >0 for any error found and print error message
10328  */
10329 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10330                                     u64 bytenr, int level)
10331 {
10332         struct btrfs_root *root;
10333         struct btrfs_key key;
10334         struct btrfs_path path;
10335         struct extent_buffer *eb;
10336         struct extent_buffer *node;
10337         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10338         int err = 0;
10339         int ret;
10340
10341         /* Query level for level == -1 special case */
10342         if (level == -1)
10343                 level = query_tree_block_level(fs_info, bytenr);
10344         if (level < 0) {
10345                 err |= REFERENCER_MISSING;
10346                 goto out;
10347         }
10348
10349         key.objectid = root_id;
10350         key.type = BTRFS_ROOT_ITEM_KEY;
10351         key.offset = (u64)-1;
10352
10353         root = btrfs_read_fs_root(fs_info, &key);
10354         if (IS_ERR(root)) {
10355                 err |= REFERENCER_MISSING;
10356                 goto out;
10357         }
10358
10359         /* Read out the tree block to get item/node key */
10360         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10361         if (!extent_buffer_uptodate(eb)) {
10362                 err |= REFERENCER_MISSING;
10363                 free_extent_buffer(eb);
10364                 goto out;
10365         }
10366
10367         /* Empty tree, no need to check key */
10368         if (!btrfs_header_nritems(eb) && !level) {
10369                 free_extent_buffer(eb);
10370                 goto out;
10371         }
10372
10373         if (level)
10374                 btrfs_node_key_to_cpu(eb, &key, 0);
10375         else
10376                 btrfs_item_key_to_cpu(eb, &key, 0);
10377
10378         free_extent_buffer(eb);
10379
10380         btrfs_init_path(&path);
10381         path.lowest_level = level;
10382         /* Search with the first key, to ensure we can reach it */
10383         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10384         if (ret < 0) {
10385                 err |= REFERENCER_MISSING;
10386                 goto release_out;
10387         }
10388
10389         node = path.nodes[level];
10390         if (btrfs_header_bytenr(node) != bytenr) {
10391                 error(
10392         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10393                         bytenr, nodesize, bytenr,
10394                         btrfs_header_bytenr(node));
10395                 err |= REFERENCER_MISMATCH;
10396         }
10397         if (btrfs_header_level(node) != level) {
10398                 error(
10399         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10400                         bytenr, nodesize, level,
10401                         btrfs_header_level(node));
10402                 err |= REFERENCER_MISMATCH;
10403         }
10404
10405 release_out:
10406         btrfs_release_path(&path);
10407 out:
10408         if (err & REFERENCER_MISSING) {
10409                 if (level < 0)
10410                         error("extent [%llu %d] lost referencer (owner: %llu)",
10411                                 bytenr, nodesize, root_id);
10412                 else
10413                         error(
10414                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10415                                 bytenr, nodesize, root_id, level);
10416         }
10417
10418         return err;
10419 }
10420
10421 /*
10422  * Check referencer for shared block backref
10423  * If level == -1, this function will resolve the level.
10424  */
10425 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10426                                      u64 parent, u64 bytenr, int level)
10427 {
10428         struct extent_buffer *eb;
10429         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10430         u32 nr;
10431         int found_parent = 0;
10432         int i;
10433
10434         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10435         if (!extent_buffer_uptodate(eb))
10436                 goto out;
10437
10438         if (level == -1)
10439                 level = query_tree_block_level(fs_info, bytenr);
10440         if (level < 0)
10441                 goto out;
10442
10443         if (level + 1 != btrfs_header_level(eb))
10444                 goto out;
10445
10446         nr = btrfs_header_nritems(eb);
10447         for (i = 0; i < nr; i++) {
10448                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10449                         found_parent = 1;
10450                         break;
10451                 }
10452         }
10453 out:
10454         free_extent_buffer(eb);
10455         if (!found_parent) {
10456                 error(
10457         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10458                         bytenr, nodesize, parent, level);
10459                 return REFERENCER_MISSING;
10460         }
10461         return 0;
10462 }
10463
10464 /*
10465  * Check referencer for normal (inlined) data ref
10466  * If len == 0, it will be resolved by searching in extent tree
10467  */
10468 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10469                                      u64 root_id, u64 objectid, u64 offset,
10470                                      u64 bytenr, u64 len, u32 count)
10471 {
10472         struct btrfs_root *root;
10473         struct btrfs_root *extent_root = fs_info->extent_root;
10474         struct btrfs_key key;
10475         struct btrfs_path path;
10476         struct extent_buffer *leaf;
10477         struct btrfs_file_extent_item *fi;
10478         u32 found_count = 0;
10479         int slot;
10480         int ret = 0;
10481
10482         if (!len) {
10483                 key.objectid = bytenr;
10484                 key.type = BTRFS_EXTENT_ITEM_KEY;
10485                 key.offset = (u64)-1;
10486
10487                 btrfs_init_path(&path);
10488                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10489                 if (ret < 0)
10490                         goto out;
10491                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10492                 if (ret)
10493                         goto out;
10494                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10495                 if (key.objectid != bytenr ||
10496                     key.type != BTRFS_EXTENT_ITEM_KEY)
10497                         goto out;
10498                 len = key.offset;
10499                 btrfs_release_path(&path);
10500         }
10501         key.objectid = root_id;
10502         key.type = BTRFS_ROOT_ITEM_KEY;
10503         key.offset = (u64)-1;
10504         btrfs_init_path(&path);
10505
10506         root = btrfs_read_fs_root(fs_info, &key);
10507         if (IS_ERR(root))
10508                 goto out;
10509
10510         key.objectid = objectid;
10511         key.type = BTRFS_EXTENT_DATA_KEY;
10512         /*
10513          * It can be nasty as data backref offset is
10514          * file offset - file extent offset, which is smaller or
10515          * equal to original backref offset.  The only special case is
10516          * overflow.  So we need to special check and do further search.
10517          */
10518         key.offset = offset & (1ULL << 63) ? 0 : offset;
10519
10520         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10521         if (ret < 0)
10522                 goto out;
10523
10524         /*
10525          * Search afterwards to get correct one
10526          * NOTE: As we must do a comprehensive check on the data backref to
10527          * make sure the dref count also matches, we must iterate all file
10528          * extents for that inode.
10529          */
10530         while (1) {
10531                 leaf = path.nodes[0];
10532                 slot = path.slots[0];
10533
10534                 btrfs_item_key_to_cpu(leaf, &key, slot);
10535                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10536                         break;
10537                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10538                 /*
10539                  * Except normal disk bytenr and disk num bytes, we still
10540                  * need to do extra check on dbackref offset as
10541                  * dbackref offset = file_offset - file_extent_offset
10542                  */
10543                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10544                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10545                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10546                     offset)
10547                         found_count++;
10548
10549                 ret = btrfs_next_item(root, &path);
10550                 if (ret)
10551                         break;
10552         }
10553 out:
10554         btrfs_release_path(&path);
10555         if (found_count != count) {
10556                 error(
10557 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10558                         bytenr, len, root_id, objectid, offset, count, found_count);
10559                 return REFERENCER_MISSING;
10560         }
10561         return 0;
10562 }
10563
10564 /*
10565  * Check if the referencer of a shared data backref exists
10566  */
10567 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10568                                      u64 parent, u64 bytenr)
10569 {
10570         struct extent_buffer *eb;
10571         struct btrfs_key key;
10572         struct btrfs_file_extent_item *fi;
10573         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10574         u32 nr;
10575         int found_parent = 0;
10576         int i;
10577
10578         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10579         if (!extent_buffer_uptodate(eb))
10580                 goto out;
10581
10582         nr = btrfs_header_nritems(eb);
10583         for (i = 0; i < nr; i++) {
10584                 btrfs_item_key_to_cpu(eb, &key, i);
10585                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10586                         continue;
10587
10588                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10589                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10590                         continue;
10591
10592                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10593                         found_parent = 1;
10594                         break;
10595                 }
10596         }
10597
10598 out:
10599         free_extent_buffer(eb);
10600         if (!found_parent) {
10601                 error("shared extent %llu referencer lost (parent: %llu)",
10602                         bytenr, parent);
10603                 return REFERENCER_MISSING;
10604         }
10605         return 0;
10606 }
10607
10608 /*
10609  * This function will check a given extent item, including its backref and
10610  * itself (like crossing stripe boundary and type)
10611  *
10612  * Since we don't use extent_record anymore, introduce new error bit
10613  */
10614 static int check_extent_item(struct btrfs_fs_info *fs_info,
10615                              struct extent_buffer *eb, int slot)
10616 {
10617         struct btrfs_extent_item *ei;
10618         struct btrfs_extent_inline_ref *iref;
10619         struct btrfs_extent_data_ref *dref;
10620         unsigned long end;
10621         unsigned long ptr;
10622         int type;
10623         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10624         u32 item_size = btrfs_item_size_nr(eb, slot);
10625         u64 flags;
10626         u64 offset;
10627         int metadata = 0;
10628         int level;
10629         struct btrfs_key key;
10630         int ret;
10631         int err = 0;
10632
10633         btrfs_item_key_to_cpu(eb, &key, slot);
10634         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10635                 bytes_used += key.offset;
10636         else
10637                 bytes_used += nodesize;
10638
10639         if (item_size < sizeof(*ei)) {
10640                 /*
10641                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10642                  * old thing when on disk format is still un-determined.
10643                  * No need to care about it anymore
10644                  */
10645                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10646                 return -ENOTTY;
10647         }
10648
10649         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10650         flags = btrfs_extent_flags(eb, ei);
10651
10652         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10653                 metadata = 1;
10654         if (metadata && check_crossing_stripes(global_info, key.objectid,
10655                                                eb->len)) {
10656                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10657                       key.objectid, key.objectid + nodesize);
10658                 err |= CROSSING_STRIPE_BOUNDARY;
10659         }
10660
10661         ptr = (unsigned long)(ei + 1);
10662
10663         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10664                 /* Old EXTENT_ITEM metadata */
10665                 struct btrfs_tree_block_info *info;
10666
10667                 info = (struct btrfs_tree_block_info *)ptr;
10668                 level = btrfs_tree_block_level(eb, info);
10669                 ptr += sizeof(struct btrfs_tree_block_info);
10670         } else {
10671                 /* New METADATA_ITEM */
10672                 level = key.offset;
10673         }
10674         end = (unsigned long)ei + item_size;
10675
10676         if (ptr >= end) {
10677                 err |= ITEM_SIZE_MISMATCH;
10678                 goto out;
10679         }
10680
10681         /* Now check every backref in this extent item */
10682 next:
10683         iref = (struct btrfs_extent_inline_ref *)ptr;
10684         type = btrfs_extent_inline_ref_type(eb, iref);
10685         offset = btrfs_extent_inline_ref_offset(eb, iref);
10686         switch (type) {
10687         case BTRFS_TREE_BLOCK_REF_KEY:
10688                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10689                                                level);
10690                 err |= ret;
10691                 break;
10692         case BTRFS_SHARED_BLOCK_REF_KEY:
10693                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10694                                                  level);
10695                 err |= ret;
10696                 break;
10697         case BTRFS_EXTENT_DATA_REF_KEY:
10698                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10699                 ret = check_extent_data_backref(fs_info,
10700                                 btrfs_extent_data_ref_root(eb, dref),
10701                                 btrfs_extent_data_ref_objectid(eb, dref),
10702                                 btrfs_extent_data_ref_offset(eb, dref),
10703                                 key.objectid, key.offset,
10704                                 btrfs_extent_data_ref_count(eb, dref));
10705                 err |= ret;
10706                 break;
10707         case BTRFS_SHARED_DATA_REF_KEY:
10708                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10709                 err |= ret;
10710                 break;
10711         default:
10712                 error("extent[%llu %d %llu] has unknown ref type: %d",
10713                         key.objectid, key.type, key.offset, type);
10714                 err |= UNKNOWN_TYPE;
10715                 goto out;
10716         }
10717
10718         ptr += btrfs_extent_inline_ref_size(type);
10719         if (ptr < end)
10720                 goto next;
10721
10722 out:
10723         return err;
10724 }
10725
10726 /*
10727  * Check if a dev extent item is referred correctly by its chunk
10728  */
10729 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10730                                  struct extent_buffer *eb, int slot)
10731 {
10732         struct btrfs_root *chunk_root = fs_info->chunk_root;
10733         struct btrfs_dev_extent *ptr;
10734         struct btrfs_path path;
10735         struct btrfs_key chunk_key;
10736         struct btrfs_key devext_key;
10737         struct btrfs_chunk *chunk;
10738         struct extent_buffer *l;
10739         int num_stripes;
10740         u64 length;
10741         int i;
10742         int found_chunk = 0;
10743         int ret;
10744
10745         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10746         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10747         length = btrfs_dev_extent_length(eb, ptr);
10748
10749         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10750         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10751         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10752
10753         btrfs_init_path(&path);
10754         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10755         if (ret)
10756                 goto out;
10757
10758         l = path.nodes[0];
10759         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10760         if (btrfs_chunk_length(l, chunk) != length)
10761                 goto out;
10762
10763         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10764         for (i = 0; i < num_stripes; i++) {
10765                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10766                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10767
10768                 if (devid == devext_key.objectid &&
10769                     offset == devext_key.offset) {
10770                         found_chunk = 1;
10771                         break;
10772                 }
10773         }
10774 out:
10775         btrfs_release_path(&path);
10776         if (!found_chunk) {
10777                 error(
10778                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10779                         devext_key.objectid, devext_key.offset, length);
10780                 return REFERENCER_MISSING;
10781         }
10782         return 0;
10783 }
10784
10785 /*
10786  * Check if the used space is correct with the dev item
10787  */
10788 static int check_dev_item(struct btrfs_fs_info *fs_info,
10789                           struct extent_buffer *eb, int slot)
10790 {
10791         struct btrfs_root *dev_root = fs_info->dev_root;
10792         struct btrfs_dev_item *dev_item;
10793         struct btrfs_path path;
10794         struct btrfs_key key;
10795         struct btrfs_dev_extent *ptr;
10796         u64 dev_id;
10797         u64 used;
10798         u64 total = 0;
10799         int ret;
10800
10801         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10802         dev_id = btrfs_device_id(eb, dev_item);
10803         used = btrfs_device_bytes_used(eb, dev_item);
10804
10805         key.objectid = dev_id;
10806         key.type = BTRFS_DEV_EXTENT_KEY;
10807         key.offset = 0;
10808
10809         btrfs_init_path(&path);
10810         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10811         if (ret < 0) {
10812                 btrfs_item_key_to_cpu(eb, &key, slot);
10813                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10814                         key.objectid, key.type, key.offset);
10815                 btrfs_release_path(&path);
10816                 return REFERENCER_MISSING;
10817         }
10818
10819         /* Iterate dev_extents to calculate the used space of a device */
10820         while (1) {
10821                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10822
10823                 if (key.objectid > dev_id)
10824                         break;
10825                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10826                         goto next;
10827
10828                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10829                                      struct btrfs_dev_extent);
10830                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10831 next:
10832                 ret = btrfs_next_item(dev_root, &path);
10833                 if (ret)
10834                         break;
10835         }
10836         btrfs_release_path(&path);
10837
10838         if (used != total) {
10839                 btrfs_item_key_to_cpu(eb, &key, slot);
10840                 error(
10841 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10842                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10843                         BTRFS_DEV_EXTENT_KEY, dev_id);
10844                 return ACCOUNTING_MISMATCH;
10845         }
10846         return 0;
10847 }
10848
10849 /*
10850  * Check a block group item with its referener (chunk) and its used space
10851  * with extent/metadata item
10852  */
10853 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10854                                   struct extent_buffer *eb, int slot)
10855 {
10856         struct btrfs_root *extent_root = fs_info->extent_root;
10857         struct btrfs_root *chunk_root = fs_info->chunk_root;
10858         struct btrfs_block_group_item *bi;
10859         struct btrfs_block_group_item bg_item;
10860         struct btrfs_path path;
10861         struct btrfs_key bg_key;
10862         struct btrfs_key chunk_key;
10863         struct btrfs_key extent_key;
10864         struct btrfs_chunk *chunk;
10865         struct extent_buffer *leaf;
10866         struct btrfs_extent_item *ei;
10867         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10868         u64 flags;
10869         u64 bg_flags;
10870         u64 used;
10871         u64 total = 0;
10872         int ret;
10873         int err = 0;
10874
10875         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10876         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10877         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10878         used = btrfs_block_group_used(&bg_item);
10879         bg_flags = btrfs_block_group_flags(&bg_item);
10880
10881         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10882         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10883         chunk_key.offset = bg_key.objectid;
10884
10885         btrfs_init_path(&path);
10886         /* Search for the referencer chunk */
10887         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10888         if (ret) {
10889                 error(
10890                 "block group[%llu %llu] did not find the related chunk item",
10891                         bg_key.objectid, bg_key.offset);
10892                 err |= REFERENCER_MISSING;
10893         } else {
10894                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10895                                         struct btrfs_chunk);
10896                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10897                                                 bg_key.offset) {
10898                         error(
10899         "block group[%llu %llu] related chunk item length does not match",
10900                                 bg_key.objectid, bg_key.offset);
10901                         err |= REFERENCER_MISMATCH;
10902                 }
10903         }
10904         btrfs_release_path(&path);
10905
10906         /* Search from the block group bytenr */
10907         extent_key.objectid = bg_key.objectid;
10908         extent_key.type = 0;
10909         extent_key.offset = 0;
10910
10911         btrfs_init_path(&path);
10912         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10913         if (ret < 0)
10914                 goto out;
10915
10916         /* Iterate extent tree to account used space */
10917         while (1) {
10918                 leaf = path.nodes[0];
10919                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10920                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10921                         break;
10922
10923                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10924                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10925                         goto next;
10926                 if (extent_key.objectid < bg_key.objectid)
10927                         goto next;
10928
10929                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10930                         total += nodesize;
10931                 else
10932                         total += extent_key.offset;
10933
10934                 ei = btrfs_item_ptr(leaf, path.slots[0],
10935                                     struct btrfs_extent_item);
10936                 flags = btrfs_extent_flags(leaf, ei);
10937                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10938                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10939                                 error(
10940                         "bad extent[%llu, %llu) type mismatch with chunk",
10941                                         extent_key.objectid,
10942                                         extent_key.objectid + extent_key.offset);
10943                                 err |= CHUNK_TYPE_MISMATCH;
10944                         }
10945                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10946                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10947                                     BTRFS_BLOCK_GROUP_METADATA))) {
10948                                 error(
10949                         "bad extent[%llu, %llu) type mismatch with chunk",
10950                                         extent_key.objectid,
10951                                         extent_key.objectid + nodesize);
10952                                 err |= CHUNK_TYPE_MISMATCH;
10953                         }
10954                 }
10955 next:
10956                 ret = btrfs_next_item(extent_root, &path);
10957                 if (ret)
10958                         break;
10959         }
10960
10961 out:
10962         btrfs_release_path(&path);
10963
10964         if (total != used) {
10965                 error(
10966                 "block group[%llu %llu] used %llu but extent items used %llu",
10967                         bg_key.objectid, bg_key.offset, used, total);
10968                 err |= ACCOUNTING_MISMATCH;
10969         }
10970         return err;
10971 }
10972
10973 /*
10974  * Check a chunk item.
10975  * Including checking all referred dev_extents and block group
10976  */
10977 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10978                             struct extent_buffer *eb, int slot)
10979 {
10980         struct btrfs_root *extent_root = fs_info->extent_root;
10981         struct btrfs_root *dev_root = fs_info->dev_root;
10982         struct btrfs_path path;
10983         struct btrfs_key chunk_key;
10984         struct btrfs_key bg_key;
10985         struct btrfs_key devext_key;
10986         struct btrfs_chunk *chunk;
10987         struct extent_buffer *leaf;
10988         struct btrfs_block_group_item *bi;
10989         struct btrfs_block_group_item bg_item;
10990         struct btrfs_dev_extent *ptr;
10991         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10992         u64 length;
10993         u64 chunk_end;
10994         u64 type;
10995         u64 profile;
10996         int num_stripes;
10997         u64 offset;
10998         u64 objectid;
10999         int i;
11000         int ret;
11001         int err = 0;
11002
11003         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11004         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11005         length = btrfs_chunk_length(eb, chunk);
11006         chunk_end = chunk_key.offset + length;
11007         if (!IS_ALIGNED(length, sectorsize)) {
11008                 error("chunk[%llu %llu) not aligned to %u",
11009                         chunk_key.offset, chunk_end, sectorsize);
11010                 err |= BYTES_UNALIGNED;
11011                 goto out;
11012         }
11013
11014         type = btrfs_chunk_type(eb, chunk);
11015         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11016         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11017                 error("chunk[%llu %llu) has no chunk type",
11018                         chunk_key.offset, chunk_end);
11019                 err |= UNKNOWN_TYPE;
11020         }
11021         if (profile && (profile & (profile - 1))) {
11022                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11023                         chunk_key.offset, chunk_end, profile);
11024                 err |= UNKNOWN_TYPE;
11025         }
11026
11027         bg_key.objectid = chunk_key.offset;
11028         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11029         bg_key.offset = length;
11030
11031         btrfs_init_path(&path);
11032         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11033         if (ret) {
11034                 error(
11035                 "chunk[%llu %llu) did not find the related block group item",
11036                         chunk_key.offset, chunk_end);
11037                 err |= REFERENCER_MISSING;
11038         } else{
11039                 leaf = path.nodes[0];
11040                 bi = btrfs_item_ptr(leaf, path.slots[0],
11041                                     struct btrfs_block_group_item);
11042                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11043                                    sizeof(bg_item));
11044                 if (btrfs_block_group_flags(&bg_item) != type) {
11045                         error(
11046 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11047                                 chunk_key.offset, chunk_end, type,
11048                                 btrfs_block_group_flags(&bg_item));
11049                         err |= REFERENCER_MISSING;
11050                 }
11051         }
11052
11053         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11054         for (i = 0; i < num_stripes; i++) {
11055                 btrfs_release_path(&path);
11056                 btrfs_init_path(&path);
11057                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11058                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11059                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11060
11061                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11062                                         0, 0);
11063                 if (ret)
11064                         goto not_match_dev;
11065
11066                 leaf = path.nodes[0];
11067                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11068                                      struct btrfs_dev_extent);
11069                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11070                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11071                 if (objectid != chunk_key.objectid ||
11072                     offset != chunk_key.offset ||
11073                     btrfs_dev_extent_length(leaf, ptr) != length)
11074                         goto not_match_dev;
11075                 continue;
11076 not_match_dev:
11077                 err |= BACKREF_MISSING;
11078                 error(
11079                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11080                         chunk_key.objectid, chunk_end, i);
11081                 continue;
11082         }
11083         btrfs_release_path(&path);
11084 out:
11085         return err;
11086 }
11087
11088 /*
11089  * Main entry function to check known items and update related accounting info
11090  */
11091 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11092 {
11093         struct btrfs_fs_info *fs_info = root->fs_info;
11094         struct btrfs_key key;
11095         int slot = 0;
11096         int type;
11097         struct btrfs_extent_data_ref *dref;
11098         int ret;
11099         int err = 0;
11100
11101 next:
11102         btrfs_item_key_to_cpu(eb, &key, slot);
11103         type = key.type;
11104
11105         switch (type) {
11106         case BTRFS_EXTENT_DATA_KEY:
11107                 ret = check_extent_data_item(root, eb, slot);
11108                 err |= ret;
11109                 break;
11110         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11111                 ret = check_block_group_item(fs_info, eb, slot);
11112                 err |= ret;
11113                 break;
11114         case BTRFS_DEV_ITEM_KEY:
11115                 ret = check_dev_item(fs_info, eb, slot);
11116                 err |= ret;
11117                 break;
11118         case BTRFS_CHUNK_ITEM_KEY:
11119                 ret = check_chunk_item(fs_info, eb, slot);
11120                 err |= ret;
11121                 break;
11122         case BTRFS_DEV_EXTENT_KEY:
11123                 ret = check_dev_extent_item(fs_info, eb, slot);
11124                 err |= ret;
11125                 break;
11126         case BTRFS_EXTENT_ITEM_KEY:
11127         case BTRFS_METADATA_ITEM_KEY:
11128                 ret = check_extent_item(fs_info, eb, slot);
11129                 err |= ret;
11130                 break;
11131         case BTRFS_EXTENT_CSUM_KEY:
11132                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11133                 break;
11134         case BTRFS_TREE_BLOCK_REF_KEY:
11135                 ret = check_tree_block_backref(fs_info, key.offset,
11136                                                key.objectid, -1);
11137                 err |= ret;
11138                 break;
11139         case BTRFS_EXTENT_DATA_REF_KEY:
11140                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11141                 ret = check_extent_data_backref(fs_info,
11142                                 btrfs_extent_data_ref_root(eb, dref),
11143                                 btrfs_extent_data_ref_objectid(eb, dref),
11144                                 btrfs_extent_data_ref_offset(eb, dref),
11145                                 key.objectid, 0,
11146                                 btrfs_extent_data_ref_count(eb, dref));
11147                 err |= ret;
11148                 break;
11149         case BTRFS_SHARED_BLOCK_REF_KEY:
11150                 ret = check_shared_block_backref(fs_info, key.offset,
11151                                                  key.objectid, -1);
11152                 err |= ret;
11153                 break;
11154         case BTRFS_SHARED_DATA_REF_KEY:
11155                 ret = check_shared_data_backref(fs_info, key.offset,
11156                                                 key.objectid);
11157                 err |= ret;
11158                 break;
11159         default:
11160                 break;
11161         }
11162
11163         if (++slot < btrfs_header_nritems(eb))
11164                 goto next;
11165
11166         return err;
11167 }
11168
11169 /*
11170  * Helper function for later fs/subvol tree check.  To determine if a tree
11171  * block should be checked.
11172  * This function will ensure only the direct referencer with lowest rootid to
11173  * check a fs/subvolume tree block.
11174  *
11175  * Backref check at extent tree would detect errors like missing subvolume
11176  * tree, so we can do aggressive check to reduce duplicated checks.
11177  */
11178 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11179 {
11180         struct btrfs_root *extent_root = root->fs_info->extent_root;
11181         struct btrfs_key key;
11182         struct btrfs_path path;
11183         struct extent_buffer *leaf;
11184         int slot;
11185         struct btrfs_extent_item *ei;
11186         unsigned long ptr;
11187         unsigned long end;
11188         int type;
11189         u32 item_size;
11190         u64 offset;
11191         struct btrfs_extent_inline_ref *iref;
11192         int ret;
11193
11194         btrfs_init_path(&path);
11195         key.objectid = btrfs_header_bytenr(eb);
11196         key.type = BTRFS_METADATA_ITEM_KEY;
11197         key.offset = (u64)-1;
11198
11199         /*
11200          * Any failure in backref resolving means we can't determine
11201          * whom the tree block belongs to.
11202          * So in that case, we need to check that tree block
11203          */
11204         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11205         if (ret < 0)
11206                 goto need_check;
11207
11208         ret = btrfs_previous_extent_item(extent_root, &path,
11209                                          btrfs_header_bytenr(eb));
11210         if (ret)
11211                 goto need_check;
11212
11213         leaf = path.nodes[0];
11214         slot = path.slots[0];
11215         btrfs_item_key_to_cpu(leaf, &key, slot);
11216         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11217
11218         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11219                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11220         } else {
11221                 struct btrfs_tree_block_info *info;
11222
11223                 info = (struct btrfs_tree_block_info *)(ei + 1);
11224                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11225         }
11226
11227         item_size = btrfs_item_size_nr(leaf, slot);
11228         ptr = (unsigned long)iref;
11229         end = (unsigned long)ei + item_size;
11230         while (ptr < end) {
11231                 iref = (struct btrfs_extent_inline_ref *)ptr;
11232                 type = btrfs_extent_inline_ref_type(leaf, iref);
11233                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11234
11235                 /*
11236                  * We only check the tree block if current root is
11237                  * the lowest referencer of it.
11238                  */
11239                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11240                     offset < root->objectid) {
11241                         btrfs_release_path(&path);
11242                         return 0;
11243                 }
11244
11245                 ptr += btrfs_extent_inline_ref_size(type);
11246         }
11247         /*
11248          * Normally we should also check keyed tree block ref, but that may be
11249          * very time consuming.  Inlined ref should already make us skip a lot
11250          * of refs now.  So skip search keyed tree block ref.
11251          */
11252
11253 need_check:
11254         btrfs_release_path(&path);
11255         return 1;
11256 }
11257
11258 /*
11259  * Traversal function for tree block. We will do:
11260  * 1) Skip shared fs/subvolume tree blocks
11261  * 2) Update related bytes accounting
11262  * 3) Pre-order traversal
11263  */
11264 static int traverse_tree_block(struct btrfs_root *root,
11265                                 struct extent_buffer *node)
11266 {
11267         struct extent_buffer *eb;
11268         struct btrfs_key key;
11269         struct btrfs_key drop_key;
11270         int level;
11271         u64 nr;
11272         int i;
11273         int err = 0;
11274         int ret;
11275
11276         /*
11277          * Skip shared fs/subvolume tree block, in that case they will
11278          * be checked by referencer with lowest rootid
11279          */
11280         if (is_fstree(root->objectid) && !should_check(root, node))
11281                 return 0;
11282
11283         /* Update bytes accounting */
11284         total_btree_bytes += node->len;
11285         if (fs_root_objectid(btrfs_header_owner(node)))
11286                 total_fs_tree_bytes += node->len;
11287         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11288                 total_extent_tree_bytes += node->len;
11289         if (!found_old_backref &&
11290             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11291             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11292             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11293                 found_old_backref = 1;
11294
11295         /* pre-order tranversal, check itself first */
11296         level = btrfs_header_level(node);
11297         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11298                                    btrfs_header_level(node),
11299                                    btrfs_header_owner(node));
11300         err |= ret;
11301         if (err)
11302                 error(
11303         "check %s failed root %llu bytenr %llu level %d, force continue check",
11304                         level ? "node":"leaf", root->objectid,
11305                         btrfs_header_bytenr(node), btrfs_header_level(node));
11306
11307         if (!level) {
11308                 btree_space_waste += btrfs_leaf_free_space(root, node);
11309                 ret = check_leaf_items(root, node);
11310                 err |= ret;
11311                 return err;
11312         }
11313
11314         nr = btrfs_header_nritems(node);
11315         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11316         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11317                 sizeof(struct btrfs_key_ptr);
11318
11319         /* Then check all its children */
11320         for (i = 0; i < nr; i++) {
11321                 u64 blocknr = btrfs_node_blockptr(node, i);
11322
11323                 btrfs_node_key_to_cpu(node, &key, i);
11324                 if (level == root->root_item.drop_level &&
11325                     is_dropped_key(&key, &drop_key))
11326                         continue;
11327
11328                 /*
11329                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11330                  * to call the function itself.
11331                  */
11332                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11333                 if (extent_buffer_uptodate(eb)) {
11334                         ret = traverse_tree_block(root, eb);
11335                         err |= ret;
11336                 }
11337                 free_extent_buffer(eb);
11338         }
11339
11340         return err;
11341 }
11342
11343 /*
11344  * Low memory usage version check_chunks_and_extents.
11345  */
11346 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11347 {
11348         struct btrfs_path path;
11349         struct btrfs_key key;
11350         struct btrfs_root *root1;
11351         struct btrfs_root *cur_root;
11352         int err = 0;
11353         int ret;
11354
11355         root1 = root->fs_info->chunk_root;
11356         ret = traverse_tree_block(root1, root1->node);
11357         err |= ret;
11358
11359         root1 = root->fs_info->tree_root;
11360         ret = traverse_tree_block(root1, root1->node);
11361         err |= ret;
11362
11363         btrfs_init_path(&path);
11364         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11365         key.offset = 0;
11366         key.type = BTRFS_ROOT_ITEM_KEY;
11367
11368         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11369         if (ret) {
11370                 error("cannot find extent treet in tree_root");
11371                 goto out;
11372         }
11373
11374         while (1) {
11375                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11376                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11377                         goto next;
11378                 key.offset = (u64)-1;
11379
11380                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11381                 if (IS_ERR(cur_root) || !cur_root) {
11382                         error("failed to read tree: %lld", key.objectid);
11383                         goto next;
11384                 }
11385
11386                 ret = traverse_tree_block(cur_root, cur_root->node);
11387                 err |= ret;
11388
11389 next:
11390                 ret = btrfs_next_item(root1, &path);
11391                 if (ret)
11392                         goto out;
11393         }
11394
11395 out:
11396         btrfs_release_path(&path);
11397         return err;
11398 }
11399
11400 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11401                            struct btrfs_root *root, int overwrite)
11402 {
11403         struct extent_buffer *c;
11404         struct extent_buffer *old = root->node;
11405         int level;
11406         int ret;
11407         struct btrfs_disk_key disk_key = {0,0,0};
11408
11409         level = 0;
11410
11411         if (overwrite) {
11412                 c = old;
11413                 extent_buffer_get(c);
11414                 goto init;
11415         }
11416         c = btrfs_alloc_free_block(trans, root,
11417                                    root->nodesize,
11418                                    root->root_key.objectid,
11419                                    &disk_key, level, 0, 0);
11420         if (IS_ERR(c)) {
11421                 c = old;
11422                 extent_buffer_get(c);
11423                 overwrite = 1;
11424         }
11425 init:
11426         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11427         btrfs_set_header_level(c, level);
11428         btrfs_set_header_bytenr(c, c->start);
11429         btrfs_set_header_generation(c, trans->transid);
11430         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11431         btrfs_set_header_owner(c, root->root_key.objectid);
11432
11433         write_extent_buffer(c, root->fs_info->fsid,
11434                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11435
11436         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11437                             btrfs_header_chunk_tree_uuid(c),
11438                             BTRFS_UUID_SIZE);
11439
11440         btrfs_mark_buffer_dirty(c);
11441         /*
11442          * this case can happen in the following case:
11443          *
11444          * 1.overwrite previous root.
11445          *
11446          * 2.reinit reloc data root, this is because we skip pin
11447          * down reloc data tree before which means we can allocate
11448          * same block bytenr here.
11449          */
11450         if (old->start == c->start) {
11451                 btrfs_set_root_generation(&root->root_item,
11452                                           trans->transid);
11453                 root->root_item.level = btrfs_header_level(root->node);
11454                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11455                                         &root->root_key, &root->root_item);
11456                 if (ret) {
11457                         free_extent_buffer(c);
11458                         return ret;
11459                 }
11460         }
11461         free_extent_buffer(old);
11462         root->node = c;
11463         add_root_to_dirty_list(root);
11464         return 0;
11465 }
11466
11467 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11468                                 struct extent_buffer *eb, int tree_root)
11469 {
11470         struct extent_buffer *tmp;
11471         struct btrfs_root_item *ri;
11472         struct btrfs_key key;
11473         u64 bytenr;
11474         u32 nodesize;
11475         int level = btrfs_header_level(eb);
11476         int nritems;
11477         int ret;
11478         int i;
11479
11480         /*
11481          * If we have pinned this block before, don't pin it again.
11482          * This can not only avoid forever loop with broken filesystem
11483          * but also give us some speedups.
11484          */
11485         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11486                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11487                 return 0;
11488
11489         btrfs_pin_extent(fs_info, eb->start, eb->len);
11490
11491         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11492         nritems = btrfs_header_nritems(eb);
11493         for (i = 0; i < nritems; i++) {
11494                 if (level == 0) {
11495                         btrfs_item_key_to_cpu(eb, &key, i);
11496                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11497                                 continue;
11498                         /* Skip the extent root and reloc roots */
11499                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11500                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11501                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11502                                 continue;
11503                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11504                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11505
11506                         /*
11507                          * If at any point we start needing the real root we
11508                          * will have to build a stump root for the root we are
11509                          * in, but for now this doesn't actually use the root so
11510                          * just pass in extent_root.
11511                          */
11512                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11513                                               nodesize, 0);
11514                         if (!extent_buffer_uptodate(tmp)) {
11515                                 fprintf(stderr, "Error reading root block\n");
11516                                 return -EIO;
11517                         }
11518                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11519                         free_extent_buffer(tmp);
11520                         if (ret)
11521                                 return ret;
11522                 } else {
11523                         bytenr = btrfs_node_blockptr(eb, i);
11524
11525                         /* If we aren't the tree root don't read the block */
11526                         if (level == 1 && !tree_root) {
11527                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11528                                 continue;
11529                         }
11530
11531                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11532                                               nodesize, 0);
11533                         if (!extent_buffer_uptodate(tmp)) {
11534                                 fprintf(stderr, "Error reading tree block\n");
11535                                 return -EIO;
11536                         }
11537                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11538                         free_extent_buffer(tmp);
11539                         if (ret)
11540                                 return ret;
11541                 }
11542         }
11543
11544         return 0;
11545 }
11546
11547 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11548 {
11549         int ret;
11550
11551         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11552         if (ret)
11553                 return ret;
11554
11555         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11556 }
11557
11558 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11559 {
11560         struct btrfs_block_group_cache *cache;
11561         struct btrfs_path path;
11562         struct extent_buffer *leaf;
11563         struct btrfs_chunk *chunk;
11564         struct btrfs_key key;
11565         int ret;
11566         u64 start;
11567
11568         btrfs_init_path(&path);
11569         key.objectid = 0;
11570         key.type = BTRFS_CHUNK_ITEM_KEY;
11571         key.offset = 0;
11572         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11573         if (ret < 0) {
11574                 btrfs_release_path(&path);
11575                 return ret;
11576         }
11577
11578         /*
11579          * We do this in case the block groups were screwed up and had alloc
11580          * bits that aren't actually set on the chunks.  This happens with
11581          * restored images every time and could happen in real life I guess.
11582          */
11583         fs_info->avail_data_alloc_bits = 0;
11584         fs_info->avail_metadata_alloc_bits = 0;
11585         fs_info->avail_system_alloc_bits = 0;
11586
11587         /* First we need to create the in-memory block groups */
11588         while (1) {
11589                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11590                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11591                         if (ret < 0) {
11592                                 btrfs_release_path(&path);
11593                                 return ret;
11594                         }
11595                         if (ret) {
11596                                 ret = 0;
11597                                 break;
11598                         }
11599                 }
11600                 leaf = path.nodes[0];
11601                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11602                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11603                         path.slots[0]++;
11604                         continue;
11605                 }
11606
11607                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11608                 btrfs_add_block_group(fs_info, 0,
11609                                       btrfs_chunk_type(leaf, chunk),
11610                                       key.objectid, key.offset,
11611                                       btrfs_chunk_length(leaf, chunk));
11612                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11613                                  key.offset + btrfs_chunk_length(leaf, chunk),
11614                                  GFP_NOFS);
11615                 path.slots[0]++;
11616         }
11617         start = 0;
11618         while (1) {
11619                 cache = btrfs_lookup_first_block_group(fs_info, start);
11620                 if (!cache)
11621                         break;
11622                 cache->cached = 1;
11623                 start = cache->key.objectid + cache->key.offset;
11624         }
11625
11626         btrfs_release_path(&path);
11627         return 0;
11628 }
11629
11630 static int reset_balance(struct btrfs_trans_handle *trans,
11631                          struct btrfs_fs_info *fs_info)
11632 {
11633         struct btrfs_root *root = fs_info->tree_root;
11634         struct btrfs_path path;
11635         struct extent_buffer *leaf;
11636         struct btrfs_key key;
11637         int del_slot, del_nr = 0;
11638         int ret;
11639         int found = 0;
11640
11641         btrfs_init_path(&path);
11642         key.objectid = BTRFS_BALANCE_OBJECTID;
11643         key.type = BTRFS_BALANCE_ITEM_KEY;
11644         key.offset = 0;
11645         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11646         if (ret) {
11647                 if (ret > 0)
11648                         ret = 0;
11649                 if (!ret)
11650                         goto reinit_data_reloc;
11651                 else
11652                         goto out;
11653         }
11654
11655         ret = btrfs_del_item(trans, root, &path);
11656         if (ret)
11657                 goto out;
11658         btrfs_release_path(&path);
11659
11660         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11661         key.type = BTRFS_ROOT_ITEM_KEY;
11662         key.offset = 0;
11663         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11664         if (ret < 0)
11665                 goto out;
11666         while (1) {
11667                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11668                         if (!found)
11669                                 break;
11670
11671                         if (del_nr) {
11672                                 ret = btrfs_del_items(trans, root, &path,
11673                                                       del_slot, del_nr);
11674                                 del_nr = 0;
11675                                 if (ret)
11676                                         goto out;
11677                         }
11678                         key.offset++;
11679                         btrfs_release_path(&path);
11680
11681                         found = 0;
11682                         ret = btrfs_search_slot(trans, root, &key, &path,
11683                                                 -1, 1);
11684                         if (ret < 0)
11685                                 goto out;
11686                         continue;
11687                 }
11688                 found = 1;
11689                 leaf = path.nodes[0];
11690                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11691                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11692                         break;
11693                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11694                         path.slots[0]++;
11695                         continue;
11696                 }
11697                 if (!del_nr) {
11698                         del_slot = path.slots[0];
11699                         del_nr = 1;
11700                 } else {
11701                         del_nr++;
11702                 }
11703                 path.slots[0]++;
11704         }
11705
11706         if (del_nr) {
11707                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11708                 if (ret)
11709                         goto out;
11710         }
11711         btrfs_release_path(&path);
11712
11713 reinit_data_reloc:
11714         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11715         key.type = BTRFS_ROOT_ITEM_KEY;
11716         key.offset = (u64)-1;
11717         root = btrfs_read_fs_root(fs_info, &key);
11718         if (IS_ERR(root)) {
11719                 fprintf(stderr, "Error reading data reloc tree\n");
11720                 ret = PTR_ERR(root);
11721                 goto out;
11722         }
11723         record_root_in_trans(trans, root);
11724         ret = btrfs_fsck_reinit_root(trans, root, 0);
11725         if (ret)
11726                 goto out;
11727         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11728 out:
11729         btrfs_release_path(&path);
11730         return ret;
11731 }
11732
11733 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11734                               struct btrfs_fs_info *fs_info)
11735 {
11736         u64 start = 0;
11737         int ret;
11738
11739         /*
11740          * The only reason we don't do this is because right now we're just
11741          * walking the trees we find and pinning down their bytes, we don't look
11742          * at any of the leaves.  In order to do mixed groups we'd have to check
11743          * the leaves of any fs roots and pin down the bytes for any file
11744          * extents we find.  Not hard but why do it if we don't have to?
11745          */
11746         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11747                 fprintf(stderr, "We don't support re-initing the extent tree "
11748                         "for mixed block groups yet, please notify a btrfs "
11749                         "developer you want to do this so they can add this "
11750                         "functionality.\n");
11751                 return -EINVAL;
11752         }
11753
11754         /*
11755          * first we need to walk all of the trees except the extent tree and pin
11756          * down the bytes that are in use so we don't overwrite any existing
11757          * metadata.
11758          */
11759         ret = pin_metadata_blocks(fs_info);
11760         if (ret) {
11761                 fprintf(stderr, "error pinning down used bytes\n");
11762                 return ret;
11763         }
11764
11765         /*
11766          * Need to drop all the block groups since we're going to recreate all
11767          * of them again.
11768          */
11769         btrfs_free_block_groups(fs_info);
11770         ret = reset_block_groups(fs_info);
11771         if (ret) {
11772                 fprintf(stderr, "error resetting the block groups\n");
11773                 return ret;
11774         }
11775
11776         /* Ok we can allocate now, reinit the extent root */
11777         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11778         if (ret) {
11779                 fprintf(stderr, "extent root initialization failed\n");
11780                 /*
11781                  * When the transaction code is updated we should end the
11782                  * transaction, but for now progs only knows about commit so
11783                  * just return an error.
11784                  */
11785                 return ret;
11786         }
11787
11788         /*
11789          * Now we have all the in-memory block groups setup so we can make
11790          * allocations properly, and the metadata we care about is safe since we
11791          * pinned all of it above.
11792          */
11793         while (1) {
11794                 struct btrfs_block_group_cache *cache;
11795
11796                 cache = btrfs_lookup_first_block_group(fs_info, start);
11797                 if (!cache)
11798                         break;
11799                 start = cache->key.objectid + cache->key.offset;
11800                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11801                                         &cache->key, &cache->item,
11802                                         sizeof(cache->item));
11803                 if (ret) {
11804                         fprintf(stderr, "Error adding block group\n");
11805                         return ret;
11806                 }
11807                 btrfs_extent_post_op(trans, fs_info->extent_root);
11808         }
11809
11810         ret = reset_balance(trans, fs_info);
11811         if (ret)
11812                 fprintf(stderr, "error resetting the pending balance\n");
11813
11814         return ret;
11815 }
11816
11817 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11818 {
11819         struct btrfs_path path;
11820         struct btrfs_trans_handle *trans;
11821         struct btrfs_key key;
11822         int ret;
11823
11824         printf("Recowing metadata block %llu\n", eb->start);
11825         key.objectid = btrfs_header_owner(eb);
11826         key.type = BTRFS_ROOT_ITEM_KEY;
11827         key.offset = (u64)-1;
11828
11829         root = btrfs_read_fs_root(root->fs_info, &key);
11830         if (IS_ERR(root)) {
11831                 fprintf(stderr, "Couldn't find owner root %llu\n",
11832                         key.objectid);
11833                 return PTR_ERR(root);
11834         }
11835
11836         trans = btrfs_start_transaction(root, 1);
11837         if (IS_ERR(trans))
11838                 return PTR_ERR(trans);
11839
11840         btrfs_init_path(&path);
11841         path.lowest_level = btrfs_header_level(eb);
11842         if (path.lowest_level)
11843                 btrfs_node_key_to_cpu(eb, &key, 0);
11844         else
11845                 btrfs_item_key_to_cpu(eb, &key, 0);
11846
11847         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11848         btrfs_commit_transaction(trans, root);
11849         btrfs_release_path(&path);
11850         return ret;
11851 }
11852
11853 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11854 {
11855         struct btrfs_path path;
11856         struct btrfs_trans_handle *trans;
11857         struct btrfs_key key;
11858         int ret;
11859
11860         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11861                bad->key.type, bad->key.offset);
11862         key.objectid = bad->root_id;
11863         key.type = BTRFS_ROOT_ITEM_KEY;
11864         key.offset = (u64)-1;
11865
11866         root = btrfs_read_fs_root(root->fs_info, &key);
11867         if (IS_ERR(root)) {
11868                 fprintf(stderr, "Couldn't find owner root %llu\n",
11869                         key.objectid);
11870                 return PTR_ERR(root);
11871         }
11872
11873         trans = btrfs_start_transaction(root, 1);
11874         if (IS_ERR(trans))
11875                 return PTR_ERR(trans);
11876
11877         btrfs_init_path(&path);
11878         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11879         if (ret) {
11880                 if (ret > 0)
11881                         ret = 0;
11882                 goto out;
11883         }
11884         ret = btrfs_del_item(trans, root, &path);
11885 out:
11886         btrfs_commit_transaction(trans, root);
11887         btrfs_release_path(&path);
11888         return ret;
11889 }
11890
11891 static int zero_log_tree(struct btrfs_root *root)
11892 {
11893         struct btrfs_trans_handle *trans;
11894         int ret;
11895
11896         trans = btrfs_start_transaction(root, 1);
11897         if (IS_ERR(trans)) {
11898                 ret = PTR_ERR(trans);
11899                 return ret;
11900         }
11901         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11902         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11903         ret = btrfs_commit_transaction(trans, root);
11904         return ret;
11905 }
11906
11907 static int populate_csum(struct btrfs_trans_handle *trans,
11908                          struct btrfs_root *csum_root, char *buf, u64 start,
11909                          u64 len)
11910 {
11911         u64 offset = 0;
11912         u64 sectorsize;
11913         int ret = 0;
11914
11915         while (offset < len) {
11916                 sectorsize = csum_root->sectorsize;
11917                 ret = read_extent_data(csum_root, buf, start + offset,
11918                                        &sectorsize, 0);
11919                 if (ret)
11920                         break;
11921                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11922                                             start + offset, buf, sectorsize);
11923                 if (ret)
11924                         break;
11925                 offset += sectorsize;
11926         }
11927         return ret;
11928 }
11929
11930 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11931                                       struct btrfs_root *csum_root,
11932                                       struct btrfs_root *cur_root)
11933 {
11934         struct btrfs_path path;
11935         struct btrfs_key key;
11936         struct extent_buffer *node;
11937         struct btrfs_file_extent_item *fi;
11938         char *buf = NULL;
11939         u64 start = 0;
11940         u64 len = 0;
11941         int slot = 0;
11942         int ret = 0;
11943
11944         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11945         if (!buf)
11946                 return -ENOMEM;
11947
11948         btrfs_init_path(&path);
11949         key.objectid = 0;
11950         key.offset = 0;
11951         key.type = 0;
11952         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11953         if (ret < 0)
11954                 goto out;
11955         /* Iterate all regular file extents and fill its csum */
11956         while (1) {
11957                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11958
11959                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11960                         goto next;
11961                 node = path.nodes[0];
11962                 slot = path.slots[0];
11963                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11964                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11965                         goto next;
11966                 start = btrfs_file_extent_disk_bytenr(node, fi);
11967                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11968
11969                 ret = populate_csum(trans, csum_root, buf, start, len);
11970                 if (ret == -EEXIST)
11971                         ret = 0;
11972                 if (ret < 0)
11973                         goto out;
11974 next:
11975                 /*
11976                  * TODO: if next leaf is corrupted, jump to nearest next valid
11977                  * leaf.
11978                  */
11979                 ret = btrfs_next_item(cur_root, &path);
11980                 if (ret < 0)
11981                         goto out;
11982                 if (ret > 0) {
11983                         ret = 0;
11984                         goto out;
11985                 }
11986         }
11987
11988 out:
11989         btrfs_release_path(&path);
11990         free(buf);
11991         return ret;
11992 }
11993
11994 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11995                                   struct btrfs_root *csum_root)
11996 {
11997         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11998         struct btrfs_path path;
11999         struct btrfs_root *tree_root = fs_info->tree_root;
12000         struct btrfs_root *cur_root;
12001         struct extent_buffer *node;
12002         struct btrfs_key key;
12003         int slot = 0;
12004         int ret = 0;
12005
12006         btrfs_init_path(&path);
12007         key.objectid = BTRFS_FS_TREE_OBJECTID;
12008         key.offset = 0;
12009         key.type = BTRFS_ROOT_ITEM_KEY;
12010         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12011         if (ret < 0)
12012                 goto out;
12013         if (ret > 0) {
12014                 ret = -ENOENT;
12015                 goto out;
12016         }
12017
12018         while (1) {
12019                 node = path.nodes[0];
12020                 slot = path.slots[0];
12021                 btrfs_item_key_to_cpu(node, &key, slot);
12022                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12023                         goto out;
12024                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12025                         goto next;
12026                 if (!is_fstree(key.objectid))
12027                         goto next;
12028                 key.offset = (u64)-1;
12029
12030                 cur_root = btrfs_read_fs_root(fs_info, &key);
12031                 if (IS_ERR(cur_root) || !cur_root) {
12032                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12033                                 key.objectid);
12034                         goto out;
12035                 }
12036                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12037                                 cur_root);
12038                 if (ret < 0)
12039                         goto out;
12040 next:
12041                 ret = btrfs_next_item(tree_root, &path);
12042                 if (ret > 0) {
12043                         ret = 0;
12044                         goto out;
12045                 }
12046                 if (ret < 0)
12047                         goto out;
12048         }
12049
12050 out:
12051         btrfs_release_path(&path);
12052         return ret;
12053 }
12054
12055 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12056                                       struct btrfs_root *csum_root)
12057 {
12058         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12059         struct btrfs_path path;
12060         struct btrfs_extent_item *ei;
12061         struct extent_buffer *leaf;
12062         char *buf;
12063         struct btrfs_key key;
12064         int ret;
12065
12066         btrfs_init_path(&path);
12067         key.objectid = 0;
12068         key.type = BTRFS_EXTENT_ITEM_KEY;
12069         key.offset = 0;
12070         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12071         if (ret < 0) {
12072                 btrfs_release_path(&path);
12073                 return ret;
12074         }
12075
12076         buf = malloc(csum_root->sectorsize);
12077         if (!buf) {
12078                 btrfs_release_path(&path);
12079                 return -ENOMEM;
12080         }
12081
12082         while (1) {
12083                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12084                         ret = btrfs_next_leaf(extent_root, &path);
12085                         if (ret < 0)
12086                                 break;
12087                         if (ret) {
12088                                 ret = 0;
12089                                 break;
12090                         }
12091                 }
12092                 leaf = path.nodes[0];
12093
12094                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12095                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12096                         path.slots[0]++;
12097                         continue;
12098                 }
12099
12100                 ei = btrfs_item_ptr(leaf, path.slots[0],
12101                                     struct btrfs_extent_item);
12102                 if (!(btrfs_extent_flags(leaf, ei) &
12103                       BTRFS_EXTENT_FLAG_DATA)) {
12104                         path.slots[0]++;
12105                         continue;
12106                 }
12107
12108                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12109                                     key.offset);
12110                 if (ret)
12111                         break;
12112                 path.slots[0]++;
12113         }
12114
12115         btrfs_release_path(&path);
12116         free(buf);
12117         return ret;
12118 }
12119
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees are
 * walked instead.  A non-zero search_fs_tree selects the fs/subvolume tree
 * walk; zero selects the extent tree walk.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12136
12137 static void free_roots_info_cache(void)
12138 {
12139         if (!roots_info_cache)
12140                 return;
12141
12142         while (!cache_tree_empty(roots_info_cache)) {
12143                 struct cache_extent *entry;
12144                 struct root_item_info *rii;
12145
12146                 entry = first_cache_extent(roots_info_cache);
12147                 if (!entry)
12148                         break;
12149                 remove_cache_extent(roots_info_cache, entry);
12150                 rii = container_of(entry, struct root_item_info, cache_extent);
12151                 free(rii);
12152         }
12153
12154         free(roots_info_cache);
12155         roots_info_cache = NULL;
12156 }
12157
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * Walks all items of the extent tree.  For each tree block extent whose
 * first inline ref is of type BTRFS_TREE_BLOCK_REF_KEY, the root id stored in
 * that ref selects (or creates) a root_item_info entry, which tracks the
 * highest block level seen for that root, the bytenr and generation of the
 * first block seen at that level, and how many blocks were seen at that level.
 *
 * The cache tree is allocated here on first use.  Nothing is freed in this
 * function on failure.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the negative value
 * returned by btrfs_search_slot() / btrfs_next_leaf().
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
{
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_path path;

        /* Lazily allocate the cache tree */
        if (!roots_info_cache) {
                roots_info_cache = malloc(sizeof(*roots_info_cache));
                if (!roots_info_cache)
                        return -ENOMEM;
                cache_tree_init(roots_info_cache);
        }

        /* Start the walk from the lowest possible extent item key */
        btrfs_init_path(&path);
        key.objectid = 0;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;
                struct btrfs_extent_item *ei;
                struct btrfs_extent_inline_ref *iref;
                int slot = path.slots[0];
                int type;
                u64 flags;
                u64 root_id;
                u8 level;
                struct cache_extent *entry;
                struct root_item_info *rii;

                /* End of this leaf: advance, refreshing leaf and slot */
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(info->extent_root, &path);
                        if (ret < 0) {
                                break;
                        } else if (ret) {
                                /* Positive return: no more leaves, done */
                                ret = 0;
                                break;
                        }
                        leaf = path.nodes[0];
                        slot = path.slots[0];
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                /* Only extent and metadata items are of interest */
                if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
                    found_key.type != BTRFS_METADATA_ITEM_KEY)
                        goto next;

                ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
                flags = btrfs_extent_flags(leaf, ei);

                /* An EXTENT_ITEM without the tree block flag is skipped */
                if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
                    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
                        goto next;

                if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
                        /*
                         * The inline ref directly follows the extent item and
                         * the level is taken from the key offset.
                         */
                        iref = (struct btrfs_extent_inline_ref *)(ei + 1);
                        level = found_key.offset;
                } else {
                        struct btrfs_tree_block_info *binfo;

                        /*
                         * A tree_block_info sits between the extent item and
                         * the inline ref, and holds the level.
                         */
                        binfo = (struct btrfs_tree_block_info *)(ei + 1);
                        iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
                        level = btrfs_tree_block_level(leaf, binfo);
                }

                /*
                 * For a root extent, it must be of the following type and the
                 * first (and only one) iref in the item.
                 */
                type = btrfs_extent_inline_ref_type(leaf, iref);
                if (type != BTRFS_TREE_BLOCK_REF_KEY)
                        goto next;

                /* Cache entries are keyed by root id, with a size of 1 */
                root_id = btrfs_extent_inline_ref_offset(leaf, iref);
                entry = lookup_cache_extent(roots_info_cache, root_id, 1);
                if (!entry) {
                        rii = malloc(sizeof(struct root_item_info));
                        if (!rii) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        rii->cache_extent.start = root_id;
                        rii->cache_extent.size = 1;
                        /* (u8)-1 marks "no level recorded yet" */
                        rii->level = (u8)-1;
                        entry = &rii->cache_extent;
                        ret = insert_cache_extent(roots_info_cache, entry);
                        ASSERT(ret == 0);
                } else {
                        rii = container_of(entry, struct root_item_info,
                                           cache_extent);
                }

                ASSERT(rii->cache_extent.start == root_id);
                ASSERT(rii->cache_extent.size == 1);

                /*
                 * A higher level (or the first block seen) replaces the
                 * recorded candidate; another block at the same level only
                 * bumps the count.
                 */
                if (level > rii->level || rii->level == (u8)-1) {
                        rii->level = level;
                        rii->bytenr = found_key.objectid;
                        rii->gen = btrfs_extent_generation(leaf, ei);
                        rii->node_count = 1;
                } else if (level == rii->level) {
                        rii->node_count++;
                }
next:
                path.slots[0]++;
        }

out:
        btrfs_release_path(&path);

        return ret;
}
12276
12277 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12278                                   struct btrfs_path *path,
12279                                   const struct btrfs_key *root_key,
12280                                   const int read_only_mode)
12281 {
12282         const u64 root_id = root_key->objectid;
12283         struct cache_extent *entry;
12284         struct root_item_info *rii;
12285         struct btrfs_root_item ri;
12286         unsigned long offset;
12287
12288         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12289         if (!entry) {
12290                 fprintf(stderr,
12291                         "Error: could not find extent items for root %llu\n",
12292                         root_key->objectid);
12293                 return -ENOENT;
12294         }
12295
12296         rii = container_of(entry, struct root_item_info, cache_extent);
12297         ASSERT(rii->cache_extent.start == root_id);
12298         ASSERT(rii->cache_extent.size == 1);
12299
12300         if (rii->node_count != 1) {
12301                 fprintf(stderr,
12302                         "Error: could not find btree root extent for root %llu\n",
12303                         root_id);
12304                 return -ENOENT;
12305         }
12306
12307         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12308         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12309
12310         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12311             btrfs_root_level(&ri) != rii->level ||
12312             btrfs_root_generation(&ri) != rii->gen) {
12313
12314                 /*
12315                  * If we're in repair mode but our caller told us to not update
12316                  * the root item, i.e. just check if it needs to be updated, don't
12317                  * print this message, since the caller will call us again shortly
12318                  * for the same root item without read only mode (the caller will
12319                  * open a transaction first).
12320                  */
12321                 if (!(read_only_mode && repair))
12322                         fprintf(stderr,
12323                                 "%sroot item for root %llu,"
12324                                 " current bytenr %llu, current gen %llu, current level %u,"
12325                                 " new bytenr %llu, new gen %llu, new level %u\n",
12326                                 (read_only_mode ? "" : "fixing "),
12327                                 root_id,
12328                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12329                                 btrfs_root_level(&ri),
12330                                 rii->bytenr, rii->gen, rii->level);
12331
12332                 if (btrfs_root_generation(&ri) > rii->gen) {
12333                         fprintf(stderr,
12334                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12335                                 root_id, btrfs_root_generation(&ri), rii->gen);
12336                         return -EINVAL;
12337                 }
12338
12339                 if (!read_only_mode) {
12340                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12341                         btrfs_set_root_level(&ri, rii->level);
12342                         btrfs_set_root_generation(&ri, rii->gen);
12343                         write_extent_buffer(path->nodes[0], &ri,
12344                                             offset, sizeof(ri));
12345                 }
12346
12347                 return 1;
12348         }
12349
12350         return 0;
12351 }
12352
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * This function and its callees detect and fix those cases. Even though the
 * regression was for read-only snapshots, this function applies to any
 * snapshot/subvolume root.
 * This must be run before any other repair code. Otherwise other repair code
 * may, for example, delete or modify backrefs in the extent tree, which will
 * result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative errno on failure. Otherwise returns the number of root
 * items for which maybe_repair_root_item() reported a mismatch (counted on
 * the pass that is not restarted, i.e. with a transaction open in repair
 * mode, or without one when not repairing).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
        struct btrfs_path path;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
        int bad_roots = 0;
        int need_trans = 0;

        btrfs_init_path(&path);

        /* Collect the per-root info that root items are checked against */
        ret = build_roots_info_cache(info);
        if (ret)
                goto out;

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;

again:
        /*
         * Avoid opening and committing transactions if a leaf doesn't have
         * any root items that need to be fixed, so that we avoid rotating
         * backup roots unnecessarily.
         */
        if (need_trans) {
                trans = btrfs_start_transaction(info->tree_root, 1);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        goto out;
                }
        }

        /* The last argument (cow) is set only when a transaction is open */
        ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
                                0, trans ? 1 : 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;

                if (path.slots[0] >= btrfs_header_nritems(leaf)) {
                        /*
                         * Leaf exhausted.  NOTE(review): find_next_key()
                         * presumably stores the key to resume from in 'key'
                         * and returns non-zero when there is none - confirm.
                         */
                        int no_more_keys = find_next_key(&path, &key);

                        btrfs_release_path(&path);
                        /* Commit the fixes made to this leaf, if any */
                        if (trans) {
                                ret = btrfs_commit_transaction(trans,
                                                               info->tree_root);
                                trans = NULL;
                                if (ret < 0)
                                        goto out;
                        }
                        need_trans = 0;
                        if (no_more_keys)
                                break;
                        goto again;
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                if (found_key.type != BTRFS_ROOT_ITEM_KEY)
                        goto next;
                if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        goto next;

                /* Check only (read-only mode) unless a transaction is open */
                ret = maybe_repair_root_item(info, &path, &found_key,
                                             trans ? 0 : 1);
                if (ret < 0)
                        goto out;
                if (ret) {
                        /*
                         * Mismatch found on a check-only pass in repair mode:
                         * restart from this key with a transaction so the
                         * item can actually be rewritten.
                         */
                        if (!trans && repair) {
                                need_trans = 1;
                                key = found_key;
                                btrfs_release_path(&path);
                                goto again;
                        }
                        bad_roots++;
                }
next:
                path.slots[0]++;
        }
        ret = 0;
out:
        free_roots_info_cache();
        btrfs_release_path(&path);
        /* A still-open transaction is committed; its result is not checked */
        if (trans)
                btrfs_commit_transaction(trans, info->tree_root);
        if (ret < 0)
                return ret;

        return bad_roots;
}
12460
12461 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12462 {
12463         struct btrfs_trans_handle *trans;
12464         struct btrfs_block_group_cache *bg_cache;
12465         u64 current = 0;
12466         int ret = 0;
12467
12468         /* Clear all free space cache inodes and its extent data */
12469         while (1) {
12470                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12471                 if (!bg_cache)
12472                         break;
12473                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12474                 if (ret < 0)
12475                         return ret;
12476                 current = bg_cache->key.objectid + bg_cache->key.offset;
12477         }
12478
12479         /* Don't forget to set cache_generation to -1 */
12480         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12481         if (IS_ERR(trans)) {
12482                 error("failed to update super block cache generation");
12483                 return PTR_ERR(trans);
12484         }
12485         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12486         btrfs_commit_transaction(trans, fs_info->tree_root);
12487
12488         return ret;
12489 }
12490
/*
 * Help text for "btrfs check".  cmd_check() hands this array to usage() on
 * an unknown option or a wrong argument count.  The array is NULL-terminated.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12521
12522 int cmd_check(int argc, char **argv)
12523 {
12524         struct cache_tree root_cache;
12525         struct btrfs_root *root;
12526         struct btrfs_fs_info *info;
12527         u64 bytenr = 0;
12528         u64 subvolid = 0;
12529         u64 tree_root_bytenr = 0;
12530         u64 chunk_root_bytenr = 0;
12531         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12532         int ret;
12533         int err = 0;
12534         u64 num;
12535         int init_csum_tree = 0;
12536         int readonly = 0;
12537         int clear_space_cache = 0;
12538         int qgroup_report = 0;
12539         int qgroups_repaired = 0;
12540         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12541
12542         while(1) {
12543                 int c;
12544                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12545                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12546                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12547                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12548                 static const struct option long_options[] = {
12549                         { "super", required_argument, NULL, 's' },
12550                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12551                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12552                         { "init-csum-tree", no_argument, NULL,
12553                                 GETOPT_VAL_INIT_CSUM },
12554                         { "init-extent-tree", no_argument, NULL,
12555                                 GETOPT_VAL_INIT_EXTENT },
12556                         { "check-data-csum", no_argument, NULL,
12557                                 GETOPT_VAL_CHECK_CSUM },
12558                         { "backup", no_argument, NULL, 'b' },
12559                         { "subvol-extents", required_argument, NULL, 'E' },
12560                         { "qgroup-report", no_argument, NULL, 'Q' },
12561                         { "tree-root", required_argument, NULL, 'r' },
12562                         { "chunk-root", required_argument, NULL,
12563                                 GETOPT_VAL_CHUNK_TREE },
12564                         { "progress", no_argument, NULL, 'p' },
12565                         { "mode", required_argument, NULL,
12566                                 GETOPT_VAL_MODE },
12567                         { "clear-space-cache", required_argument, NULL,
12568                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12569                         { NULL, 0, NULL, 0}
12570                 };
12571
12572                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12573                 if (c < 0)
12574                         break;
12575                 switch(c) {
12576                         case 'a': /* ignored */ break;
12577                         case 'b':
12578                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12579                                 break;
12580                         case 's':
12581                                 num = arg_strtou64(optarg);
12582                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12583                                         error(
12584                                         "super mirror should be less than %d",
12585                                                 BTRFS_SUPER_MIRROR_MAX);
12586                                         exit(1);
12587                                 }
12588                                 bytenr = btrfs_sb_offset(((int)num));
12589                                 printf("using SB copy %llu, bytenr %llu\n", num,
12590                                        (unsigned long long)bytenr);
12591                                 break;
12592                         case 'Q':
12593                                 qgroup_report = 1;
12594                                 break;
12595                         case 'E':
12596                                 subvolid = arg_strtou64(optarg);
12597                                 break;
12598                         case 'r':
12599                                 tree_root_bytenr = arg_strtou64(optarg);
12600                                 break;
12601                         case GETOPT_VAL_CHUNK_TREE:
12602                                 chunk_root_bytenr = arg_strtou64(optarg);
12603                                 break;
12604                         case 'p':
12605                                 ctx.progress_enabled = true;
12606                                 break;
12607                         case '?':
12608                         case 'h':
12609                                 usage(cmd_check_usage);
12610                         case GETOPT_VAL_REPAIR:
12611                                 printf("enabling repair mode\n");
12612                                 repair = 1;
12613                                 ctree_flags |= OPEN_CTREE_WRITES;
12614                                 break;
12615                         case GETOPT_VAL_READONLY:
12616                                 readonly = 1;
12617                                 break;
12618                         case GETOPT_VAL_INIT_CSUM:
12619                                 printf("Creating a new CRC tree\n");
12620                                 init_csum_tree = 1;
12621                                 repair = 1;
12622                                 ctree_flags |= OPEN_CTREE_WRITES;
12623                                 break;
12624                         case GETOPT_VAL_INIT_EXTENT:
12625                                 init_extent_tree = 1;
12626                                 ctree_flags |= (OPEN_CTREE_WRITES |
12627                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12628                                 repair = 1;
12629                                 break;
12630                         case GETOPT_VAL_CHECK_CSUM:
12631                                 check_data_csum = 1;
12632                                 break;
12633                         case GETOPT_VAL_MODE:
12634                                 check_mode = parse_check_mode(optarg);
12635                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12636                                         error("unknown mode: %s", optarg);
12637                                         exit(1);
12638                                 }
12639                                 break;
12640                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12641                                 if (strcmp(optarg, "v1") == 0) {
12642                                         clear_space_cache = 1;
12643                                 } else if (strcmp(optarg, "v2") == 0) {
12644                                         clear_space_cache = 2;
12645                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12646                                 } else {
12647                                         error(
12648                 "invalid argument to --clear-space-cache, must be v1 or v2");
12649                                         exit(1);
12650                                 }
12651                                 ctree_flags |= OPEN_CTREE_WRITES;
12652                                 break;
12653                 }
12654         }
12655
12656         if (check_argc_exact(argc - optind, 1))
12657                 usage(cmd_check_usage);
12658
12659         if (ctx.progress_enabled) {
12660                 ctx.tp = TASK_NOTHING;
12661                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12662         }
12663
12664         /* This check is the only reason for --readonly to exist */
12665         if (readonly && repair) {
12666                 error("repair options are not compatible with --readonly");
12667                 exit(1);
12668         }
12669
12670         /*
12671          * Not supported yet
12672          */
12673         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12674                 error("low memory mode doesn't support repair yet");
12675                 exit(1);
12676         }
12677
12678         radix_tree_init();
12679         cache_tree_init(&root_cache);
12680
12681         if((ret = check_mounted(argv[optind])) < 0) {
12682                 error("could not check mount status: %s", strerror(-ret));
12683                 err |= !!ret;
12684                 goto err_out;
12685         } else if(ret) {
12686                 error("%s is currently mounted, aborting", argv[optind]);
12687                 ret = -EBUSY;
12688                 err |= !!ret;
12689                 goto err_out;
12690         }
12691
12692         /* only allow partial opening under repair mode */
12693         if (repair)
12694                 ctree_flags |= OPEN_CTREE_PARTIAL;
12695
12696         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12697                                   chunk_root_bytenr, ctree_flags);
12698         if (!info) {
12699                 error("cannot open file system");
12700                 ret = -EIO;
12701                 err |= !!ret;
12702                 goto err_out;
12703         }
12704
12705         global_info = info;
12706         root = info->fs_root;
12707         if (clear_space_cache == 1) {
12708                 if (btrfs_fs_compat_ro(info,
12709                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12710                         error(
12711                 "free space cache v2 detected, use --clear-space-cache v2");
12712                         ret = 1;
12713                         goto close_out;
12714                 }
12715                 printf("Clearing free space cache\n");
12716                 ret = clear_free_space_cache(info);
12717                 if (ret) {
12718                         error("failed to clear free space cache");
12719                         ret = 1;
12720                 } else {
12721                         printf("Free space cache cleared\n");
12722                 }
12723                 goto close_out;
12724         } else if (clear_space_cache == 2) {
12725                 if (!btrfs_fs_compat_ro(info,
12726                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12727                         printf("no free space cache v2 to clear\n");
12728                         ret = 0;
12729                         goto close_out;
12730                 }
12731                 printf("Clear free space cache v2\n");
12732                 ret = btrfs_clear_free_space_tree(info);
12733                 if (ret) {
12734                         error("failed to clear free space cache v2: %d", ret);
12735                         ret = 1;
12736                 } else {
12737                         printf("free space cache v2 cleared\n");
12738                 }
12739                 goto close_out;
12740         }
12741
12742         /*
12743          * Repair mode forces a transaction commit, which would make
12744          * loading the log tree fail at the next mount.
12745          */
12746         if (repair && btrfs_super_log_root(info->super_copy)) {
12747                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12748                 if (!ret) {
12749                         ret = 1;
12750                         err |= !!ret;
12751                         goto close_out;
12752                 }
12753                 ret = zero_log_tree(root);
12754                 err |= !!ret;
12755                 if (ret) {
12756                         error("failed to zero log tree: %d", ret);
12757                         goto close_out;
12758                 }
12759         }
12760
12761         uuid_unparse(info->super_copy->fsid, uuidbuf);
12762         if (qgroup_report) {
12763                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12764                        uuidbuf);
12765                 ret = qgroup_verify_all(info);
12766                 err |= !!ret;
12767                 if (ret == 0)
12768                         report_qgroups(1);
12769                 goto close_out;
12770         }
12771         if (subvolid) {
12772                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12773                        subvolid, argv[optind], uuidbuf);
12774                 ret = print_extent_state(info, subvolid);
12775                 err |= !!ret;
12776                 goto close_out;
12777         }
12778         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12779
12780         if (!extent_buffer_uptodate(info->tree_root->node) ||
12781             !extent_buffer_uptodate(info->dev_root->node) ||
12782             !extent_buffer_uptodate(info->chunk_root->node)) {
12783                 error("critical roots corrupted, unable to check the filesystem");
12784                 err |= !!ret;
12785                 ret = -EIO;
12786                 goto close_out;
12787         }
12788
12789         if (init_extent_tree || init_csum_tree) {
12790                 struct btrfs_trans_handle *trans;
12791
12792                 trans = btrfs_start_transaction(info->extent_root, 0);
12793                 if (IS_ERR(trans)) {
12794                         error("error starting transaction");
12795                         ret = PTR_ERR(trans);
12796                         err |= !!ret;
12797                         goto close_out;
12798                 }
12799
12800                 if (init_extent_tree) {
12801                         printf("Creating a new extent tree\n");
12802                         ret = reinit_extent_tree(trans, info);
12803                         err |= !!ret;
12804                         if (ret)
12805                                 goto close_out;
12806                 }
12807
12808                 if (init_csum_tree) {
12809                         printf("Reinitialize checksum tree\n");
12810                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12811                         if (ret) {
12812                                 error("checksum tree initialization failed: %d",
12813                                                 ret);
12814                                 ret = -EIO;
12815                                 err |= !!ret;
12816                                 goto close_out;
12817                         }
12818
12819                         ret = fill_csum_tree(trans, info->csum_root,
12820                                              init_extent_tree);
12821                         err |= !!ret;
12822                         if (ret) {
12823                                 error("checksum tree refilling failed: %d", ret);
12824                                 return -EIO;
12825                         }
12826                 }
12827                 /*
12828                  * Ok now we commit and run the normal fsck, which will add
12829                  * extent entries for all of the items it finds.
12830                  */
12831                 ret = btrfs_commit_transaction(trans, info->extent_root);
12832                 err |= !!ret;
12833                 if (ret)
12834                         goto close_out;
12835         }
12836         if (!extent_buffer_uptodate(info->extent_root->node)) {
12837                 error("critical: extent_root, unable to check the filesystem");
12838                 ret = -EIO;
12839                 err |= !!ret;
12840                 goto close_out;
12841         }
12842         if (!extent_buffer_uptodate(info->csum_root->node)) {
12843                 error("critical: csum_root, unable to check the filesystem");
12844                 ret = -EIO;
12845                 err |= !!ret;
12846                 goto close_out;
12847         }
12848
12849         if (!ctx.progress_enabled)
12850                 fprintf(stderr, "checking extents\n");
12851         if (check_mode == CHECK_MODE_LOWMEM)
12852                 ret = check_chunks_and_extents_v2(root);
12853         else
12854                 ret = check_chunks_and_extents(root);
12855         err |= !!ret;
12856         if (ret)
12857                 error(
12858                 "errors found in extent allocation tree or chunk allocation");
12859
12860         ret = repair_root_items(info);
12861         err |= !!ret;
12862         if (ret < 0)
12863                 goto close_out;
12864         if (repair) {
12865                 fprintf(stderr, "Fixed %d roots.\n", ret);
12866                 ret = 0;
12867         } else if (ret > 0) {
12868                 fprintf(stderr,
12869                        "Found %d roots with an outdated root item.\n",
12870                        ret);
12871                 fprintf(stderr,
12872                         "Please run a filesystem check with the option --repair to fix them.\n");
12873                 ret = 1;
12874                 err |= !!ret;
12875                 goto close_out;
12876         }
12877
12878         if (!ctx.progress_enabled) {
12879                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12880                         fprintf(stderr, "checking free space tree\n");
12881                 else
12882                         fprintf(stderr, "checking free space cache\n");
12883         }
12884         ret = check_space_cache(root);
12885         err |= !!ret;
12886         if (ret)
12887                 goto out;
12888
12889         /*
12890          * Older filesystems stored explicit hole extents between the real
12891          * extents.  Unless the NO_HOLES flag is set, we must make sure there
12892          * are no gaps in the file extents of inodes; with the flag set, such
12893          * gaps can simply be ignored.
12894          */
12895         no_holes = btrfs_fs_incompat(root->fs_info,
12896                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12897         if (!ctx.progress_enabled)
12898                 fprintf(stderr, "checking fs roots\n");
12899         if (check_mode == CHECK_MODE_LOWMEM)
12900                 ret = check_fs_roots_v2(root->fs_info);
12901         else
12902                 ret = check_fs_roots(root, &root_cache);
12903         err |= !!ret;
12904         if (ret)
12905                 goto out;
12906
12907         fprintf(stderr, "checking csums\n");
12908         ret = check_csums(root);
12909         err |= !!ret;
12910         if (ret)
12911                 goto out;
12912
12913         fprintf(stderr, "checking root refs\n");
12914         /* For low memory mode, check_fs_roots_v2 handles root refs */
12915         if (check_mode != CHECK_MODE_LOWMEM) {
12916                 ret = check_root_refs(root, &root_cache);
12917                 err |= !!ret;
12918                 if (ret)
12919                         goto out;
12920         }
12921
12922         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12923                 struct extent_buffer *eb;
12924
12925                 eb = list_first_entry(&root->fs_info->recow_ebs,
12926                                       struct extent_buffer, recow);
12927                 list_del_init(&eb->recow);
12928                 ret = recow_extent_buffer(root, eb);
12929                 err |= !!ret;
12930                 if (ret)
12931                         break;
12932         }
12933
12934         while (!list_empty(&delete_items)) {
12935                 struct bad_item *bad;
12936
12937                 bad = list_first_entry(&delete_items, struct bad_item, list);
12938                 list_del_init(&bad->list);
12939                 if (repair) {
12940                         ret = delete_bad_item(root, bad);
12941                         err |= !!ret;
12942                 }
12943                 free(bad);
12944         }
12945
12946         if (info->quota_enabled) {
12947                 fprintf(stderr, "checking quota groups\n");
12948                 ret = qgroup_verify_all(info);
12949                 err |= !!ret;
12950                 if (ret)
12951                         goto out;
12952                 report_qgroups(0);
12953                 ret = repair_qgroups(info, &qgroups_repaired);
12954                 err |= !!ret;
12955                 if (err)
12956                         goto out;
12957                 ret = 0;
12958         }
12959
12960         if (!list_empty(&root->fs_info->recow_ebs)) {
12961                 error("transid errors in file system");
12962                 ret = 1;
12963                 err |= !!ret;
12964         }
12965 out:
12966         if (found_old_backref) { /*
12967                  * there was a disk format change when mixed
12968                  * backref was in testing tree. The old format
12969                  * existed about one week.
12970                  */
12971                 printf("\n * Found old mixed backref format. "
12972                        "The old format is not supported! *"
12973                        "\n * Please mount the FS in readonly mode, "
12974                        "backup data and re-format the FS. *\n\n");
12975                 err |= 1;
12976         }
12977         printf("found %llu bytes used err is %d\n",
12978                (unsigned long long)bytes_used, ret);
12979         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12980         printf("total tree bytes: %llu\n",
12981                (unsigned long long)total_btree_bytes);
12982         printf("total fs tree bytes: %llu\n",
12983                (unsigned long long)total_fs_tree_bytes);
12984         printf("total extent tree bytes: %llu\n",
12985                (unsigned long long)total_extent_tree_bytes);
12986         printf("btree space waste bytes: %llu\n",
12987                (unsigned long long)btree_space_waste);
12988         printf("file data blocks allocated: %llu\n referenced %llu\n",
12989                 (unsigned long long)data_bytes_allocated,
12990                 (unsigned long long)data_bytes_referenced);
12991
12992         free_qgroup_counts();
12993         free_root_recs_tree(&root_cache);
12994 close_out:
12995         close_ctree(root);
12996 err_out:
12997         if (ctx.progress_enabled)
12998                 task_deinit(ctx.info);
12999
13000         return err;
13001 }