btrfs-progs: check: cleanup use of level_size
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check that the progress indicator reports.
 *
 * The value is used as an index into task_position_string[] in
 * print_status_check(), so the order of the first three entries must match
 * the order of the strings there.  TASK_NOTHING has no string and makes
 * print_status_check() return without printing.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/*
 * Context handed to the progress-indicator callbacks.
 *
 * print_status_check() reads @tp to pick the status string and passes @info
 * to task_period_start()/task_period_wait().  @progress_enabled is not used
 * in this part of the file (NOTE(review): presumably nonzero when progress
 * output was requested -- confirm against the option parsing code).
 */
struct task_ctx {
        int progress_enabled;
        enum task_position tp;

        struct task_info *info;
};
60
/*
 * File-scope counters, flags and lists.  All of them start out as
 * zero/empty/NULL; the code that updates and reports them is outside this
 * part of the file.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress-indicator context, see struct task_ctx */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation selected by the user, as returned by
 * parse_check_mode(): "lowmem" maps to CHECK_MODE_LOWMEM, "orig" and
 * "original" map to CHECK_MODE_ORIGINAL, anything else to
 * CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
85
/* Currently selected check mode; starts out as CHECK_MODE_DEFAULT */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header of a back reference, linked through @list.
 *
 * It is embedded as the member "node" of struct data_backref and
 * struct tree_backref; to_data_backref()/to_tree_backref() convert a pointer
 * to this header back to the containing structure.  The one-bit flags are
 * set and tested outside this part of the file.
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference record for a data extent.
 *
 * Embeds the common extent_backref header as @node (see to_data_backref()).
 * @parent and @root share storage, so only one of them is meaningful for a
 * given record (NOTE(review): presumably selected by node.full_backref --
 * confirm against the code that fills these records).
 */
struct data_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Error bits, one bit per condition.  Numbering starts at bit 1; bit 0 is
 * not assigned here.  The code that sets and tests these bits is outside
 * this part of the file.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Record of a data extent that lost its owner.
 *
 * Much like struct data_backref, but without the members that cannot be
 * determined, and linked through a plain list_head.  Inode records keep
 * these on inode_record::orphan_extents (clone_inode_rec() copies that
 * list entry by entry).
 *
 * NOTE(review): the original comment says records are first collected on
 * root->orphan_data_extent during the extent scan and moved to the inode
 * record during the fs tree scan; that code is outside this part of the
 * file.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Back reference record for a tree block.
 *
 * Embeds the common extent_backref header as @node (see to_tree_backref()).
 * @parent and @root share storage, so only one of them is meaningful for a
 * given record (NOTE(review): presumably selected by node.full_backref --
 * confirm).
 */
struct tree_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That bitfield is two bits wide, so it can hold this third value (2) in
 * addition to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
170
/*
 * Per-extent record.
 *
 * @backrefs, @dups and @list are list heads/links (to_extent_record()
 * converts a pointer to @list back to the record) and @cache is the
 * cache_extent node of the record.  flag_block_full_backref is two bits
 * wide so that it can hold FLAG_UNSET; all other flags are single bits.
 * The code that fills in and checks these records is outside this part of
 * the file.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * Back reference of an inode, linked on inode_record::backrefs through
 * @list.
 *
 * @name is a trailing variable-length array of @namelen bytes;
 * clone_inode_rec() copies sizeof(struct inode_backref) + namelen + 1 bytes,
 * i.e. one extra byte follows the name (NOTE(review): presumably a NUL
 * terminator -- confirm against the allocation site).
 * @errors: NOTE(review): presumably a mask of REF_ERR_* bits -- confirm.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * Record describing one root item, linked through @list.
 *
 * NOTE(review): the fields look like a copy of the interesting parts of an
 * on-disk root item (block address, level, last snapshot, drop progress);
 * the code that fills them in is outside this part of the file -- confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
231
/*
 * Error bits for a reference, one bit per condition.  print_ref_error()
 * turns a mask of these bits into a human-readable list.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
245
/*
 * A hole covering the byte range [start, start + len), kept in an rb-tree
 * (inode_record::holes) through @node.  The tree is ordered by @start, see
 * compare_hole(); compare_hole_range() treats an offset as inside the hole
 * when start <= offset < start + len.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
251
/*
 * Per-inode record.
 *
 * @backrefs:       list of struct inode_backref
 * @errors:         mask of I_ERR_* bits, printed by print_inode_error()
 * @holes:          rb-tree of struct file_extent_hole
 * @orphan_extents: list of struct orphan_data_extent
 * @refs:           reference count; get_inode_rec() clones a record that has
 *                  refs > 1 before handing it out for modification, and
 *                  free_inode_rec() frees the record once the count drops
 *                  to zero
 *
 * get_inode_rec() creates new records zeroed, with extent_start set to
 * (u64)-1 and refs set to 1.  The remaining fields are filled in outside
 * this part of the file.
 */
struct inode_record {
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        struct rb_root holes;
        struct list_head orphan_extents;

        u32 refs;
};
279
/*
 * Error bits stored in inode_record::errors, one bit per condition.
 * print_inode_error() turns a mask of these bits into a human-readable list.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
295
/*
 * Back reference of a root, linked on root_record::backrefs through @list
 * (see to_root_backref()).
 *
 * @name is a trailing variable-length array; @namelen gives its length.
 * @errors: NOTE(review): presumably a mask of REF_ERR_* bits -- confirm.
 * The code that fills in these records is outside this part of the file.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
/*
 * Per-root record: a list of back references (@backrefs) plus the
 * cache_extent node (@cache) the record is indexed by.  The code that
 * creates and checks these records is outside this part of the file.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
323
/*
 * cache_extent node that carries an opaque pointer.  get_inode_rec() uses it
 * to map an inode number (cache.start = ino, cache.size = 1) to its
 * struct inode_record stored in @data.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
328
/*
 * Node referenced from walk_control::nodes[], indexed through @cache and
 * holding its own root and inode cache trees.
 * NOTE(review): presumably describes a tree block shared between several
 * roots; the users are outside this part of the file -- confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
336
/* Start and size of a block; the users are outside this part of the file */
struct block_info {
        u64 start;
        u32 size;
};
341
/*
 * State of a tree walk: a cache tree of shared nodes plus one shared_node
 * slot per tree level (BTRFS_MAX_LEVEL entries).
 * NOTE(review): active_node presumably indexes nodes[]; the walk code is
 * outside this part of the file -- confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
348
/*
 * A key together with the id of the root it belongs to, linked through
 * @list.  NOTE(review): presumably queued on the delete_items list -- the
 * users are outside this part of the file, confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
354
/*
 * An extent candidate (@bytenr, @bytes) with two counters, linked through
 * @list.  The code that fills in and compares these entries is outside this
 * part of the file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
362
/*
 * Information gathered about one root, indexed through @cache_extent.
 * NOTE(review): presumably stored in roots_info_cache -- the users are
 * outside this part of the file, confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
372
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every condition has its own bit so that a combined mask can be decoded
 * unambiguously.  CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with
 * REFERENCER_MISMATCH, which made the two errors indistinguishable; it now
 * uses the first free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type mismatch */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
/* Forward declaration; the definition is outside this part of the file */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize,
832                                          root->fs_info->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (cur + sizeof(*di) + name_len > total ||
1516                     name_len > BTRFS_NAME_LEN) {
1517                         error = REF_ERR_NAME_TOO_LONG;
1518
1519                         if (cur + sizeof(*di) > total)
1520                                 break;
1521                         len = min_t(u32, total - cur - sizeof(*di),
1522                                     BTRFS_NAME_LEN);
1523                 } else {
1524                         len = name_len;
1525                         error = 0;
1526                 }
1527
1528                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529
1530                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1531                     key->offset != btrfs_name_hash(namebuf, len)) {
1532                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1533                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1534                         key->objectid, key->offset, namebuf, len, filetype,
1535                         key->offset, btrfs_name_hash(namebuf, len));
1536                 }
1537
1538                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1539                         add_inode_backref(inode_cache, location.objectid,
1540                                           key->objectid, key->offset, namebuf,
1541                                           len, filetype, key->type, error);
1542                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1543                         add_inode_backref(root_cache, location.objectid,
1544                                           key->objectid, key->offset,
1545                                           namebuf, len, filetype,
1546                                           key->type, error);
1547                 } else {
1548                         fprintf(stderr, "invalid location in dir item %u\n",
1549                                 location.type);
1550                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1551                                           key->objectid, key->offset, namebuf,
1552                                           len, filetype, key->type, error);
1553                 }
1554
1555                 len = sizeof(*di) + name_len + data_len;
1556                 di = (struct btrfs_dir_item *)((char *)di + len);
1557                 cur += len;
1558         }
1559         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1560                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1561
1562         return 0;
1563 }
1564
1565 static int process_inode_ref(struct extent_buffer *eb,
1566                              int slot, struct btrfs_key *key,
1567                              struct shared_node *active_node)
1568 {
1569         u32 total;
1570         u32 cur = 0;
1571         u32 len;
1572         u32 name_len;
1573         u64 index;
1574         int error;
1575         struct cache_tree *inode_cache;
1576         struct btrfs_inode_ref *ref;
1577         char namebuf[BTRFS_NAME_LEN];
1578
1579         inode_cache = &active_node->inode_cache;
1580
1581         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1582         total = btrfs_item_size_nr(eb, slot);
1583         while (cur < total) {
1584                 name_len = btrfs_inode_ref_name_len(eb, ref);
1585                 index = btrfs_inode_ref_index(eb, ref);
1586
1587                 /* inode_ref + namelen should not cross item boundary */
1588                 if (cur + sizeof(*ref) + name_len > total ||
1589                     name_len > BTRFS_NAME_LEN) {
1590                         if (total < cur + sizeof(*ref))
1591                                 break;
1592
1593                         /* Still try to read out the remaining part */
1594                         len = min_t(u32, total - cur - sizeof(*ref),
1595                                     BTRFS_NAME_LEN);
1596                         error = REF_ERR_NAME_TOO_LONG;
1597                 } else {
1598                         len = name_len;
1599                         error = 0;
1600                 }
1601
1602                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, key->offset,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*ref) + name_len;
1607                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611 }
1612
1613 static int process_inode_extref(struct extent_buffer *eb,
1614                                 int slot, struct btrfs_key *key,
1615                                 struct shared_node *active_node)
1616 {
1617         u32 total;
1618         u32 cur = 0;
1619         u32 len;
1620         u32 name_len;
1621         u64 index;
1622         u64 parent;
1623         int error;
1624         struct cache_tree *inode_cache;
1625         struct btrfs_inode_extref *extref;
1626         char namebuf[BTRFS_NAME_LEN];
1627
1628         inode_cache = &active_node->inode_cache;
1629
1630         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631         total = btrfs_item_size_nr(eb, slot);
1632         while (cur < total) {
1633                 name_len = btrfs_inode_extref_name_len(eb, extref);
1634                 index = btrfs_inode_extref_index(eb, extref);
1635                 parent = btrfs_inode_extref_parent(eb, extref);
1636                 if (name_len <= BTRFS_NAME_LEN) {
1637                         len = name_len;
1638                         error = 0;
1639                 } else {
1640                         len = BTRFS_NAME_LEN;
1641                         error = REF_ERR_NAME_TOO_LONG;
1642                 }
1643                 read_extent_buffer(eb, namebuf,
1644                                    (unsigned long)(extref + 1), len);
1645                 add_inode_backref(inode_cache, key->objectid, parent,
1646                                   index, namebuf, len, 0, key->type, error);
1647
1648                 len = sizeof(*extref) + name_len;
1649                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1650                 cur += len;
1651         }
1652         return 0;
1653
1654 }
1655
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657                             u64 len, u64 *found)
1658 {
1659         struct btrfs_key key;
1660         struct btrfs_path path;
1661         struct extent_buffer *leaf;
1662         int ret;
1663         size_t size;
1664         *found = 0;
1665         u64 csum_end;
1666         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667
1668         btrfs_init_path(&path);
1669
1670         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671         key.offset = start;
1672         key.type = BTRFS_EXTENT_CSUM_KEY;
1673
1674         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1675                                 &key, &path, 0, 0);
1676         if (ret < 0)
1677                 goto out;
1678         if (ret > 0 && path.slots[0] > 0) {
1679                 leaf = path.nodes[0];
1680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682                     key.type == BTRFS_EXTENT_CSUM_KEY)
1683                         path.slots[0]--;
1684         }
1685
1686         while (len > 0) {
1687                 leaf = path.nodes[0];
1688                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1690                         if (ret > 0)
1691                                 break;
1692                         else if (ret < 0)
1693                                 goto out;
1694                         leaf = path.nodes[0];
1695                 }
1696
1697                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1698                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699                     key.type != BTRFS_EXTENT_CSUM_KEY)
1700                         break;
1701
1702                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1703                 if (key.offset >= start + len)
1704                         break;
1705
1706                 if (key.offset > start)
1707                         start = key.offset;
1708
1709                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710                 csum_end = key.offset + (size / csum_size) *
1711                            root->fs_info->sectorsize;
1712                 if (csum_end > start) {
1713                         size = min(csum_end - start, len);
1714                         len -= size;
1715                         start += size;
1716                         *found += size;
1717                 }
1718
1719                 path.slots[0]++;
1720         }
1721 out:
1722         btrfs_release_path(&path);
1723         if (ret < 0)
1724                 return ret;
1725         return 0;
1726 }
1727
1728 static int process_file_extent(struct btrfs_root *root,
1729                                 struct extent_buffer *eb,
1730                                 int slot, struct btrfs_key *key,
1731                                 struct shared_node *active_node)
1732 {
1733         struct inode_record *rec;
1734         struct btrfs_file_extent_item *fi;
1735         u64 num_bytes = 0;
1736         u64 disk_bytenr = 0;
1737         u64 extent_offset = 0;
1738         u64 mask = root->fs_info->sectorsize - 1;
1739         int extent_type;
1740         int ret;
1741
1742         rec = active_node->current;
1743         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1744         rec->found_file_extent = 1;
1745
1746         if (rec->extent_start == (u64)-1) {
1747                 rec->extent_start = key->offset;
1748                 rec->extent_end = key->offset;
1749         }
1750
1751         if (rec->extent_end > key->offset)
1752                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1753         else if (rec->extent_end < key->offset) {
1754                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1755                                            key->offset - rec->extent_end);
1756                 if (ret < 0)
1757                         return ret;
1758         }
1759
1760         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1761         extent_type = btrfs_file_extent_type(eb, fi);
1762
1763         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1764                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765                 if (num_bytes == 0)
1766                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767                 rec->found_size += num_bytes;
1768                 num_bytes = (num_bytes + mask) & ~mask;
1769         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1770                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1771                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1772                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1773                 extent_offset = btrfs_file_extent_offset(eb, fi);
1774                 if (num_bytes == 0 || (num_bytes & mask))
1775                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1776                 if (num_bytes + extent_offset >
1777                     btrfs_file_extent_ram_bytes(eb, fi))
1778                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1779                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1780                     (btrfs_file_extent_compression(eb, fi) ||
1781                      btrfs_file_extent_encryption(eb, fi) ||
1782                      btrfs_file_extent_other_encoding(eb, fi)))
1783                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784                 if (disk_bytenr > 0)
1785                         rec->found_size += num_bytes;
1786         } else {
1787                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788         }
1789         rec->extent_end = key->offset + num_bytes;
1790
1791         /*
1792          * The data reloc tree will copy full extents into its inode and then
1793          * copy the corresponding csums.  Because the extent it copied could be
1794          * a preallocated extent that hasn't been written to yet there may be no
1795          * csums to copy, ergo we won't have csums for our file extent.  This is
1796          * ok so just don't bother checking csums if the inode belongs to the
1797          * data reloc tree.
1798          */
1799         if (disk_bytenr > 0 &&
1800             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1801                 u64 found;
1802                 if (btrfs_file_extent_compression(eb, fi))
1803                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804                 else
1805                         disk_bytenr += extent_offset;
1806
1807                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1808                 if (ret < 0)
1809                         return ret;
1810                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811                         if (found > 0)
1812                                 rec->found_csum_item = 1;
1813                         if (found < num_bytes)
1814                                 rec->some_csum_missing = 1;
1815                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816                         if (found > 0)
1817                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1818                 }
1819         }
1820         return 0;
1821 }
1822
1823 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1824                             struct walk_control *wc)
1825 {
1826         struct btrfs_key key;
1827         u32 nritems;
1828         int i;
1829         int ret = 0;
1830         struct cache_tree *inode_cache;
1831         struct shared_node *active_node;
1832
1833         if (wc->root_level == wc->active_node &&
1834             btrfs_root_refs(&root->root_item) == 0)
1835                 return 0;
1836
1837         active_node = wc->nodes[wc->active_node];
1838         inode_cache = &active_node->inode_cache;
1839         nritems = btrfs_header_nritems(eb);
1840         for (i = 0; i < nritems; i++) {
1841                 btrfs_item_key_to_cpu(eb, &key, i);
1842
1843                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844                         continue;
1845                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1846                         continue;
1847
1848                 if (active_node->current == NULL ||
1849                     active_node->current->ino < key.objectid) {
1850                         if (active_node->current) {
1851                                 active_node->current->checked = 1;
1852                                 maybe_free_inode_rec(inode_cache,
1853                                                      active_node->current);
1854                         }
1855                         active_node->current = get_inode_rec(inode_cache,
1856                                                              key.objectid, 1);
1857                         BUG_ON(IS_ERR(active_node->current));
1858                 }
1859                 switch (key.type) {
1860                 case BTRFS_DIR_ITEM_KEY:
1861                 case BTRFS_DIR_INDEX_KEY:
1862                         ret = process_dir_item(eb, i, &key, active_node);
1863                         break;
1864                 case BTRFS_INODE_REF_KEY:
1865                         ret = process_inode_ref(eb, i, &key, active_node);
1866                         break;
1867                 case BTRFS_INODE_EXTREF_KEY:
1868                         ret = process_inode_extref(eb, i, &key, active_node);
1869                         break;
1870                 case BTRFS_INODE_ITEM_KEY:
1871                         ret = process_inode_item(eb, i, &key, active_node);
1872                         break;
1873                 case BTRFS_EXTENT_DATA_KEY:
1874                         ret = process_file_extent(root, eb, i, &key,
1875                                                   active_node);
1876                         break;
1877                 default:
1878                         break;
1879                 };
1880         }
1881         return ret;
1882 }
1883
1884 struct node_refs {
1885         u64 bytenr[BTRFS_MAX_LEVEL];
1886         u64 refs[BTRFS_MAX_LEVEL];
1887         int need_check[BTRFS_MAX_LEVEL];
1888 };
1889
1890 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1891                              struct node_refs *nrefs, u64 level);
1892 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1893                             unsigned int ext_ref);
1894
1895 /*
1896  * Returns >0  Found error, not fatal, should continue
1897  * Returns <0  Fatal error, must exit the whole check
1898  * Returns 0   No errors found
1899  */
1900 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1901                                struct node_refs *nrefs, int *level, int ext_ref)
1902 {
1903         struct extent_buffer *cur = path->nodes[0];
1904         struct btrfs_key key;
1905         u64 cur_bytenr;
1906         u32 nritems;
1907         u64 first_ino = 0;
1908         int root_level = btrfs_header_level(root->node);
1909         int i;
1910         int ret = 0; /* Final return value */
1911         int err = 0; /* Positive error bitmap */
1912
1913         cur_bytenr = cur->start;
1914
1915         /* skip to first inode item or the first inode number change */
1916         nritems = btrfs_header_nritems(cur);
1917         for (i = 0; i < nritems; i++) {
1918                 btrfs_item_key_to_cpu(cur, &key, i);
1919                 if (i == 0)
1920                         first_ino = key.objectid;
1921                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1922                     (first_ino && first_ino != key.objectid))
1923                         break;
1924         }
1925         if (i == nritems) {
1926                 path->slots[0] = nritems;
1927                 return 0;
1928         }
1929         path->slots[0] = i;
1930
1931 again:
1932         err |= check_inode_item(root, path, ext_ref);
1933
1934         if (err & LAST_ITEM)
1935                 goto out;
1936
1937         /* still have inode items in thie leaf */
1938         if (cur->start == cur_bytenr)
1939                 goto again;
1940
1941         /*
1942          * we have switched to another leaf, above nodes may
1943          * have changed, here walk down the path, if a node
1944          * or leaf is shared, check whether we can skip this
1945          * node or leaf.
1946          */
1947         for (i = root_level; i >= 0; i--) {
1948                 if (path->nodes[i]->start == nrefs->bytenr[i])
1949                         continue;
1950
1951                 ret = update_nodes_refs(root,
1952                                 path->nodes[i]->start,
1953                                 nrefs, i);
1954                 if (ret)
1955                         goto out;
1956
1957                 if (!nrefs->need_check[i]) {
1958                         *level += 1;
1959                         break;
1960                 }
1961         }
1962
1963         for (i = 0; i < *level; i++) {
1964                 free_extent_buffer(path->nodes[i]);
1965                 path->nodes[i] = NULL;
1966         }
1967 out:
1968         err &= ~LAST_ITEM;
1969         if (err && !ret)
1970                 ret = err;
1971         return ret;
1972 }
1973
1974 static void reada_walk_down(struct btrfs_root *root,
1975                             struct extent_buffer *node, int slot)
1976 {
1977         struct btrfs_fs_info *fs_info = root->fs_info;
1978         u64 bytenr;
1979         u64 ptr_gen;
1980         u32 nritems;
1981         int i;
1982         int level;
1983
1984         level = btrfs_header_level(node);
1985         if (level != 1)
1986                 return;
1987
1988         nritems = btrfs_header_nritems(node);
1989         for (i = slot; i < nritems; i++) {
1990                 bytenr = btrfs_node_blockptr(node, i);
1991                 ptr_gen = btrfs_node_ptr_generation(node, i);
1992                 readahead_tree_block(fs_info, bytenr, fs_info->nodesize,
1993                                 ptr_gen);
1994         }
1995 }
1996
1997 /*
1998  * Check the child node/leaf by the following condition:
1999  * 1. the first item key of the node/leaf should be the same with the one
2000  *    in parent.
2001  * 2. block in parent node should match the child node/leaf.
2002  * 3. generation of parent node and child's header should be consistent.
2003  *
2004  * Or the child node/leaf pointed by the key in parent is not valid.
2005  *
2006  * We hope to check leaf owner too, but since subvol may share leaves,
2007  * which makes leaf owner check not so strong, key check should be
2008  * sufficient enough for that case.
2009  */
2010 static int check_child_node(struct extent_buffer *parent, int slot,
2011                             struct extent_buffer *child)
2012 {
2013         struct btrfs_key parent_key;
2014         struct btrfs_key child_key;
2015         int ret = 0;
2016
2017         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2018         if (btrfs_header_level(child) == 0)
2019                 btrfs_item_key_to_cpu(child, &child_key, 0);
2020         else
2021                 btrfs_node_key_to_cpu(child, &child_key, 0);
2022
2023         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2024                 ret = -EINVAL;
2025                 fprintf(stderr,
2026                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2027                         parent_key.objectid, parent_key.type, parent_key.offset,
2028                         child_key.objectid, child_key.type, child_key.offset);
2029         }
2030         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2031                 ret = -EINVAL;
2032                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2033                         btrfs_node_blockptr(parent, slot),
2034                         btrfs_header_bytenr(child));
2035         }
2036         if (btrfs_node_ptr_generation(parent, slot) !=
2037             btrfs_header_generation(child)) {
2038                 ret = -EINVAL;
2039                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2040                         btrfs_header_generation(child),
2041                         btrfs_node_ptr_generation(parent, slot));
2042         }
2043         return ret;
2044 }
2045
2046 /*
2047  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2048  * in every fs or file tree check. Here we find its all root ids, and only check
2049  * it in the fs or file tree which has the smallest root id.
2050  */
2051 static int need_check(struct btrfs_root *root, struct ulist *roots)
2052 {
2053         struct rb_node *node;
2054         struct ulist_node *u;
2055
2056         if (roots->nnodes == 1)
2057                 return 1;
2058
2059         node = rb_first(&roots->root);
2060         u = rb_entry(node, struct ulist_node, rb_node);
2061         /*
2062          * current root id is not smallest, we skip it and let it be checked
2063          * in the fs or file tree who hash the smallest root id.
2064          */
2065         if (root->objectid != u->val)
2066                 return 0;
2067
2068         return 1;
2069 }
2070
2071 /*
2072  * for a tree node or leaf, we record its reference count, so later if we still
2073  * process this node or leaf, don't need to compute its reference count again.
2074  */
2075 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2076                              struct node_refs *nrefs, u64 level)
2077 {
2078         int check, ret;
2079         u64 refs;
2080         struct ulist *roots;
2081
2082         if (nrefs->bytenr[level] != bytenr) {
2083                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2084                                        level, 1, &refs, NULL);
2085                 if (ret < 0)
2086                         return ret;
2087
2088                 nrefs->bytenr[level] = bytenr;
2089                 nrefs->refs[level] = refs;
2090                 if (refs > 1) {
2091                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2092                                                    0, &roots);
2093                         if (ret)
2094                                 return -EIO;
2095
2096                         check = need_check(root, roots);
2097                         ulist_free(roots);
2098                         nrefs->need_check[level] = check;
2099                 } else {
2100                         nrefs->need_check[level] = 1;
2101                 }
2102         }
2103
2104         return 0;
2105 }
2106
2107 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2108                           struct walk_control *wc, int *level,
2109                           struct node_refs *nrefs)
2110 {
2111         enum btrfs_tree_block_status status;
2112         u64 bytenr;
2113         u64 ptr_gen;
2114         struct btrfs_fs_info *fs_info = root->fs_info;
2115         struct extent_buffer *next;
2116         struct extent_buffer *cur;
2117         int ret, err = 0;
2118         u64 refs;
2119
2120         WARN_ON(*level < 0);
2121         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2122
2123         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2124                 refs = nrefs->refs[*level];
2125                 ret = 0;
2126         } else {
2127                 ret = btrfs_lookup_extent_info(NULL, root,
2128                                        path->nodes[*level]->start,
2129                                        *level, 1, &refs, NULL);
2130                 if (ret < 0) {
2131                         err = ret;
2132                         goto out;
2133                 }
2134                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2135                 nrefs->refs[*level] = refs;
2136         }
2137
2138         if (refs > 1) {
2139                 ret = enter_shared_node(root, path->nodes[*level]->start,
2140                                         refs, wc, *level);
2141                 if (ret > 0) {
2142                         err = ret;
2143                         goto out;
2144                 }
2145         }
2146
2147         while (*level >= 0) {
2148                 WARN_ON(*level < 0);
2149                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2150                 cur = path->nodes[*level];
2151
2152                 if (btrfs_header_level(cur) != *level)
2153                         WARN_ON(1);
2154
2155                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2156                         break;
2157                 if (*level == 0) {
2158                         ret = process_one_leaf(root, cur, wc);
2159                         if (ret < 0)
2160                                 err = ret;
2161                         break;
2162                 }
2163                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2164                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2165
2166                 if (bytenr == nrefs->bytenr[*level - 1]) {
2167                         refs = nrefs->refs[*level - 1];
2168                 } else {
2169                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2170                                         *level - 1, 1, &refs, NULL);
2171                         if (ret < 0) {
2172                                 refs = 0;
2173                         } else {
2174                                 nrefs->bytenr[*level - 1] = bytenr;
2175                                 nrefs->refs[*level - 1] = refs;
2176                         }
2177                 }
2178
2179                 if (refs > 1) {
2180                         ret = enter_shared_node(root, bytenr, refs,
2181                                                 wc, *level - 1);
2182                         if (ret > 0) {
2183                                 path->slots[*level]++;
2184                                 continue;
2185                         }
2186                 }
2187
2188                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2189                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2190                         free_extent_buffer(next);
2191                         reada_walk_down(root, cur, path->slots[*level]);
2192                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2193                         if (!extent_buffer_uptodate(next)) {
2194                                 struct btrfs_key node_key;
2195
2196                                 btrfs_node_key_to_cpu(path->nodes[*level],
2197                                                       &node_key,
2198                                                       path->slots[*level]);
2199                                 btrfs_add_corrupt_extent_record(root->fs_info,
2200                                                 &node_key,
2201                                                 path->nodes[*level]->start,
2202                                                 root->fs_info->nodesize,
2203                                                 *level);
2204                                 err = -EIO;
2205                                 goto out;
2206                         }
2207                 }
2208
2209                 ret = check_child_node(cur, path->slots[*level], next);
2210                 if (ret) {
2211                         free_extent_buffer(next);
2212                         err = ret;
2213                         goto out;
2214                 }
2215
2216                 if (btrfs_is_leaf(next))
2217                         status = btrfs_check_leaf(root, NULL, next);
2218                 else
2219                         status = btrfs_check_node(root, NULL, next);
2220                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2221                         free_extent_buffer(next);
2222                         err = -EIO;
2223                         goto out;
2224                 }
2225
2226                 *level = *level - 1;
2227                 free_extent_buffer(path->nodes[*level]);
2228                 path->nodes[*level] = next;
2229                 path->slots[*level] = 0;
2230         }
2231 out:
2232         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2233         return err;
2234 }
2235
/* Repeated forward declaration; the definition comes later in the file. */
static int check_inode_item(struct btrfs_root *root,
			    struct btrfs_path *path,
			    unsigned int ext_ref);
2238
2239 /*
2240  * Returns >0  Found error, should continue
2241  * Returns <0  Fatal error, must exit the whole check
2242  * Returns 0   No errors found
2243  */
2244 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2245                              int *level, struct node_refs *nrefs, int ext_ref)
2246 {
2247         enum btrfs_tree_block_status status;
2248         u64 bytenr;
2249         u64 ptr_gen;
2250         struct btrfs_fs_info *fs_info = root->fs_info;
2251         struct extent_buffer *next;
2252         struct extent_buffer *cur;
2253         int ret;
2254
2255         WARN_ON(*level < 0);
2256         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2257
2258         ret = update_nodes_refs(root, path->nodes[*level]->start,
2259                                 nrefs, *level);
2260         if (ret < 0)
2261                 return ret;
2262
2263         while (*level >= 0) {
2264                 WARN_ON(*level < 0);
2265                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2266                 cur = path->nodes[*level];
2267
2268                 if (btrfs_header_level(cur) != *level)
2269                         WARN_ON(1);
2270
2271                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2272                         break;
2273                 /* Don't forgot to check leaf/node validation */
2274                 if (*level == 0) {
2275                         ret = btrfs_check_leaf(root, NULL, cur);
2276                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2277                                 ret = -EIO;
2278                                 break;
2279                         }
2280                         ret = process_one_leaf_v2(root, path, nrefs,
2281                                                   level, ext_ref);
2282                         break;
2283                 } else {
2284                         ret = btrfs_check_node(root, NULL, cur);
2285                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2286                                 ret = -EIO;
2287                                 break;
2288                         }
2289                 }
2290                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2291                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2292
2293                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2294                 if (ret)
2295                         break;
2296                 if (!nrefs->need_check[*level - 1]) {
2297                         path->slots[*level]++;
2298                         continue;
2299                 }
2300
2301                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2302                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2303                         free_extent_buffer(next);
2304                         reada_walk_down(root, cur, path->slots[*level]);
2305                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2306                         if (!extent_buffer_uptodate(next)) {
2307                                 struct btrfs_key node_key;
2308
2309                                 btrfs_node_key_to_cpu(path->nodes[*level],
2310                                                       &node_key,
2311                                                       path->slots[*level]);
2312                                 btrfs_add_corrupt_extent_record(fs_info,
2313                                                 &node_key,
2314                                                 path->nodes[*level]->start,
2315                                                 fs_info->nodesize,
2316                                                 *level);
2317                                 ret = -EIO;
2318                                 break;
2319                         }
2320                 }
2321
2322                 ret = check_child_node(cur, path->slots[*level], next);
2323                 if (ret < 0) 
2324                         break;
2325
2326                 if (btrfs_is_leaf(next))
2327                         status = btrfs_check_leaf(root, NULL, next);
2328                 else
2329                         status = btrfs_check_node(root, NULL, next);
2330                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2331                         free_extent_buffer(next);
2332                         ret = -EIO;
2333                         break;
2334                 }
2335
2336                 *level = *level - 1;
2337                 free_extent_buffer(path->nodes[*level]);
2338                 path->nodes[*level] = next;
2339                 path->slots[*level] = 0;
2340         }
2341         return ret;
2342 }
2343
2344 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2345                         struct walk_control *wc, int *level)
2346 {
2347         int i;
2348         struct extent_buffer *leaf;
2349
2350         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2351                 leaf = path->nodes[i];
2352                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2353                         path->slots[i]++;
2354                         *level = i;
2355                         return 0;
2356                 } else {
2357                         free_extent_buffer(path->nodes[*level]);
2358                         path->nodes[*level] = NULL;
2359                         BUG_ON(*level > wc->active_node);
2360                         if (*level == wc->active_node)
2361                                 leave_shared_node(root, wc, *level);
2362                         *level = i + 1;
2363                 }
2364         }
2365         return 1;
2366 }
2367
2368 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2369                            int *level)
2370 {
2371         int i;
2372         struct extent_buffer *leaf;
2373
2374         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2375                 leaf = path->nodes[i];
2376                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2377                         path->slots[i]++;
2378                         *level = i;
2379                         return 0;
2380                 } else {
2381                         free_extent_buffer(path->nodes[*level]);
2382                         path->nodes[*level] = NULL;
2383                         *level = i + 1;
2384                 }
2385         }
2386         return 1;
2387 }
2388
2389 static int check_root_dir(struct inode_record *rec)
2390 {
2391         struct inode_backref *backref;
2392         int ret = -1;
2393
2394         if (!rec->found_inode_item || rec->errors)
2395                 goto out;
2396         if (rec->nlink != 1 || rec->found_link != 0)
2397                 goto out;
2398         if (list_empty(&rec->backrefs))
2399                 goto out;
2400         backref = to_inode_backref(rec->backrefs.next);
2401         if (!backref->found_inode_ref)
2402                 goto out;
2403         if (backref->index != 0 || backref->namelen != 2 ||
2404             memcmp(backref->name, "..", 2))
2405                 goto out;
2406         if (backref->found_dir_index || backref->found_dir_item)
2407                 goto out;
2408         ret = 0;
2409 out:
2410         return ret;
2411 }
2412
2413 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2414                               struct btrfs_root *root, struct btrfs_path *path,
2415                               struct inode_record *rec)
2416 {
2417         struct btrfs_inode_item *ei;
2418         struct btrfs_key key;
2419         int ret;
2420
2421         key.objectid = rec->ino;
2422         key.type = BTRFS_INODE_ITEM_KEY;
2423         key.offset = (u64)-1;
2424
2425         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2426         if (ret < 0)
2427                 goto out;
2428         if (ret) {
2429                 if (!path->slots[0]) {
2430                         ret = -ENOENT;
2431                         goto out;
2432                 }
2433                 path->slots[0]--;
2434                 ret = 0;
2435         }
2436         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2437         if (key.objectid != rec->ino) {
2438                 ret = -ENOENT;
2439                 goto out;
2440         }
2441
2442         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2443                             struct btrfs_inode_item);
2444         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2445         btrfs_mark_buffer_dirty(path->nodes[0]);
2446         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2447         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2448                root->root_key.objectid);
2449 out:
2450         btrfs_release_path(path);
2451         return ret;
2452 }
2453
2454 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2455                                     struct btrfs_root *root,
2456                                     struct btrfs_path *path,
2457                                     struct inode_record *rec)
2458 {
2459         int ret;
2460
2461         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2462         btrfs_release_path(path);
2463         if (!ret)
2464                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2465         return ret;
2466 }
2467
2468 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2469                                struct btrfs_root *root,
2470                                struct btrfs_path *path,
2471                                struct inode_record *rec)
2472 {
2473         struct btrfs_inode_item *ei;
2474         struct btrfs_key key;
2475         int ret = 0;
2476
2477         key.objectid = rec->ino;
2478         key.type = BTRFS_INODE_ITEM_KEY;
2479         key.offset = 0;
2480
2481         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2482         if (ret) {
2483                 if (ret > 0)
2484                         ret = -ENOENT;
2485                 goto out;
2486         }
2487
2488         /* Since ret == 0, no need to check anything */
2489         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2490                             struct btrfs_inode_item);
2491         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2492         btrfs_mark_buffer_dirty(path->nodes[0]);
2493         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2494         printf("reset nbytes for ino %llu root %llu\n",
2495                rec->ino, root->root_key.objectid);
2496 out:
2497         btrfs_release_path(path);
2498         return ret;
2499 }
2500
2501 static int add_missing_dir_index(struct btrfs_root *root,
2502                                  struct cache_tree *inode_cache,
2503                                  struct inode_record *rec,
2504                                  struct inode_backref *backref)
2505 {
2506         struct btrfs_path path;
2507         struct btrfs_trans_handle *trans;
2508         struct btrfs_dir_item *dir_item;
2509         struct extent_buffer *leaf;
2510         struct btrfs_key key;
2511         struct btrfs_disk_key disk_key;
2512         struct inode_record *dir_rec;
2513         unsigned long name_ptr;
2514         u32 data_size = sizeof(*dir_item) + backref->namelen;
2515         int ret;
2516
2517         trans = btrfs_start_transaction(root, 1);
2518         if (IS_ERR(trans))
2519                 return PTR_ERR(trans);
2520
2521         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2522                 (unsigned long long)rec->ino);
2523
2524         btrfs_init_path(&path);
2525         key.objectid = backref->dir;
2526         key.type = BTRFS_DIR_INDEX_KEY;
2527         key.offset = backref->index;
2528         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2529         BUG_ON(ret);
2530
2531         leaf = path.nodes[0];
2532         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2533
2534         disk_key.objectid = cpu_to_le64(rec->ino);
2535         disk_key.type = BTRFS_INODE_ITEM_KEY;
2536         disk_key.offset = 0;
2537
2538         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2539         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2540         btrfs_set_dir_data_len(leaf, dir_item, 0);
2541         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2542         name_ptr = (unsigned long)(dir_item + 1);
2543         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2544         btrfs_mark_buffer_dirty(leaf);
2545         btrfs_release_path(&path);
2546         btrfs_commit_transaction(trans, root);
2547
2548         backref->found_dir_index = 1;
2549         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2550         BUG_ON(IS_ERR(dir_rec));
2551         if (!dir_rec)
2552                 return 0;
2553         dir_rec->found_size += backref->namelen;
2554         if (dir_rec->found_size == dir_rec->isize &&
2555             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2556                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2557         if (dir_rec->found_size != dir_rec->isize)
2558                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2559
2560         return 0;
2561 }
2562
2563 static int delete_dir_index(struct btrfs_root *root,
2564                             struct inode_backref *backref)
2565 {
2566         struct btrfs_trans_handle *trans;
2567         struct btrfs_dir_item *di;
2568         struct btrfs_path path;
2569         int ret = 0;
2570
2571         trans = btrfs_start_transaction(root, 1);
2572         if (IS_ERR(trans))
2573                 return PTR_ERR(trans);
2574
2575         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2576                 (unsigned long long)backref->dir,
2577                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2578                 (unsigned long long)root->objectid);
2579
2580         btrfs_init_path(&path);
2581         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2582                                     backref->name, backref->namelen,
2583                                     backref->index, -1);
2584         if (IS_ERR(di)) {
2585                 ret = PTR_ERR(di);
2586                 btrfs_release_path(&path);
2587                 btrfs_commit_transaction(trans, root);
2588                 if (ret == -ENOENT)
2589                         return 0;
2590                 return ret;
2591         }
2592
2593         if (!di)
2594                 ret = btrfs_del_item(trans, root, &path);
2595         else
2596                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2597         BUG_ON(ret);
2598         btrfs_release_path(&path);
2599         btrfs_commit_transaction(trans, root);
2600         return ret;
2601 }
2602
2603 static int create_inode_item(struct btrfs_root *root,
2604                              struct inode_record *rec,
2605                              int root_dir)
2606 {
2607         struct btrfs_trans_handle *trans;
2608         struct btrfs_inode_item inode_item;
2609         time_t now = time(NULL);
2610         int ret;
2611
2612         trans = btrfs_start_transaction(root, 1);
2613         if (IS_ERR(trans)) {
2614                 ret = PTR_ERR(trans);
2615                 return ret;
2616         }
2617
2618         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2619                 "be incomplete, please check permissions and content after "
2620                 "the fsck completes.\n", (unsigned long long)root->objectid,
2621                 (unsigned long long)rec->ino);
2622
2623         memset(&inode_item, 0, sizeof(inode_item));
2624         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2625         if (root_dir)
2626                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2627         else
2628                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2629         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2630         if (rec->found_dir_item) {
2631                 if (rec->found_file_extent)
2632                         fprintf(stderr, "root %llu inode %llu has both a dir "
2633                                 "item and extents, unsure if it is a dir or a "
2634                                 "regular file so setting it as a directory\n",
2635                                 (unsigned long long)root->objectid,
2636                                 (unsigned long long)rec->ino);
2637                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2638                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2639         } else if (!rec->found_dir_item) {
2640                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2641                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2642         }
2643         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2644         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2645         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2646         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2647         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2648         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2649         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2650         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2651
2652         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2653         BUG_ON(ret);
2654         btrfs_commit_transaction(trans, root);
2655         return 0;
2656 }
2657
2658 static int repair_inode_backrefs(struct btrfs_root *root,
2659                                  struct inode_record *rec,
2660                                  struct cache_tree *inode_cache,
2661                                  int delete)
2662 {
2663         struct inode_backref *tmp, *backref;
2664         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2665         int ret = 0;
2666         int repaired = 0;
2667
2668         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2669                 if (!delete && rec->ino == root_dirid) {
2670                         if (!rec->found_inode_item) {
2671                                 ret = create_inode_item(root, rec, 1);
2672                                 if (ret)
2673                                         break;
2674                                 repaired++;
2675                         }
2676                 }
2677
2678                 /* Index 0 for root dir's are special, don't mess with it */
2679                 if (rec->ino == root_dirid && backref->index == 0)
2680                         continue;
2681
2682                 if (delete &&
2683                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2684                      (backref->found_dir_index && backref->found_inode_ref &&
2685                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2686                         ret = delete_dir_index(root, backref);
2687                         if (ret)
2688                                 break;
2689                         repaired++;
2690                         list_del(&backref->list);
2691                         free(backref);
2692                         continue;
2693                 }
2694
2695                 if (!delete && !backref->found_dir_index &&
2696                     backref->found_dir_item && backref->found_inode_ref) {
2697                         ret = add_missing_dir_index(root, inode_cache, rec,
2698                                                     backref);
2699                         if (ret)
2700                                 break;
2701                         repaired++;
2702                         if (backref->found_dir_item &&
2703                             backref->found_dir_index) {
2704                                 if (!backref->errors &&
2705                                     backref->found_inode_ref) {
2706                                         list_del(&backref->list);
2707                                         free(backref);
2708                                         continue;
2709                                 }
2710                         }
2711                 }
2712
2713                 if (!delete && (!backref->found_dir_index &&
2714                                 !backref->found_dir_item &&
2715                                 backref->found_inode_ref)) {
2716                         struct btrfs_trans_handle *trans;
2717                         struct btrfs_key location;
2718
2719                         ret = check_dir_conflict(root, backref->name,
2720                                                  backref->namelen,
2721                                                  backref->dir,
2722                                                  backref->index);
2723                         if (ret) {
2724                                 /*
2725                                  * let nlink fixing routine to handle it,
2726                                  * which can do it better.
2727                                  */
2728                                 ret = 0;
2729                                 break;
2730                         }
2731                         location.objectid = rec->ino;
2732                         location.type = BTRFS_INODE_ITEM_KEY;
2733                         location.offset = 0;
2734
2735                         trans = btrfs_start_transaction(root, 1);
2736                         if (IS_ERR(trans)) {
2737                                 ret = PTR_ERR(trans);
2738                                 break;
2739                         }
2740                         fprintf(stderr, "adding missing dir index/item pair "
2741                                 "for inode %llu\n",
2742                                 (unsigned long long)rec->ino);
2743                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2744                                                     backref->namelen,
2745                                                     backref->dir, &location,
2746                                                     imode_to_type(rec->imode),
2747                                                     backref->index);
2748                         BUG_ON(ret);
2749                         btrfs_commit_transaction(trans, root);
2750                         repaired++;
2751                 }
2752
2753                 if (!delete && (backref->found_inode_ref &&
2754                                 backref->found_dir_index &&
2755                                 backref->found_dir_item &&
2756                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2757                                 !rec->found_inode_item)) {
2758                         ret = create_inode_item(root, rec, 0);
2759                         if (ret)
2760                                 break;
2761                         repaired++;
2762                 }
2763
2764         }
2765         return ret ? ret : repaired;
2766 }
2767
2768 /*
2769  * To determine the file type for nlink/inode_item repair
2770  *
2771  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2772  * Return -ENOENT if file type is not found.
2773  */
2774 static int find_file_type(struct inode_record *rec, u8 *type)
2775 {
2776         struct inode_backref *backref;
2777
2778         /* For inode item recovered case */
2779         if (rec->found_inode_item) {
2780                 *type = imode_to_type(rec->imode);
2781                 return 0;
2782         }
2783
2784         list_for_each_entry(backref, &rec->backrefs, list) {
2785                 if (backref->found_dir_index || backref->found_dir_item) {
2786                         *type = backref->filetype;
2787                         return 0;
2788                 }
2789         }
2790         return -ENOENT;
2791 }
2792
2793 /*
2794  * To determine the file name for nlink repair
2795  *
2796  * Return 0 if file name is found, set name and namelen.
2797  * Return -ENOENT if file name is not found.
2798  */
2799 static int find_file_name(struct inode_record *rec,
2800                           char *name, int *namelen)
2801 {
2802         struct inode_backref *backref;
2803
2804         list_for_each_entry(backref, &rec->backrefs, list) {
2805                 if (backref->found_dir_index || backref->found_dir_item ||
2806                     backref->found_inode_ref) {
2807                         memcpy(name, backref->name, backref->namelen);
2808                         *namelen = backref->namelen;
2809                         return 0;
2810                 }
2811         }
2812         return -ENOENT;
2813 }
2814
2815 /* Reset the nlink of the inode to the correct one */
2816 static int reset_nlink(struct btrfs_trans_handle *trans,
2817                        struct btrfs_root *root,
2818                        struct btrfs_path *path,
2819                        struct inode_record *rec)
2820 {
2821         struct inode_backref *backref;
2822         struct inode_backref *tmp;
2823         struct btrfs_key key;
2824         struct btrfs_inode_item *inode_item;
2825         int ret = 0;
2826
2827         /* We don't believe this either, reset it and iterate backref */
2828         rec->found_link = 0;
2829
2830         /* Remove all backref including the valid ones */
2831         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2832                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2833                                    backref->index, backref->name,
2834                                    backref->namelen, 0);
2835                 if (ret < 0)
2836                         goto out;
2837
2838                 /* remove invalid backref, so it won't be added back */
2839                 if (!(backref->found_dir_index &&
2840                       backref->found_dir_item &&
2841                       backref->found_inode_ref)) {
2842                         list_del(&backref->list);
2843                         free(backref);
2844                 } else {
2845                         rec->found_link++;
2846                 }
2847         }
2848
2849         /* Set nlink to 0 */
2850         key.objectid = rec->ino;
2851         key.type = BTRFS_INODE_ITEM_KEY;
2852         key.offset = 0;
2853         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2854         if (ret < 0)
2855                 goto out;
2856         if (ret > 0) {
2857                 ret = -ENOENT;
2858                 goto out;
2859         }
2860         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2861                                     struct btrfs_inode_item);
2862         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2863         btrfs_mark_buffer_dirty(path->nodes[0]);
2864         btrfs_release_path(path);
2865
2866         /*
2867          * Add back valid inode_ref/dir_item/dir_index,
2868          * add_link() will handle the nlink inc, so new nlink must be correct
2869          */
2870         list_for_each_entry(backref, &rec->backrefs, list) {
2871                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2872                                      backref->name, backref->namelen,
2873                                      backref->filetype, &backref->index, 1);
2874                 if (ret < 0)
2875                         goto out;
2876         }
2877 out:
2878         btrfs_release_path(path);
2879         return ret;
2880 }
2881
2882 static int get_highest_inode(struct btrfs_trans_handle *trans,
2883                                 struct btrfs_root *root,
2884                                 struct btrfs_path *path,
2885                                 u64 *highest_ino)
2886 {
2887         struct btrfs_key key, found_key;
2888         int ret;
2889
2890         btrfs_init_path(path);
2891         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2892         key.offset = -1;
2893         key.type = BTRFS_INODE_ITEM_KEY;
2894         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2895         if (ret == 1) {
2896                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2897                                 path->slots[0] - 1);
2898                 *highest_ino = found_key.objectid;
2899                 ret = 0;
2900         }
2901         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2902                 ret = -EOVERFLOW;
2903         btrfs_release_path(path);
2904         return ret;
2905 }
2906
2907 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2908                                struct btrfs_root *root,
2909                                struct btrfs_path *path,
2910                                struct inode_record *rec)
2911 {
2912         char *dir_name = "lost+found";
2913         char namebuf[BTRFS_NAME_LEN] = {0};
2914         u64 lost_found_ino;
2915         u32 mode = 0700;
2916         u8 type = 0;
2917         int namelen = 0;
2918         int name_recovered = 0;
2919         int type_recovered = 0;
2920         int ret = 0;
2921
2922         /*
2923          * Get file name and type first before these invalid inode ref
2924          * are deleted by remove_all_invalid_backref()
2925          */
2926         name_recovered = !find_file_name(rec, namebuf, &namelen);
2927         type_recovered = !find_file_type(rec, &type);
2928
2929         if (!name_recovered) {
2930                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2931                        rec->ino, rec->ino);
2932                 namelen = count_digits(rec->ino);
2933                 sprintf(namebuf, "%llu", rec->ino);
2934                 name_recovered = 1;
2935         }
2936         if (!type_recovered) {
2937                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2938                        rec->ino);
2939                 type = BTRFS_FT_REG_FILE;
2940                 type_recovered = 1;
2941         }
2942
2943         ret = reset_nlink(trans, root, path, rec);
2944         if (ret < 0) {
2945                 fprintf(stderr,
2946                         "Failed to reset nlink for inode %llu: %s\n",
2947                         rec->ino, strerror(-ret));
2948                 goto out;
2949         }
2950
2951         if (rec->found_link == 0) {
2952                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2953                 if (ret < 0)
2954                         goto out;
2955                 lost_found_ino++;
2956                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2957                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2958                                   mode);
2959                 if (ret < 0) {
2960                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2961                                 dir_name, strerror(-ret));
2962                         goto out;
2963                 }
2964                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2965                                      namebuf, namelen, type, NULL, 1);
2966                 /*
2967                  * Add ".INO" suffix several times to handle case where
2968                  * "FILENAME.INO" is already taken by another file.
2969                  */
2970                 while (ret == -EEXIST) {
2971                         /*
2972                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2973                          */
2974                         if (namelen + count_digits(rec->ino) + 1 >
2975                             BTRFS_NAME_LEN) {
2976                                 ret = -EFBIG;
2977                                 goto out;
2978                         }
2979                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2980                                  ".%llu", rec->ino);
2981                         namelen += count_digits(rec->ino) + 1;
2982                         ret = btrfs_add_link(trans, root, rec->ino,
2983                                              lost_found_ino, namebuf,
2984                                              namelen, type, NULL, 1);
2985                 }
2986                 if (ret < 0) {
2987                         fprintf(stderr,
2988                                 "Failed to link the inode %llu to %s dir: %s\n",
2989                                 rec->ino, dir_name, strerror(-ret));
2990                         goto out;
2991                 }
2992                 /*
2993                  * Just increase the found_link, don't actually add the
2994                  * backref. This will make things easier and this inode
2995                  * record will be freed after the repair is done.
2996                  * So fsck will not report problem about this inode.
2997                  */
2998                 rec->found_link++;
2999                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3000                        namelen, namebuf, dir_name);
3001         }
3002         printf("Fixed the nlink of inode %llu\n", rec->ino);
3003 out:
3004         /*
3005          * Clear the flag anyway, or we will loop forever for the same inode
3006          * as it will not be removed from the bad inode list and the dead loop
3007          * happens.
3008          */
3009         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3010         btrfs_release_path(path);
3011         return ret;
3012 }
3013
3014 /*
3015  * Check if there is any normal(reg or prealloc) file extent for given
3016  * ino.
3017  * This is used to determine the file type when neither its dir_index/item or
3018  * inode_item exists.
3019  *
3020  * This will *NOT* report error, if any error happens, just consider it does
3021  * not have any normal file extent.
3022  */
3023 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3024 {
3025         struct btrfs_path path;
3026         struct btrfs_key key;
3027         struct btrfs_key found_key;
3028         struct btrfs_file_extent_item *fi;
3029         u8 type;
3030         int ret = 0;
3031
3032         btrfs_init_path(&path);
3033         key.objectid = ino;
3034         key.type = BTRFS_EXTENT_DATA_KEY;
3035         key.offset = 0;
3036
3037         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3038         if (ret < 0) {
3039                 ret = 0;
3040                 goto out;
3041         }
3042         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3043                 ret = btrfs_next_leaf(root, &path);
3044                 if (ret) {
3045                         ret = 0;
3046                         goto out;
3047                 }
3048         }
3049         while (1) {
3050                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3051                                       path.slots[0]);
3052                 if (found_key.objectid != ino ||
3053                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3054                         break;
3055                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3056                                     struct btrfs_file_extent_item);
3057                 type = btrfs_file_extent_type(path.nodes[0], fi);
3058                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3059                         ret = 1;
3060                         goto out;
3061                 }
3062         }
3063 out:
3064         btrfs_release_path(&path);
3065         return ret;
3066 }
3067
3068 static u32 btrfs_type_to_imode(u8 type)
3069 {
3070         static u32 imode_by_btrfs_type[] = {
3071                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3072                 [BTRFS_FT_DIR]          = S_IFDIR,
3073                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3074                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3075                 [BTRFS_FT_FIFO]         = S_IFIFO,
3076                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3077                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3078         };
3079
3080         return imode_by_btrfs_type[(type)];
3081 }
3082
3083 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3084                                 struct btrfs_root *root,
3085                                 struct btrfs_path *path,
3086                                 struct inode_record *rec)
3087 {
3088         u8 filetype;
3089         u32 mode = 0700;
3090         int type_recovered = 0;
3091         int ret = 0;
3092
3093         printf("Trying to rebuild inode:%llu\n", rec->ino);
3094
3095         type_recovered = !find_file_type(rec, &filetype);
3096
3097         /*
3098          * Try to determine inode type if type not found.
3099          *
3100          * For found regular file extent, it must be FILE.
3101          * For found dir_item/index, it must be DIR.
3102          *
3103          * For undetermined one, use FILE as fallback.
3104          *
3105          * TODO:
3106          * 1. If found backref(inode_index/item is already handled) to it,
3107          *    it must be DIR.
3108          *    Need new inode-inode ref structure to allow search for that.
3109          */
3110         if (!type_recovered) {
3111                 if (rec->found_file_extent &&
3112                     find_normal_file_extent(root, rec->ino)) {
3113                         type_recovered = 1;
3114                         filetype = BTRFS_FT_REG_FILE;
3115                 } else if (rec->found_dir_item) {
3116                         type_recovered = 1;
3117                         filetype = BTRFS_FT_DIR;
3118                 } else if (!list_empty(&rec->orphan_extents)) {
3119                         type_recovered = 1;
3120                         filetype = BTRFS_FT_REG_FILE;
3121                 } else{
3122                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3123                                rec->ino);
3124                         type_recovered = 1;
3125                         filetype = BTRFS_FT_REG_FILE;
3126                 }
3127         }
3128
3129         ret = btrfs_new_inode(trans, root, rec->ino,
3130                               mode | btrfs_type_to_imode(filetype));
3131         if (ret < 0)
3132                 goto out;
3133
3134         /*
3135          * Here inode rebuild is done, we only rebuild the inode item,
3136          * don't repair the nlink(like move to lost+found).
3137          * That is the job of nlink repair.
3138          *
3139          * We just fill the record and return
3140          */
3141         rec->found_dir_item = 1;
3142         rec->imode = mode | btrfs_type_to_imode(filetype);
3143         rec->nlink = 0;
3144         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3145         /* Ensure the inode_nlinks repair function will be called */
3146         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3147 out:
3148         return ret;
3149 }
3150
3151 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3152                                       struct btrfs_root *root,
3153                                       struct btrfs_path *path,
3154                                       struct inode_record *rec)
3155 {
3156         struct orphan_data_extent *orphan;
3157         struct orphan_data_extent *tmp;
3158         int ret = 0;
3159
3160         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3161                 /*
3162                  * Check for conflicting file extents
3163                  *
3164                  * Here we don't know whether the extents is compressed or not,
3165                  * so we can only assume it not compressed nor data offset,
3166                  * and use its disk_len as extent length.
3167                  */
3168                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3169                                        orphan->offset, orphan->disk_len, 0);
3170                 btrfs_release_path(path);
3171                 if (ret < 0)
3172                         goto out;
3173                 if (!ret) {
3174                         fprintf(stderr,
3175                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3176                                 orphan->disk_bytenr, orphan->disk_len);
3177                         ret = btrfs_free_extent(trans,
3178                                         root->fs_info->extent_root,
3179                                         orphan->disk_bytenr, orphan->disk_len,
3180                                         0, root->objectid, orphan->objectid,
3181                                         orphan->offset);
3182                         if (ret < 0)
3183                                 goto out;
3184                 }
3185                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3186                                 orphan->offset, orphan->disk_bytenr,
3187                                 orphan->disk_len, orphan->disk_len);
3188                 if (ret < 0)
3189                         goto out;
3190
3191                 /* Update file size info */
3192                 rec->found_size += orphan->disk_len;
3193                 if (rec->found_size == rec->nbytes)
3194                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3195
3196                 /* Update the file extent hole info too */
3197                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3198                                            orphan->disk_len);
3199                 if (ret < 0)
3200                         goto out;
3201                 if (RB_EMPTY_ROOT(&rec->holes))
3202                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3203
3204                 list_del(&orphan->list);
3205                 free(orphan);
3206         }
3207         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3208 out:
3209         return ret;
3210 }
3211
3212 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3213                                         struct btrfs_root *root,
3214                                         struct btrfs_path *path,
3215                                         struct inode_record *rec)
3216 {
3217         struct rb_node *node;
3218         struct file_extent_hole *hole;
3219         int found = 0;
3220         int ret = 0;
3221
3222         node = rb_first(&rec->holes);
3223
3224         while (node) {
3225                 found = 1;
3226                 hole = rb_entry(node, struct file_extent_hole, node);
3227                 ret = btrfs_punch_hole(trans, root, rec->ino,
3228                                        hole->start, hole->len);
3229                 if (ret < 0)
3230                         goto out;
3231                 ret = del_file_extent_hole(&rec->holes, hole->start,
3232                                            hole->len);
3233                 if (ret < 0)
3234                         goto out;
3235                 if (RB_EMPTY_ROOT(&rec->holes))
3236                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3237                 node = rb_first(&rec->holes);
3238         }
3239         /* special case for a file losing all its file extent */
3240         if (!found) {
3241                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3242                                        round_up(rec->isize,
3243                                                 root->fs_info->sectorsize));
3244                 if (ret < 0)
3245                         goto out;
3246         }
3247         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3248                rec->ino, root->objectid);
3249 out:
3250         return ret;
3251 }
3252
3253 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3254 {
3255         struct btrfs_trans_handle *trans;
3256         struct btrfs_path path;
3257         int ret = 0;
3258
3259         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3260                              I_ERR_NO_ORPHAN_ITEM |
3261                              I_ERR_LINK_COUNT_WRONG |
3262                              I_ERR_NO_INODE_ITEM |
3263                              I_ERR_FILE_EXTENT_ORPHAN |
3264                              I_ERR_FILE_EXTENT_DISCOUNT|
3265                              I_ERR_FILE_NBYTES_WRONG)))
3266                 return rec->errors;
3267
3268         /*
3269          * For nlink repair, it may create a dir and add link, so
3270          * 2 for parent(256)'s dir_index and dir_item
3271          * 2 for lost+found dir's inode_item and inode_ref
3272          * 1 for the new inode_ref of the file
3273          * 2 for lost+found dir's dir_index and dir_item for the file
3274          */
3275         trans = btrfs_start_transaction(root, 7);
3276         if (IS_ERR(trans))
3277                 return PTR_ERR(trans);
3278
3279         btrfs_init_path(&path);
3280         if (rec->errors & I_ERR_NO_INODE_ITEM)
3281                 ret = repair_inode_no_item(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3283                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3285                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3286         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3287                 ret = repair_inode_isize(trans, root, &path, rec);
3288         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3289                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3290         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3291                 ret = repair_inode_nlinks(trans, root, &path, rec);
3292         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3293                 ret = repair_inode_nbytes(trans, root, &path, rec);
3294         btrfs_commit_transaction(trans, root);
3295         btrfs_release_path(&path);
3296         return ret;
3297 }
3298
3299 static int check_inode_recs(struct btrfs_root *root,
3300                             struct cache_tree *inode_cache)
3301 {
3302         struct cache_extent *cache;
3303         struct ptr_node *node;
3304         struct inode_record *rec;
3305         struct inode_backref *backref;
3306         int stage = 0;
3307         int ret = 0;
3308         int err = 0;
3309         u64 error = 0;
3310         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3311
3312         if (btrfs_root_refs(&root->root_item) == 0) {
3313                 if (!cache_tree_empty(inode_cache))
3314                         fprintf(stderr, "warning line %d\n", __LINE__);
3315                 return 0;
3316         }
3317
3318         /*
3319          * We need to repair backrefs first because we could change some of the
3320          * errors in the inode recs.
3321          *
3322          * We also need to go through and delete invalid backrefs first and then
3323          * add the correct ones second.  We do this because we may get EEXIST
3324          * when adding back the correct index because we hadn't yet deleted the
3325          * invalid index.
3326          *
3327          * For example, if we were missing a dir index then the directories
3328          * isize would be wrong, so if we fixed the isize to what we thought it
3329          * would be and then fixed the backref we'd still have a invalid fs, so
3330          * we need to add back the dir index and then check to see if the isize
3331          * is still wrong.
3332          */
3333         while (stage < 3) {
3334                 stage++;
3335                 if (stage == 3 && !err)
3336                         break;
3337
3338                 cache = search_cache_extent(inode_cache, 0);
3339                 while (repair && cache) {
3340                         node = container_of(cache, struct ptr_node, cache);
3341                         rec = node->data;
3342                         cache = next_cache_extent(cache);
3343
3344                         /* Need to free everything up and rescan */
3345                         if (stage == 3) {
3346                                 remove_cache_extent(inode_cache, &node->cache);
3347                                 free(node);
3348                                 free_inode_rec(rec);
3349                                 continue;
3350                         }
3351
3352                         if (list_empty(&rec->backrefs))
3353                                 continue;
3354
3355                         ret = repair_inode_backrefs(root, rec, inode_cache,
3356                                                     stage == 1);
3357                         if (ret < 0) {
3358                                 err = ret;
3359                                 stage = 2;
3360                                 break;
3361                         } if (ret > 0) {
3362                                 err = -EAGAIN;
3363                         }
3364                 }
3365         }
3366         if (err)
3367                 return err;
3368
3369         rec = get_inode_rec(inode_cache, root_dirid, 0);
3370         BUG_ON(IS_ERR(rec));
3371         if (rec) {
3372                 ret = check_root_dir(rec);
3373                 if (ret) {
3374                         fprintf(stderr, "root %llu root dir %llu error\n",
3375                                 (unsigned long long)root->root_key.objectid,
3376                                 (unsigned long long)root_dirid);
3377                         print_inode_error(root, rec);
3378                         error++;
3379                 }
3380         } else {
3381                 if (repair) {
3382                         struct btrfs_trans_handle *trans;
3383
3384                         trans = btrfs_start_transaction(root, 1);
3385                         if (IS_ERR(trans)) {
3386                                 err = PTR_ERR(trans);
3387                                 return err;
3388                         }
3389
3390                         fprintf(stderr,
3391                                 "root %llu missing its root dir, recreating\n",
3392                                 (unsigned long long)root->objectid);
3393
3394                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3395                         BUG_ON(ret);
3396
3397                         btrfs_commit_transaction(trans, root);
3398                         return -EAGAIN;
3399                 }
3400
3401                 fprintf(stderr, "root %llu root dir %llu not found\n",
3402                         (unsigned long long)root->root_key.objectid,
3403                         (unsigned long long)root_dirid);
3404         }
3405
3406         while (1) {
3407                 cache = search_cache_extent(inode_cache, 0);
3408                 if (!cache)
3409                         break;
3410                 node = container_of(cache, struct ptr_node, cache);
3411                 rec = node->data;
3412                 remove_cache_extent(inode_cache, &node->cache);
3413                 free(node);
3414                 if (rec->ino == root_dirid ||
3415                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3416                         free_inode_rec(rec);
3417                         continue;
3418                 }
3419
3420                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3421                         ret = check_orphan_item(root, rec->ino);
3422                         if (ret == 0)
3423                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3424                         if (can_free_inode_rec(rec)) {
3425                                 free_inode_rec(rec);
3426                                 continue;
3427                         }
3428                 }
3429
3430                 if (!rec->found_inode_item)
3431                         rec->errors |= I_ERR_NO_INODE_ITEM;
3432                 if (rec->found_link != rec->nlink)
3433                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3434                 if (repair) {
3435                         ret = try_repair_inode(root, rec);
3436                         if (ret == 0 && can_free_inode_rec(rec)) {
3437                                 free_inode_rec(rec);
3438                                 continue;
3439                         }
3440                         ret = 0;
3441                 }
3442
3443                 if (!(repair && ret == 0))
3444                         error++;
3445                 print_inode_error(root, rec);
3446                 list_for_each_entry(backref, &rec->backrefs, list) {
3447                         if (!backref->found_dir_item)
3448                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3449                         if (!backref->found_dir_index)
3450                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3451                         if (!backref->found_inode_ref)
3452                                 backref->errors |= REF_ERR_NO_INODE_REF;
3453                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3454                                 " namelen %u name %s filetype %d errors %x",
3455                                 (unsigned long long)backref->dir,
3456                                 (unsigned long long)backref->index,
3457                                 backref->namelen, backref->name,
3458                                 backref->filetype, backref->errors);
3459                         print_ref_error(backref->errors);
3460                 }
3461                 free_inode_rec(rec);
3462         }
3463         return (error > 0) ? -1 : 0;
3464 }
3465
3466 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3467                                         u64 objectid)
3468 {
3469         struct cache_extent *cache;
3470         struct root_record *rec = NULL;
3471         int ret;
3472
3473         cache = lookup_cache_extent(root_cache, objectid, 1);
3474         if (cache) {
3475                 rec = container_of(cache, struct root_record, cache);
3476         } else {
3477                 rec = calloc(1, sizeof(*rec));
3478                 if (!rec)
3479                         return ERR_PTR(-ENOMEM);
3480                 rec->objectid = objectid;
3481                 INIT_LIST_HEAD(&rec->backrefs);
3482                 rec->cache.start = objectid;
3483                 rec->cache.size = 1;
3484
3485                 ret = insert_cache_extent(root_cache, &rec->cache);
3486                 if (ret)
3487                         return ERR_PTR(-EEXIST);
3488         }
3489         return rec;
3490 }
3491
3492 static struct root_backref *get_root_backref(struct root_record *rec,
3493                                              u64 ref_root, u64 dir, u64 index,
3494                                              const char *name, int namelen)
3495 {
3496         struct root_backref *backref;
3497
3498         list_for_each_entry(backref, &rec->backrefs, list) {
3499                 if (backref->ref_root != ref_root || backref->dir != dir ||
3500                     backref->namelen != namelen)
3501                         continue;
3502                 if (memcmp(name, backref->name, namelen))
3503                         continue;
3504                 return backref;
3505         }
3506
3507         backref = calloc(1, sizeof(*backref) + namelen + 1);
3508         if (!backref)
3509                 return NULL;
3510         backref->ref_root = ref_root;
3511         backref->dir = dir;
3512         backref->index = index;
3513         backref->namelen = namelen;
3514         memcpy(backref->name, name, namelen);
3515         backref->name[namelen] = '\0';
3516         list_add_tail(&backref->list, &rec->backrefs);
3517         return backref;
3518 }
3519
3520 static void free_root_record(struct cache_extent *cache)
3521 {
3522         struct root_record *rec;
3523         struct root_backref *backref;
3524
3525         rec = container_of(cache, struct root_record, cache);
3526         while (!list_empty(&rec->backrefs)) {
3527                 backref = to_root_backref(rec->backrefs.next);
3528                 list_del(&backref->list);
3529                 free(backref);
3530         }
3531
3532         free(rec);
3533 }
3534
3535 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3536
3537 static int add_root_backref(struct cache_tree *root_cache,
3538                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3539                             const char *name, int namelen,
3540                             int item_type, int errors)
3541 {
3542         struct root_record *rec;
3543         struct root_backref *backref;
3544
3545         rec = get_root_rec(root_cache, root_id);
3546         BUG_ON(IS_ERR(rec));
3547         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3548         BUG_ON(!backref);
3549
3550         backref->errors |= errors;
3551
3552         if (item_type != BTRFS_DIR_ITEM_KEY) {
3553                 if (backref->found_dir_index || backref->found_back_ref ||
3554                     backref->found_forward_ref) {
3555                         if (backref->index != index)
3556                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3557                 } else {
3558                         backref->index = index;
3559                 }
3560         }
3561
3562         if (item_type == BTRFS_DIR_ITEM_KEY) {
3563                 if (backref->found_forward_ref)
3564                         rec->found_ref++;
3565                 backref->found_dir_item = 1;
3566         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3567                 backref->found_dir_index = 1;
3568         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3569                 if (backref->found_forward_ref)
3570                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3571                 else if (backref->found_dir_item)
3572                         rec->found_ref++;
3573                 backref->found_forward_ref = 1;
3574         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3575                 if (backref->found_back_ref)
3576                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3577                 backref->found_back_ref = 1;
3578         } else {
3579                 BUG_ON(1);
3580         }
3581
3582         if (backref->found_forward_ref && backref->found_dir_item)
3583                 backref->reachable = 1;
3584         return 0;
3585 }
3586
3587 static int merge_root_recs(struct btrfs_root *root,
3588                            struct cache_tree *src_cache,
3589                            struct cache_tree *dst_cache)
3590 {
3591         struct cache_extent *cache;
3592         struct ptr_node *node;
3593         struct inode_record *rec;
3594         struct inode_backref *backref;
3595         int ret = 0;
3596
3597         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3598                 free_inode_recs_tree(src_cache);
3599                 return 0;
3600         }
3601
3602         while (1) {
3603                 cache = search_cache_extent(src_cache, 0);
3604                 if (!cache)
3605                         break;
3606                 node = container_of(cache, struct ptr_node, cache);
3607                 rec = node->data;
3608                 remove_cache_extent(src_cache, &node->cache);
3609                 free(node);
3610
3611                 ret = is_child_root(root, root->objectid, rec->ino);
3612                 if (ret < 0)
3613                         break;
3614                 else if (ret == 0)
3615                         goto skip;
3616
3617                 list_for_each_entry(backref, &rec->backrefs, list) {
3618                         BUG_ON(backref->found_inode_ref);
3619                         if (backref->found_dir_item)
3620                                 add_root_backref(dst_cache, rec->ino,
3621                                         root->root_key.objectid, backref->dir,
3622                                         backref->index, backref->name,
3623                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3624                                         backref->errors);
3625                         if (backref->found_dir_index)
3626                                 add_root_backref(dst_cache, rec->ino,
3627                                         root->root_key.objectid, backref->dir,
3628                                         backref->index, backref->name,
3629                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3630                                         backref->errors);
3631                 }
3632 skip:
3633                 free_inode_rec(rec);
3634         }
3635         if (ret < 0)
3636                 return ret;
3637         return 0;
3638 }
3639
3640 static int check_root_refs(struct btrfs_root *root,
3641                            struct cache_tree *root_cache)
3642 {
3643         struct root_record *rec;
3644         struct root_record *ref_root;
3645         struct root_backref *backref;
3646         struct cache_extent *cache;
3647         int loop = 1;
3648         int ret;
3649         int error;
3650         int errors = 0;
3651
3652         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3653         BUG_ON(IS_ERR(rec));
3654         rec->found_ref = 1;
3655
3656         /* fixme: this can not detect circular references */
3657         while (loop) {
3658                 loop = 0;
3659                 cache = search_cache_extent(root_cache, 0);
3660                 while (1) {
3661                         if (!cache)
3662                                 break;
3663                         rec = container_of(cache, struct root_record, cache);
3664                         cache = next_cache_extent(cache);
3665
3666                         if (rec->found_ref == 0)
3667                                 continue;
3668
3669                         list_for_each_entry(backref, &rec->backrefs, list) {
3670                                 if (!backref->reachable)
3671                                         continue;
3672
3673                                 ref_root = get_root_rec(root_cache,
3674                                                         backref->ref_root);
3675                                 BUG_ON(IS_ERR(ref_root));
3676                                 if (ref_root->found_ref > 0)
3677                                         continue;
3678
3679                                 backref->reachable = 0;
3680                                 rec->found_ref--;
3681                                 if (rec->found_ref == 0)
3682                                         loop = 1;
3683                         }
3684                 }
3685         }
3686
3687         cache = search_cache_extent(root_cache, 0);
3688         while (1) {
3689                 if (!cache)
3690                         break;
3691                 rec = container_of(cache, struct root_record, cache);
3692                 cache = next_cache_extent(cache);
3693
3694                 if (rec->found_ref == 0 &&
3695                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3696                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3697                         ret = check_orphan_item(root->fs_info->tree_root,
3698                                                 rec->objectid);
3699                         if (ret == 0)
3700                                 continue;
3701
3702                         /*
3703                          * If we don't have a root item then we likely just have
3704                          * a dir item in a snapshot for this root but no actual
3705                          * ref key or anything so it's meaningless.
3706                          */
3707                         if (!rec->found_root_item)
3708                                 continue;
3709                         errors++;
3710                         fprintf(stderr, "fs tree %llu not referenced\n",
3711                                 (unsigned long long)rec->objectid);
3712                 }
3713
3714                 error = 0;
3715                 if (rec->found_ref > 0 && !rec->found_root_item)
3716                         error = 1;
3717                 list_for_each_entry(backref, &rec->backrefs, list) {
3718                         if (!backref->found_dir_item)
3719                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3720                         if (!backref->found_dir_index)
3721                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3722                         if (!backref->found_back_ref)
3723                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3724                         if (!backref->found_forward_ref)
3725                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3726                         if (backref->reachable && backref->errors)
3727                                 error = 1;
3728                 }
3729                 if (!error)
3730                         continue;
3731
3732                 errors++;
3733                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3734                         (unsigned long long)rec->objectid, rec->found_ref,
3735                          rec->found_root_item ? "" : "not found");
3736
3737                 list_for_each_entry(backref, &rec->backrefs, list) {
3738                         if (!backref->reachable)
3739                                 continue;
3740                         if (!backref->errors && rec->found_root_item)
3741                                 continue;
3742                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3743                                 " index %llu namelen %u name %s errors %x\n",
3744                                 (unsigned long long)backref->ref_root,
3745                                 (unsigned long long)backref->dir,
3746                                 (unsigned long long)backref->index,
3747                                 backref->namelen, backref->name,
3748                                 backref->errors);
3749                         print_ref_error(backref->errors);
3750                 }
3751         }
3752         return errors > 0 ? 1 : 0;
3753 }
3754
3755 static int process_root_ref(struct extent_buffer *eb, int slot,
3756                             struct btrfs_key *key,
3757                             struct cache_tree *root_cache)
3758 {
3759         u64 dirid;
3760         u64 index;
3761         u32 len;
3762         u32 name_len;
3763         struct btrfs_root_ref *ref;
3764         char namebuf[BTRFS_NAME_LEN];
3765         int error;
3766
3767         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3768
3769         dirid = btrfs_root_ref_dirid(eb, ref);
3770         index = btrfs_root_ref_sequence(eb, ref);
3771         name_len = btrfs_root_ref_name_len(eb, ref);
3772
3773         if (name_len <= BTRFS_NAME_LEN) {
3774                 len = name_len;
3775                 error = 0;
3776         } else {
3777                 len = BTRFS_NAME_LEN;
3778                 error = REF_ERR_NAME_TOO_LONG;
3779         }
3780         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3781
3782         if (key->type == BTRFS_ROOT_REF_KEY) {
3783                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3784                                  index, namebuf, len, key->type, error);
3785         } else {
3786                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3787                                  index, namebuf, len, key->type, error);
3788         }
3789         return 0;
3790 }
3791
3792 static void free_corrupt_block(struct cache_extent *cache)
3793 {
3794         struct btrfs_corrupt_block *corrupt;
3795
3796         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3797         free(corrupt);
3798 }
3799
3800 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3801
3802 /*
3803  * Repair the btree of the given root.
3804  *
3805  * The fix is to remove the node key in corrupt_blocks cache_tree.
3806  * and rebalance the tree.
3807  * After the fix, the btree should be writeable.
3808  */
3809 static int repair_btree(struct btrfs_root *root,
3810                         struct cache_tree *corrupt_blocks)
3811 {
3812         struct btrfs_trans_handle *trans;
3813         struct btrfs_path path;
3814         struct btrfs_corrupt_block *corrupt;
3815         struct cache_extent *cache;
3816         struct btrfs_key key;
3817         u64 offset;
3818         int level;
3819         int ret = 0;
3820
3821         if (cache_tree_empty(corrupt_blocks))
3822                 return 0;
3823
3824         trans = btrfs_start_transaction(root, 1);
3825         if (IS_ERR(trans)) {
3826                 ret = PTR_ERR(trans);
3827                 fprintf(stderr, "Error starting transaction: %s\n",
3828                         strerror(-ret));
3829                 return ret;
3830         }
3831         btrfs_init_path(&path);
3832         cache = first_cache_extent(corrupt_blocks);
3833         while (cache) {
3834                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3835                                        cache);
3836                 level = corrupt->level;
3837                 path.lowest_level = level;
3838                 key.objectid = corrupt->key.objectid;
3839                 key.type = corrupt->key.type;
3840                 key.offset = corrupt->key.offset;
3841
3842                 /*
3843                  * Here we don't want to do any tree balance, since it may
3844                  * cause a balance with corrupted brother leaf/node,
3845                  * so ins_len set to 0 here.
3846                  * Balance will be done after all corrupt node/leaf is deleted.
3847                  */
3848                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3849                 if (ret < 0)
3850                         goto out;
3851                 offset = btrfs_node_blockptr(path.nodes[level],
3852                                              path.slots[level]);
3853
3854                 /* Remove the ptr */
3855                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3856                 if (ret < 0)
3857                         goto out;
3858                 /*
3859                  * Remove the corresponding extent
3860                  * return value is not concerned.
3861                  */
3862                 btrfs_release_path(&path);
3863                 ret = btrfs_free_extent(trans, root, offset,
3864                                 root->fs_info->nodesize, 0,
3865                                 root->root_key.objectid, level - 1, 0);
3866                 cache = next_cache_extent(cache);
3867         }
3868
3869         /* Balance the btree using btrfs_search_slot() */
3870         cache = first_cache_extent(corrupt_blocks);
3871         while (cache) {
3872                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3873                                        cache);
3874                 memcpy(&key, &corrupt->key, sizeof(key));
3875                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3876                 if (ret < 0)
3877                         goto out;
3878                 /* return will always >0 since it won't find the item */
3879                 ret = 0;
3880                 btrfs_release_path(&path);
3881                 cache = next_cache_extent(cache);
3882         }
3883 out:
3884         btrfs_commit_transaction(trans, root);
3885         btrfs_release_path(&path);
3886         return ret;
3887 }
3888
3889 static int check_fs_root(struct btrfs_root *root,
3890                          struct cache_tree *root_cache,
3891                          struct walk_control *wc)
3892 {
3893         int ret = 0;
3894         int err = 0;
3895         int wret;
3896         int level;
3897         struct btrfs_path path;
3898         struct shared_node root_node;
3899         struct root_record *rec;
3900         struct btrfs_root_item *root_item = &root->root_item;
3901         struct cache_tree corrupt_blocks;
3902         struct orphan_data_extent *orphan;
3903         struct orphan_data_extent *tmp;
3904         enum btrfs_tree_block_status status;
3905         struct node_refs nrefs;
3906
3907         /*
3908          * Reuse the corrupt_block cache tree to record corrupted tree block
3909          *
3910          * Unlike the usage in extent tree check, here we do it in a per
3911          * fs/subvol tree base.
3912          */
3913         cache_tree_init(&corrupt_blocks);
3914         root->fs_info->corrupt_blocks = &corrupt_blocks;
3915
3916         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3917                 rec = get_root_rec(root_cache, root->root_key.objectid);
3918                 BUG_ON(IS_ERR(rec));
3919                 if (btrfs_root_refs(root_item) > 0)
3920                         rec->found_root_item = 1;
3921         }
3922
3923         btrfs_init_path(&path);
3924         memset(&root_node, 0, sizeof(root_node));
3925         cache_tree_init(&root_node.root_cache);
3926         cache_tree_init(&root_node.inode_cache);
3927         memset(&nrefs, 0, sizeof(nrefs));
3928
3929         /* Move the orphan extent record to corresponding inode_record */
3930         list_for_each_entry_safe(orphan, tmp,
3931                                  &root->orphan_data_extents, list) {
3932                 struct inode_record *inode;
3933
3934                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3935                                       1);
3936                 BUG_ON(IS_ERR(inode));
3937                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3938                 list_move(&orphan->list, &inode->orphan_extents);
3939         }
3940
3941         level = btrfs_header_level(root->node);
3942         memset(wc->nodes, 0, sizeof(wc->nodes));
3943         wc->nodes[level] = &root_node;
3944         wc->active_node = level;
3945         wc->root_level = level;
3946
3947         /* We may not have checked the root block, lets do that now */
3948         if (btrfs_is_leaf(root->node))
3949                 status = btrfs_check_leaf(root, NULL, root->node);
3950         else
3951                 status = btrfs_check_node(root, NULL, root->node);
3952         if (status != BTRFS_TREE_BLOCK_CLEAN)
3953                 return -EIO;
3954
3955         if (btrfs_root_refs(root_item) > 0 ||
3956             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3957                 path.nodes[level] = root->node;
3958                 extent_buffer_get(root->node);
3959                 path.slots[level] = 0;
3960         } else {
3961                 struct btrfs_key key;
3962                 struct btrfs_disk_key found_key;
3963
3964                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3965                 level = root_item->drop_level;
3966                 path.lowest_level = level;
3967                 if (level > btrfs_header_level(root->node) ||
3968                     level >= BTRFS_MAX_LEVEL) {
3969                         error("ignoring invalid drop level: %u", level);
3970                         goto skip_walking;
3971                 }
3972                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3973                 if (wret < 0)
3974                         goto skip_walking;
3975                 btrfs_node_key(path.nodes[level], &found_key,
3976                                 path.slots[level]);
3977                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3978                                         sizeof(found_key)));
3979         }
3980
3981         while (1) {
3982                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3983                 if (wret < 0)
3984                         ret = wret;
3985                 if (wret != 0)
3986                         break;
3987
3988                 wret = walk_up_tree(root, &path, wc, &level);
3989                 if (wret < 0)
3990                         ret = wret;
3991                 if (wret != 0)
3992                         break;
3993         }
3994 skip_walking:
3995         btrfs_release_path(&path);
3996
3997         if (!cache_tree_empty(&corrupt_blocks)) {
3998                 struct cache_extent *cache;
3999                 struct btrfs_corrupt_block *corrupt;
4000
4001                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4002                        root->root_key.objectid);
4003                 cache = first_cache_extent(&corrupt_blocks);
4004                 while (cache) {
4005                         corrupt = container_of(cache,
4006                                                struct btrfs_corrupt_block,
4007                                                cache);
4008                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4009                                cache->start, corrupt->level,
4010                                corrupt->key.objectid, corrupt->key.type,
4011                                corrupt->key.offset);
4012                         cache = next_cache_extent(cache);
4013                 }
4014                 if (repair) {
4015                         printf("Try to repair the btree for root %llu\n",
4016                                root->root_key.objectid);
4017                         ret = repair_btree(root, &corrupt_blocks);
4018                         if (ret < 0)
4019                                 fprintf(stderr, "Failed to repair btree: %s\n",
4020                                         strerror(-ret));
4021                         if (!ret)
4022                                 printf("Btree for root %llu is fixed\n",
4023                                        root->root_key.objectid);
4024                 }
4025         }
4026
4027         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4028         if (err < 0)
4029                 ret = err;
4030
4031         if (root_node.current) {
4032                 root_node.current->checked = 1;
4033                 maybe_free_inode_rec(&root_node.inode_cache,
4034                                 root_node.current);
4035         }
4036
4037         err = check_inode_recs(root, &root_node.inode_cache);
4038         if (!ret)
4039                 ret = err;
4040
4041         free_corrupt_blocks_tree(&corrupt_blocks);
4042         root->fs_info->corrupt_blocks = NULL;
4043         free_orphan_data_extents(&root->orphan_data_extents);
4044         return ret;
4045 }
4046
4047 static int fs_root_objectid(u64 objectid)
4048 {
4049         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4050             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4051                 return 1;
4052         return is_fstree(objectid);
4053 }
4054
4055 static int check_fs_roots(struct btrfs_root *root,
4056                           struct cache_tree *root_cache)
4057 {
4058         struct btrfs_path path;
4059         struct btrfs_key key;
4060         struct walk_control wc;
4061         struct extent_buffer *leaf, *tree_node;
4062         struct btrfs_root *tmp_root;
4063         struct btrfs_root *tree_root = root->fs_info->tree_root;
4064         int ret;
4065         int err = 0;
4066
4067         if (ctx.progress_enabled) {
4068                 ctx.tp = TASK_FS_ROOTS;
4069                 task_start(ctx.info);
4070         }
4071
4072         /*
4073          * Just in case we made any changes to the extent tree that weren't
4074          * reflected into the free space cache yet.
4075          */
4076         if (repair)
4077                 reset_cached_block_groups(root->fs_info);
4078         memset(&wc, 0, sizeof(wc));
4079         cache_tree_init(&wc.shared);
4080         btrfs_init_path(&path);
4081
4082 again:
4083         key.offset = 0;
4084         key.objectid = 0;
4085         key.type = BTRFS_ROOT_ITEM_KEY;
4086         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4087         if (ret < 0) {
4088                 err = 1;
4089                 goto out;
4090         }
4091         tree_node = tree_root->node;
4092         while (1) {
4093                 if (tree_node != tree_root->node) {
4094                         free_root_recs_tree(root_cache);
4095                         btrfs_release_path(&path);
4096                         goto again;
4097                 }
4098                 leaf = path.nodes[0];
4099                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4100                         ret = btrfs_next_leaf(tree_root, &path);
4101                         if (ret) {
4102                                 if (ret < 0)
4103                                         err = 1;
4104                                 break;
4105                         }
4106                         leaf = path.nodes[0];
4107                 }
4108                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4109                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4110                     fs_root_objectid(key.objectid)) {
4111                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4112                                 tmp_root = btrfs_read_fs_root_no_cache(
4113                                                 root->fs_info, &key);
4114                         } else {
4115                                 key.offset = (u64)-1;
4116                                 tmp_root = btrfs_read_fs_root(
4117                                                 root->fs_info, &key);
4118                         }
4119                         if (IS_ERR(tmp_root)) {
4120                                 err = 1;
4121                                 goto next;
4122                         }
4123                         ret = check_fs_root(tmp_root, root_cache, &wc);
4124                         if (ret == -EAGAIN) {
4125                                 free_root_recs_tree(root_cache);
4126                                 btrfs_release_path(&path);
4127                                 goto again;
4128                         }
4129                         if (ret)
4130                                 err = 1;
4131                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4132                                 btrfs_free_fs_root(tmp_root);
4133                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4134                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4135                         process_root_ref(leaf, path.slots[0], &key,
4136                                          root_cache);
4137                 }
4138 next:
4139                 path.slots[0]++;
4140         }
4141 out:
4142         btrfs_release_path(&path);
4143         if (err)
4144                 free_extent_cache_tree(&wc.shared);
4145         if (!cache_tree_empty(&wc.shared))
4146                 fprintf(stderr, "warning line %d\n", __LINE__);
4147
4148         task_stop(ctx.info);
4149
4150         return err;
4151 }
4152
4153 /*
4154  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4155  * INODE_REF/INODE_EXTREF match.
4156  *
4157  * @root:       the root of the fs/file tree
4158  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4159  * @key:        the key of the DIR_ITEM/DIR_INDEX
4160  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4161  *              distinguish root_dir between normal dir/file
4162  * @name:       the name in the INODE_REF/INODE_EXTREF
4163  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4164  * @mode:       the st_mode of INODE_ITEM
4165  *
4166  * Return 0 if no error occurred.
4167  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4168  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4169  * dir/file.
4170  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4171  * not match for normal dir/file.
4172  */
4173 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4174                          struct btrfs_key *key, u64 index, char *name,
4175                          u32 namelen, u32 mode)
4176 {
4177         struct btrfs_path path;
4178         struct extent_buffer *node;
4179         struct btrfs_dir_item *di;
4180         struct btrfs_key location;
4181         char namebuf[BTRFS_NAME_LEN] = {0};
4182         u32 total;
4183         u32 cur = 0;
4184         u32 len;
4185         u32 name_len;
4186         u32 data_len;
4187         u8 filetype;
4188         int slot;
4189         int ret;
4190
4191         btrfs_init_path(&path);
4192         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4193         if (ret < 0) {
4194                 ret = DIR_ITEM_MISSING;
4195                 goto out;
4196         }
4197
4198         /* Process root dir and goto out*/
4199         if (index == 0) {
4200                 if (ret == 0) {
4201                         ret = ROOT_DIR_ERROR;
4202                         error(
4203                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4204                                 root->objectid,
4205                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4206                                         "REF" : "EXTREF",
4207                                 ref_key->objectid, ref_key->offset,
4208                                 key->type == BTRFS_DIR_ITEM_KEY ?
4209                                         "DIR_ITEM" : "DIR_INDEX");
4210                 } else {
4211                         ret = 0;
4212                 }
4213
4214                 goto out;
4215         }
4216
4217         /* Process normal file/dir */
4218         if (ret > 0) {
4219                 ret = DIR_ITEM_MISSING;
4220                 error(
4221                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4222                         root->objectid,
4223                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4224                         ref_key->objectid, ref_key->offset,
4225                         key->type == BTRFS_DIR_ITEM_KEY ?
4226                                 "DIR_ITEM" : "DIR_INDEX",
4227                         key->objectid, key->offset, namelen, name,
4228                         imode_to_type(mode));
4229                 goto out;
4230         }
4231
4232         /* Check whether inode_id/filetype/name match */
4233         node = path.nodes[0];
4234         slot = path.slots[0];
4235         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4236         total = btrfs_item_size_nr(node, slot);
4237         while (cur < total) {
4238                 ret = DIR_ITEM_MISMATCH;
4239                 name_len = btrfs_dir_name_len(node, di);
4240                 data_len = btrfs_dir_data_len(node, di);
4241
4242                 btrfs_dir_item_key_to_cpu(node, di, &location);
4243                 if (location.objectid != ref_key->objectid ||
4244                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4245                     location.offset != 0)
4246                         goto next;
4247
4248                 filetype = btrfs_dir_type(node, di);
4249                 if (imode_to_type(mode) != filetype)
4250                         goto next;
4251
4252                 if (cur + sizeof(*di) + name_len > total ||
4253                     name_len > BTRFS_NAME_LEN) {
4254                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4255                                 root->objectid,
4256                                 key->type == BTRFS_DIR_ITEM_KEY ?
4257                                 "DIR_ITEM" : "DIR_INDEX",
4258                                 key->objectid, key->offset, name_len);
4259
4260                         if (cur + sizeof(*di) > total)
4261                                 break;
4262                         len = min_t(u32, total - cur - sizeof(*di),
4263                                     BTRFS_NAME_LEN);
4264                 } else {
4265                         len = name_len;
4266                 }
4267
4268                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4269                 if (len != namelen || strncmp(namebuf, name, len))
4270                         goto next;
4271
4272                 ret = 0;
4273                 goto out;
4274 next:
4275                 len = sizeof(*di) + name_len + data_len;
4276                 di = (struct btrfs_dir_item *)((char *)di + len);
4277                 cur += len;
4278         }
4279         if (ret == DIR_ITEM_MISMATCH)
4280                 error(
4281                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4282                         root->objectid,
4283                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4284                         ref_key->objectid, ref_key->offset,
4285                         key->type == BTRFS_DIR_ITEM_KEY ?
4286                                 "DIR_ITEM" : "DIR_INDEX",
4287                         key->objectid, key->offset, namelen, name,
4288                         imode_to_type(mode));
4289 out:
4290         btrfs_release_path(&path);
4291         return ret;
4292 }
4293
4294 /*
4295  * Traverse the given INODE_REF and call find_dir_item() to find related
4296  * DIR_ITEM/DIR_INDEX.
4297  *
4298  * @root:       the root of the fs/file tree
4299  * @ref_key:    the key of the INODE_REF
4300  * @refs:       the count of INODE_REF
4301  * @mode:       the st_mode of INODE_ITEM
4302  *
4303  * Return 0 if no error occurred.
4304  */
4305 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4306                            struct extent_buffer *node, int slot, u64 *refs,
4307                            int mode)
4308 {
4309         struct btrfs_key key;
4310         struct btrfs_inode_ref *ref;
4311         char namebuf[BTRFS_NAME_LEN] = {0};
4312         u32 total;
4313         u32 cur = 0;
4314         u32 len;
4315         u32 name_len;
4316         u64 index;
4317         int ret, err = 0;
4318
4319         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4320         total = btrfs_item_size_nr(node, slot);
4321
4322 next:
4323         /* Update inode ref count */
4324         (*refs)++;
4325
4326         index = btrfs_inode_ref_index(node, ref);
4327         name_len = btrfs_inode_ref_name_len(node, ref);
4328         if (cur + sizeof(*ref) + name_len > total ||
4329             name_len > BTRFS_NAME_LEN) {
4330                 warning("root %llu INODE_REF[%llu %llu] name too long",
4331                         root->objectid, ref_key->objectid, ref_key->offset);
4332
4333                 if (total < cur + sizeof(*ref))
4334                         goto out;
4335                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4336         } else {
4337                 len = name_len;
4338         }
4339
4340         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4341
4342         /* Check root dir ref name */
4343         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4344                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4345                       root->objectid, ref_key->objectid, ref_key->offset,
4346                       namebuf);
4347                 err |= ROOT_DIR_ERROR;
4348         }
4349
4350         /* Find related DIR_INDEX */
4351         key.objectid = ref_key->offset;
4352         key.type = BTRFS_DIR_INDEX_KEY;
4353         key.offset = index;
4354         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4355         err |= ret;
4356
4357         /* Find related dir_item */
4358         key.objectid = ref_key->offset;
4359         key.type = BTRFS_DIR_ITEM_KEY;
4360         key.offset = btrfs_name_hash(namebuf, len);
4361         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4362         err |= ret;
4363
4364         len = sizeof(*ref) + name_len;
4365         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4366         cur += len;
4367         if (cur < total)
4368                 goto next;
4369
4370 out:
4371         return err;
4372 }
4373
4374 /*
4375  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4376  * DIR_ITEM/DIR_INDEX.
4377  *
4378  * @root:       the root of the fs/file tree
4379  * @ref_key:    the key of the INODE_EXTREF
4380  * @refs:       the count of INODE_EXTREF
4381  * @mode:       the st_mode of INODE_ITEM
4382  *
4383  * Return 0 if no error occurred.
4384  */
4385 static int check_inode_extref(struct btrfs_root *root,
4386                               struct btrfs_key *ref_key,
4387                               struct extent_buffer *node, int slot, u64 *refs,
4388                               int mode)
4389 {
4390         struct btrfs_key key;
4391         struct btrfs_inode_extref *extref;
4392         char namebuf[BTRFS_NAME_LEN] = {0};
4393         u32 total;
4394         u32 cur = 0;
4395         u32 len;
4396         u32 name_len;
4397         u64 index;
4398         u64 parent;
4399         int ret;
4400         int err = 0;
4401
4402         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4403         total = btrfs_item_size_nr(node, slot);
4404
4405 next:
4406         /* update inode ref count */
4407         (*refs)++;
4408         name_len = btrfs_inode_extref_name_len(node, extref);
4409         index = btrfs_inode_extref_index(node, extref);
4410         parent = btrfs_inode_extref_parent(node, extref);
4411         if (name_len <= BTRFS_NAME_LEN) {
4412                 len = name_len;
4413         } else {
4414                 len = BTRFS_NAME_LEN;
4415                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4416                         root->objectid, ref_key->objectid, ref_key->offset);
4417         }
4418         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4419
4420         /* Check root dir ref name */
4421         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4422                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4423                       root->objectid, ref_key->objectid, ref_key->offset,
4424                       namebuf);
4425                 err |= ROOT_DIR_ERROR;
4426         }
4427
4428         /* find related dir_index */
4429         key.objectid = parent;
4430         key.type = BTRFS_DIR_INDEX_KEY;
4431         key.offset = index;
4432         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4433         err |= ret;
4434
4435         /* find related dir_item */
4436         key.objectid = parent;
4437         key.type = BTRFS_DIR_ITEM_KEY;
4438         key.offset = btrfs_name_hash(namebuf, len);
4439         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4440         err |= ret;
4441
4442         len = sizeof(*extref) + name_len;
4443         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4444         cur += len;
4445
4446         if (cur < total)
4447                 goto next;
4448
4449         return err;
4450 }
4451
4452 /*
4453  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4454  * DIR_ITEM/DIR_INDEX match.
4455  *
4456  * @root:       the root of the fs/file tree
4457  * @key:        the key of the INODE_REF/INODE_EXTREF
4458  * @name:       the name in the INODE_REF/INODE_EXTREF
4459  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4460  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4461  * to (u64)-1
4462  * @ext_ref:    the EXTENDED_IREF feature
4463  *
4464  * Return 0 if no error occurred.
4465  * Return >0 for error bitmap
4466  */
4467 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4468                           char *name, int namelen, u64 index,
4469                           unsigned int ext_ref)
4470 {
4471         struct btrfs_path path;
4472         struct btrfs_inode_ref *ref;
4473         struct btrfs_inode_extref *extref;
4474         struct extent_buffer *node;
4475         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4476         u32 total;
4477         u32 cur = 0;
4478         u32 len;
4479         u32 ref_namelen;
4480         u64 ref_index;
4481         u64 parent;
4482         u64 dir_id;
4483         int slot;
4484         int ret;
4485
4486         btrfs_init_path(&path);
4487         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4488         if (ret) {
4489                 ret = INODE_REF_MISSING;
4490                 goto extref;
4491         }
4492
4493         node = path.nodes[0];
4494         slot = path.slots[0];
4495
4496         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4497         total = btrfs_item_size_nr(node, slot);
4498
4499         /* Iterate all entry of INODE_REF */
4500         while (cur < total) {
4501                 ret = INODE_REF_MISSING;
4502
4503                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4504                 ref_index = btrfs_inode_ref_index(node, ref);
4505                 if (index != (u64)-1 && index != ref_index)
4506                         goto next_ref;
4507
4508                 if (cur + sizeof(*ref) + ref_namelen > total ||
4509                     ref_namelen > BTRFS_NAME_LEN) {
4510                         warning("root %llu INODE %s[%llu %llu] name too long",
4511                                 root->objectid,
4512                                 key->type == BTRFS_INODE_REF_KEY ?
4513                                         "REF" : "EXTREF",
4514                                 key->objectid, key->offset);
4515
4516                         if (cur + sizeof(*ref) > total)
4517                                 break;
4518                         len = min_t(u32, total - cur - sizeof(*ref),
4519                                     BTRFS_NAME_LEN);
4520                 } else {
4521                         len = ref_namelen;
4522                 }
4523
4524                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4525                                    len);
4526
4527                 if (len != namelen || strncmp(ref_namebuf, name, len))
4528                         goto next_ref;
4529
4530                 ret = 0;
4531                 goto out;
4532 next_ref:
4533                 len = sizeof(*ref) + ref_namelen;
4534                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4535                 cur += len;
4536         }
4537
4538 extref:
4539         /* Skip if not support EXTENDED_IREF feature */
4540         if (!ext_ref)
4541                 goto out;
4542
4543         btrfs_release_path(&path);
4544         btrfs_init_path(&path);
4545
4546         dir_id = key->offset;
4547         key->type = BTRFS_INODE_EXTREF_KEY;
4548         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4549
4550         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4551         if (ret) {
4552                 ret = INODE_REF_MISSING;
4553                 goto out;
4554         }
4555
4556         node = path.nodes[0];
4557         slot = path.slots[0];
4558
4559         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4560         cur = 0;
4561         total = btrfs_item_size_nr(node, slot);
4562
4563         /* Iterate all entry of INODE_EXTREF */
4564         while (cur < total) {
4565                 ret = INODE_REF_MISSING;
4566
4567                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4568                 ref_index = btrfs_inode_extref_index(node, extref);
4569                 parent = btrfs_inode_extref_parent(node, extref);
4570                 if (index != (u64)-1 && index != ref_index)
4571                         goto next_extref;
4572
4573                 if (parent != dir_id)
4574                         goto next_extref;
4575
4576                 if (ref_namelen <= BTRFS_NAME_LEN) {
4577                         len = ref_namelen;
4578                 } else {
4579                         len = BTRFS_NAME_LEN;
4580                         warning("root %llu INODE %s[%llu %llu] name too long",
4581                                 root->objectid,
4582                                 key->type == BTRFS_INODE_REF_KEY ?
4583                                         "REF" : "EXTREF",
4584                                 key->objectid, key->offset);
4585                 }
4586                 read_extent_buffer(node, ref_namebuf,
4587                                    (unsigned long)(extref + 1), len);
4588
4589                 if (len != namelen || strncmp(ref_namebuf, name, len))
4590                         goto next_extref;
4591
4592                 ret = 0;
4593                 goto out;
4594
4595 next_extref:
4596                 len = sizeof(*extref) + ref_namelen;
4597                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4598                 cur += len;
4599
4600         }
4601 out:
4602         btrfs_release_path(&path);
4603         return ret;
4604 }
4605
4606 /*
4607  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4608  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4609  *
4610  * @root:       the root of the fs/file tree
4611  * @key:        the key of the DIR_ITEM/DIR_INDEX being traversed
4612  * @size:       the st_size of the INODE_ITEM
4613  * @ext_ref:    the EXTENDED_IREF feature
4614  *
4615  * Return 0 if no error occurred.
4616  */
4617 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4618                           struct extent_buffer *node, int slot, u64 *size,
4619                           unsigned int ext_ref)
4620 {
4621         struct btrfs_dir_item *di;
4622         struct btrfs_inode_item *ii;
4623         struct btrfs_path path;
4624         struct btrfs_key location;
4625         char namebuf[BTRFS_NAME_LEN] = {0};
4626         u32 total;
4627         u32 cur = 0;
4628         u32 len;
4629         u32 name_len;
4630         u32 data_len;
4631         u8 filetype;
4632         u32 mode;
4633         u64 index;
4634         int ret;
4635         int err = 0;
4636
4637         /*
4638          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4639          * ignore index check.
4640          */
4641         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4642
4643         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4644         total = btrfs_item_size_nr(node, slot);
4645
4646         while (cur < total) {
4647                 data_len = btrfs_dir_data_len(node, di);
4648                 if (data_len)
4649                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4650                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4651                               "DIR_ITEM" : "DIR_INDEX",
4652                               key->objectid, key->offset, data_len);
4653
4654                 name_len = btrfs_dir_name_len(node, di);
4655                 if (cur + sizeof(*di) + name_len > total ||
4656                     name_len > BTRFS_NAME_LEN) {
4657                         warning("root %llu %s[%llu %llu] name too long",
4658                                 root->objectid,
4659                                 key->type == BTRFS_DIR_ITEM_KEY ?
4660                                 "DIR_ITEM" : "DIR_INDEX",
4661                                 key->objectid, key->offset);
4662
4663                         if (cur + sizeof(*di) > total)
4664                                 break;
4665                         len = min_t(u32, total - cur - sizeof(*di),
4666                                     BTRFS_NAME_LEN);
4667                 } else {
4668                         len = name_len;
4669                 }
4670                 (*size) += name_len;
4671
4672                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4673                 filetype = btrfs_dir_type(node, di);
4674
4675                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4676                     key->offset != btrfs_name_hash(namebuf, len)) {
4677                         err |= -EIO;
4678                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4679                                 root->objectid, key->objectid, key->offset,
4680                                 namebuf, len, filetype, key->offset,
4681                                 btrfs_name_hash(namebuf, len));
4682                 }
4683
4684                 btrfs_init_path(&path);
4685                 btrfs_dir_item_key_to_cpu(node, di, &location);
4686
4687                 /* Ignore related ROOT_ITEM check */
4688                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4689                         goto next;
4690
4691                 /* Check relative INODE_ITEM(existence/filetype) */
4692                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4693                 if (ret) {
4694                         err |= INODE_ITEM_MISSING;
4695                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4696                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4697                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4698                               key->offset, location.objectid, name_len,
4699                               namebuf, filetype);
4700                         goto next;
4701                 }
4702
4703                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4704                                     struct btrfs_inode_item);
4705                 mode = btrfs_inode_mode(path.nodes[0], ii);
4706
4707                 if (imode_to_type(mode) != filetype) {
4708                         err |= INODE_ITEM_MISMATCH;
4709                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4710                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4711                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4712                               key->offset, name_len, namebuf, filetype);
4713                 }
4714
4715                 /* Check relative INODE_REF/INODE_EXTREF */
4716                 location.type = BTRFS_INODE_REF_KEY;
4717                 location.offset = key->objectid;
4718                 ret = find_inode_ref(root, &location, namebuf, len,
4719                                        index, ext_ref);
4720                 err |= ret;
4721                 if (ret & INODE_REF_MISSING)
4722                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4723                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4724                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4725                               key->offset, name_len, namebuf, filetype);
4726
4727 next:
4728                 btrfs_release_path(&path);
4729                 len = sizeof(*di) + name_len + data_len;
4730                 di = (struct btrfs_dir_item *)((char *)di + len);
4731                 cur += len;
4732
4733                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4734                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4735                               root->objectid, key->objectid, key->offset);
4736                         break;
4737                 }
4738         }
4739
4740         return err;
4741 }
4742
4743 /*
4744  * Check file extent datasum/hole, update the size of the file extents,
4745  * check and update the last offset of the file extent.
4746  *
4747  * @root:       the root of fs/file tree.
4748  * @fkey:       the key of the file extent.
4749  * @nodatasum:  INODE_NODATASUM feature.
4750  * @size:       the sum of all EXTENT_DATA items size for this inode.
4751  * @end:        the offset of the last extent.
4752  *
4753  * Return 0 if no error occurred.
4754  */
4755 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4756                              struct extent_buffer *node, int slot,
4757                              unsigned int nodatasum, u64 *size, u64 *end)
4758 {
4759         struct btrfs_file_extent_item *fi;
4760         u64 disk_bytenr;
4761         u64 disk_num_bytes;
4762         u64 extent_num_bytes;
4763         u64 extent_offset;
4764         u64 csum_found;         /* In byte size, sectorsize aligned */
4765         u64 search_start;       /* Logical range start we search for csum */
4766         u64 search_len;         /* Logical range len we search for csum */
4767         unsigned int extent_type;
4768         unsigned int is_hole;
4769         int compressed = 0;
4770         int ret;
4771         int err = 0;
4772
4773         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4774
4775         /* Check inline extent */
4776         extent_type = btrfs_file_extent_type(node, fi);
4777         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4778                 struct btrfs_item *e = btrfs_item_nr(slot);
4779                 u32 item_inline_len;
4780
4781                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4782                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4783                 compressed = btrfs_file_extent_compression(node, fi);
4784                 if (extent_num_bytes == 0) {
4785                         error(
4786                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4787                                 root->objectid, fkey->objectid, fkey->offset);
4788                         err |= FILE_EXTENT_ERROR;
4789                 }
4790                 if (!compressed && extent_num_bytes != item_inline_len) {
4791                         error(
4792                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4793                                 root->objectid, fkey->objectid, fkey->offset,
4794                                 extent_num_bytes, item_inline_len);
4795                         err |= FILE_EXTENT_ERROR;
4796                 }
4797                 *end += extent_num_bytes;
4798                 *size += extent_num_bytes;
4799                 return err;
4800         }
4801
4802         /* Check extent type */
4803         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4804                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4805                 err |= FILE_EXTENT_ERROR;
4806                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4807                       root->objectid, fkey->objectid, fkey->offset);
4808                 return err;
4809         }
4810
4811         /* Check REG_EXTENT/PREALLOC_EXTENT */
4812         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4813         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4814         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4815         extent_offset = btrfs_file_extent_offset(node, fi);
4816         compressed = btrfs_file_extent_compression(node, fi);
4817         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4818
4819         /*
4820          * Check EXTENT_DATA csum
4821          *
4822          * For plain (uncompressed) extent, we should only check the range
4823          * we're referring to, as it's possible that part of prealloc extent
4824          * has been written, and has csum:
4825          *
4826          * |<--- Original large preallocated extent A ---->|
4827          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4828          *      No csum                         Has csum
4829          *
4830          * For compressed extent, we should check the whole range.
4831          */
4832         if (!compressed) {
4833                 search_start = disk_bytenr + extent_offset;
4834                 search_len = extent_num_bytes;
4835         } else {
4836                 search_start = disk_bytenr;
4837                 search_len = disk_num_bytes;
4838         }
4839         ret = count_csum_range(root, search_start, search_len, &csum_found);
4840         if (csum_found > 0 && nodatasum) {
4841                 err |= ODD_CSUM_ITEM;
4842                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4843                       root->objectid, fkey->objectid, fkey->offset);
4844         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4845                    !is_hole && (ret < 0 || csum_found < search_len)) {
4846                 err |= CSUM_ITEM_MISSING;
4847                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4848                       root->objectid, fkey->objectid, fkey->offset,
4849                       csum_found, search_len);
4850         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4851                 err |= ODD_CSUM_ITEM;
4852                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4853                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4854         }
4855
4856         /* Check EXTENT_DATA hole */
4857         if (!no_holes && *end != fkey->offset) {
4858                 err |= FILE_EXTENT_ERROR;
4859                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4860                       root->objectid, fkey->objectid, fkey->offset);
4861         }
4862
4863         *end += extent_num_bytes;
4864         if (!is_hole)
4865                 *size += extent_num_bytes;
4866
4867         return err;
4868 }
4869
4870 /*
4871  * Check INODE_ITEM and related ITEMs (the same inode number)
4872  * 1. check link count
4873  * 2. check inode ref/extref
4874  * 3. check dir item/index
4875  *
4876  * @ext_ref:    the EXTENDED_IREF feature
4877  *
4878  * Return 0 if no error occurred.
4879  * Return >0 for error or hit the traversal is done(by error bitmap)
4880  */
4881 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4882                             unsigned int ext_ref)
4883 {
4884         struct extent_buffer *node;
4885         struct btrfs_inode_item *ii;
4886         struct btrfs_key key;
4887         u64 inode_id;
4888         u32 mode;
4889         u64 nlink;
4890         u64 nbytes;
4891         u64 isize;
4892         u64 size = 0;
4893         u64 refs = 0;
4894         u64 extent_end = 0;
4895         u64 extent_size = 0;
4896         unsigned int dir;
4897         unsigned int nodatasum;
4898         int slot;
4899         int ret;
4900         int err = 0;
4901
4902         node = path->nodes[0];
4903         slot = path->slots[0];
4904
4905         btrfs_item_key_to_cpu(node, &key, slot);
4906         inode_id = key.objectid;
4907
4908         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4909                 ret = btrfs_next_item(root, path);
4910                 if (ret > 0)
4911                         err |= LAST_ITEM;
4912                 return err;
4913         }
4914
4915         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4916         isize = btrfs_inode_size(node, ii);
4917         nbytes = btrfs_inode_nbytes(node, ii);
4918         mode = btrfs_inode_mode(node, ii);
4919         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4920         nlink = btrfs_inode_nlink(node, ii);
4921         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4922
4923         while (1) {
4924                 ret = btrfs_next_item(root, path);
4925                 if (ret < 0) {
4926                         /* out will fill 'err' rusing current statistics */
4927                         goto out;
4928                 } else if (ret > 0) {
4929                         err |= LAST_ITEM;
4930                         goto out;
4931                 }
4932
4933                 node = path->nodes[0];
4934                 slot = path->slots[0];
4935                 btrfs_item_key_to_cpu(node, &key, slot);
4936                 if (key.objectid != inode_id)
4937                         goto out;
4938
4939                 switch (key.type) {
4940                 case BTRFS_INODE_REF_KEY:
4941                         ret = check_inode_ref(root, &key, node, slot, &refs,
4942                                               mode);
4943                         err |= ret;
4944                         break;
4945                 case BTRFS_INODE_EXTREF_KEY:
4946                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4947                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4948                                         root->objectid, key.objectid,
4949                                         key.offset);
4950                         ret = check_inode_extref(root, &key, node, slot, &refs,
4951                                                  mode);
4952                         err |= ret;
4953                         break;
4954                 case BTRFS_DIR_ITEM_KEY:
4955                 case BTRFS_DIR_INDEX_KEY:
4956                         if (!dir) {
4957                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4958                                         root->objectid, inode_id,
4959                                         imode_to_type(mode), key.objectid,
4960                                         key.offset);
4961                         }
4962                         ret = check_dir_item(root, &key, node, slot, &size,
4963                                              ext_ref);
4964                         err |= ret;
4965                         break;
4966                 case BTRFS_EXTENT_DATA_KEY:
4967                         if (dir) {
4968                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4969                                         root->objectid, inode_id, key.objectid,
4970                                         key.offset);
4971                         }
4972                         ret = check_file_extent(root, &key, node, slot,
4973                                                 nodatasum, &extent_size,
4974                                                 &extent_end);
4975                         err |= ret;
4976                         break;
4977                 case BTRFS_XATTR_ITEM_KEY:
4978                         break;
4979                 default:
4980                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4981                               key.objectid, key.type, key.offset);
4982                 }
4983         }
4984
4985 out:
4986         /* verify INODE_ITEM nlink/isize/nbytes */
4987         if (dir) {
4988                 if (nlink != 1) {
4989                         err |= LINK_COUNT_ERROR;
4990                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4991                               root->objectid, inode_id, nlink);
4992                 }
4993
4994                 /*
4995                  * Just a warning, as dir inode nbytes is just an
4996                  * instructive value.
4997                  */
4998                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4999                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5000                                 root->objectid, inode_id,
5001                                 root->fs_info->nodesize);
5002                 }
5003
5004                 if (isize != size) {
5005                         err |= ISIZE_ERROR;
5006                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5007                               root->objectid, inode_id, isize, size);
5008                 }
5009         } else {
5010                 if (nlink != refs) {
5011                         err |= LINK_COUNT_ERROR;
5012                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5013                               root->objectid, inode_id, nlink, refs);
5014                 } else if (!nlink) {
5015                         err |= ORPHAN_ITEM;
5016                 }
5017
5018                 if (!nbytes && !no_holes && extent_end < isize) {
5019                         err |= NBYTES_ERROR;
5020                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5021                               root->objectid, inode_id, isize);
5022                 }
5023
5024                 if (nbytes != extent_size) {
5025                         err |= NBYTES_ERROR;
5026                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5027                               root->objectid, inode_id, nbytes, extent_size);
5028                 }
5029         }
5030
5031         return err;
5032 }
5033
5034 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5035 {
5036         struct btrfs_path path;
5037         struct btrfs_key key;
5038         int err = 0;
5039         int ret;
5040
5041         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5042         key.type = BTRFS_INODE_ITEM_KEY;
5043         key.offset = 0;
5044
5045         /* For root being dropped, we don't need to check first inode */
5046         if (btrfs_root_refs(&root->root_item) == 0 &&
5047             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5048             key.objectid)
5049                 return 0;
5050
5051         btrfs_init_path(&path);
5052
5053         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5054         if (ret < 0)
5055                 goto out;
5056         if (ret > 0) {
5057                 ret = 0;
5058                 err |= INODE_ITEM_MISSING;
5059                 error("first inode item of root %llu is missing",
5060                       root->objectid);
5061         }
5062
5063         err |= check_inode_item(root, &path, ext_ref);
5064         err &= ~LAST_ITEM;
5065         if (err && !ret)
5066                 ret = -EIO;
5067 out:
5068         btrfs_release_path(&path);
5069         return ret;
5070 }
5071
5072 /*
5073  * Iterate all item on the tree and call check_inode_item() to check.
5074  *
5075  * @root:       the root of the tree to be checked.
5076  * @ext_ref:    the EXTENDED_IREF feature
5077  *
5078  * Return 0 if no error found.
5079  * Return <0 for error.
5080  */
5081 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5082 {
5083         struct btrfs_path path;
5084         struct node_refs nrefs;
5085         struct btrfs_root_item *root_item = &root->root_item;
5086         int ret;
5087         int level;
5088         int err = 0;
5089
5090         /*
5091          * We need to manually check the first inode item(256)
5092          * As the following traversal function will only start from
5093          * the first inode item in the leaf, if inode item(256) is missing
5094          * we will just skip it forever.
5095          */
5096         ret = check_fs_first_inode(root, ext_ref);
5097         if (ret < 0)
5098                 return ret;
5099
5100         memset(&nrefs, 0, sizeof(nrefs));
5101         level = btrfs_header_level(root->node);
5102         btrfs_init_path(&path);
5103
5104         if (btrfs_root_refs(root_item) > 0 ||
5105             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5106                 path.nodes[level] = root->node;
5107                 path.slots[level] = 0;
5108                 extent_buffer_get(root->node);
5109         } else {
5110                 struct btrfs_key key;
5111
5112                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5113                 level = root_item->drop_level;
5114                 path.lowest_level = level;
5115                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5116                 if (ret < 0)
5117                         goto out;
5118                 ret = 0;
5119         }
5120
5121         while (1) {
5122                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5123                 err |= !!ret;
5124
5125                 /* if ret is negative, walk shall stop */
5126                 if (ret < 0) {
5127                         ret = err;
5128                         break;
5129                 }
5130
5131                 ret = walk_up_tree_v2(root, &path, &level);
5132                 if (ret != 0) {
5133                         /* Normal exit, reset ret to err */
5134                         ret = err;
5135                         break;
5136                 }
5137         }
5138
5139 out:
5140         btrfs_release_path(&path);
5141         return ret;
5142 }
5143
5144 /*
5145  * Find the relative ref for root_ref and root_backref.
5146  *
5147  * @root:       the root of the root tree.
5148  * @ref_key:    the key of the root ref.
5149  *
5150  * Return 0 if no error occurred.
5151  */
5152 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5153                           struct extent_buffer *node, int slot)
5154 {
5155         struct btrfs_path path;
5156         struct btrfs_key key;
5157         struct btrfs_root_ref *ref;
5158         struct btrfs_root_ref *backref;
5159         char ref_name[BTRFS_NAME_LEN] = {0};
5160         char backref_name[BTRFS_NAME_LEN] = {0};
5161         u64 ref_dirid;
5162         u64 ref_seq;
5163         u32 ref_namelen;
5164         u64 backref_dirid;
5165         u64 backref_seq;
5166         u32 backref_namelen;
5167         u32 len;
5168         int ret;
5169         int err = 0;
5170
5171         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5172         ref_dirid = btrfs_root_ref_dirid(node, ref);
5173         ref_seq = btrfs_root_ref_sequence(node, ref);
5174         ref_namelen = btrfs_root_ref_name_len(node, ref);
5175
5176         if (ref_namelen <= BTRFS_NAME_LEN) {
5177                 len = ref_namelen;
5178         } else {
5179                 len = BTRFS_NAME_LEN;
5180                 warning("%s[%llu %llu] ref_name too long",
5181                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5182                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5183                         ref_key->offset);
5184         }
5185         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5186
5187         /* Find relative root_ref */
5188         key.objectid = ref_key->offset;
5189         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5190         key.offset = ref_key->objectid;
5191
5192         btrfs_init_path(&path);
5193         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5194         if (ret) {
5195                 err |= ROOT_REF_MISSING;
5196                 error("%s[%llu %llu] couldn't find relative ref",
5197                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5198                       "ROOT_REF" : "ROOT_BACKREF",
5199                       ref_key->objectid, ref_key->offset);
5200                 goto out;
5201         }
5202
5203         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5204                                  struct btrfs_root_ref);
5205         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5206         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5207         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5208
5209         if (backref_namelen <= BTRFS_NAME_LEN) {
5210                 len = backref_namelen;
5211         } else {
5212                 len = BTRFS_NAME_LEN;
5213                 warning("%s[%llu %llu] ref_name too long",
5214                         key.type == BTRFS_ROOT_REF_KEY ?
5215                         "ROOT_REF" : "ROOT_BACKREF",
5216                         key.objectid, key.offset);
5217         }
5218         read_extent_buffer(path.nodes[0], backref_name,
5219                            (unsigned long)(backref + 1), len);
5220
5221         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5222             ref_namelen != backref_namelen ||
5223             strncmp(ref_name, backref_name, len)) {
5224                 err |= ROOT_REF_MISMATCH;
5225                 error("%s[%llu %llu] mismatch relative ref",
5226                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5227                       "ROOT_REF" : "ROOT_BACKREF",
5228                       ref_key->objectid, ref_key->offset);
5229         }
5230 out:
5231         btrfs_release_path(&path);
5232         return err;
5233 }
5234
5235 /*
5236  * Check all fs/file tree in low_memory mode.
5237  *
5238  * 1. for fs tree root item, call check_fs_root_v2()
5239  * 2. for fs tree root ref/backref, call check_root_ref()
5240  *
5241  * Return 0 if no error occurred.
5242  */
5243 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5244 {
5245         struct btrfs_root *tree_root = fs_info->tree_root;
5246         struct btrfs_root *cur_root = NULL;
5247         struct btrfs_path path;
5248         struct btrfs_key key;
5249         struct extent_buffer *node;
5250         unsigned int ext_ref;
5251         int slot;
5252         int ret;
5253         int err = 0;
5254
5255         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5256
5257         btrfs_init_path(&path);
5258         key.objectid = BTRFS_FS_TREE_OBJECTID;
5259         key.offset = 0;
5260         key.type = BTRFS_ROOT_ITEM_KEY;
5261
5262         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5263         if (ret < 0) {
5264                 err = ret;
5265                 goto out;
5266         } else if (ret > 0) {
5267                 err = -ENOENT;
5268                 goto out;
5269         }
5270
5271         while (1) {
5272                 node = path.nodes[0];
5273                 slot = path.slots[0];
5274                 btrfs_item_key_to_cpu(node, &key, slot);
5275                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5276                         goto out;
5277                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5278                     fs_root_objectid(key.objectid)) {
5279                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5280                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5281                                                                        &key);
5282                         } else {
5283                                 key.offset = (u64)-1;
5284                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5285                         }
5286
5287                         if (IS_ERR(cur_root)) {
5288                                 error("Fail to read fs/subvol tree: %lld",
5289                                       key.objectid);
5290                                 err = -EIO;
5291                                 goto next;
5292                         }
5293
5294                         ret = check_fs_root_v2(cur_root, ext_ref);
5295                         err |= ret;
5296
5297                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5298                                 btrfs_free_fs_root(cur_root);
5299                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5300                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5301                         ret = check_root_ref(tree_root, &key, node, slot);
5302                         err |= ret;
5303                 }
5304 next:
5305                 ret = btrfs_next_item(tree_root, &path);
5306                 if (ret > 0)
5307                         goto out;
5308                 if (ret < 0) {
5309                         err = ret;
5310                         goto out;
5311                 }
5312         }
5313
5314 out:
5315         btrfs_release_path(&path);
5316         return err;
5317 }
5318
5319 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5320 {
5321         struct list_head *cur = rec->backrefs.next;
5322         struct extent_backref *back;
5323         struct tree_backref *tback;
5324         struct data_backref *dback;
5325         u64 found = 0;
5326         int err = 0;
5327
5328         while(cur != &rec->backrefs) {
5329                 back = to_extent_backref(cur);
5330                 cur = cur->next;
5331                 if (!back->found_extent_tree) {
5332                         err = 1;
5333                         if (!print_errs)
5334                                 goto out;
5335                         if (back->is_data) {
5336                                 dback = to_data_backref(back);
5337                                 fprintf(stderr, "Backref %llu %s %llu"
5338                                         " owner %llu offset %llu num_refs %lu"
5339                                         " not found in extent tree\n",
5340                                         (unsigned long long)rec->start,
5341                                         back->full_backref ?
5342                                         "parent" : "root",
5343                                         back->full_backref ?
5344                                         (unsigned long long)dback->parent:
5345                                         (unsigned long long)dback->root,
5346                                         (unsigned long long)dback->owner,
5347                                         (unsigned long long)dback->offset,
5348                                         (unsigned long)dback->num_refs);
5349                         } else {
5350                                 tback = to_tree_backref(back);
5351                                 fprintf(stderr, "Backref %llu parent %llu"
5352                                         " root %llu not found in extent tree\n",
5353                                         (unsigned long long)rec->start,
5354                                         (unsigned long long)tback->parent,
5355                                         (unsigned long long)tback->root);
5356                         }
5357                 }
5358                 if (!back->is_data && !back->found_ref) {
5359                         err = 1;
5360                         if (!print_errs)
5361                                 goto out;
5362                         tback = to_tree_backref(back);
5363                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5364                                 (unsigned long long)rec->start,
5365                                 back->full_backref ? "parent" : "root",
5366                                 back->full_backref ?
5367                                 (unsigned long long)tback->parent :
5368                                 (unsigned long long)tback->root, back);
5369                 }
5370                 if (back->is_data) {
5371                         dback = to_data_backref(back);
5372                         if (dback->found_ref != dback->num_refs) {
5373                                 err = 1;
5374                                 if (!print_errs)
5375                                         goto out;
5376                                 fprintf(stderr, "Incorrect local backref count"
5377                                         " on %llu %s %llu owner %llu"
5378                                         " offset %llu found %u wanted %u back %p\n",
5379                                         (unsigned long long)rec->start,
5380                                         back->full_backref ?
5381                                         "parent" : "root",
5382                                         back->full_backref ?
5383                                         (unsigned long long)dback->parent:
5384                                         (unsigned long long)dback->root,
5385                                         (unsigned long long)dback->owner,
5386                                         (unsigned long long)dback->offset,
5387                                         dback->found_ref, dback->num_refs, back);
5388                         }
5389                         if (dback->disk_bytenr != rec->start) {
5390                                 err = 1;
5391                                 if (!print_errs)
5392                                         goto out;
5393                                 fprintf(stderr, "Backref disk bytenr does not"
5394                                         " match extent record, bytenr=%llu, "
5395                                         "ref bytenr=%llu\n",
5396                                         (unsigned long long)rec->start,
5397                                         (unsigned long long)dback->disk_bytenr);
5398                         }
5399
5400                         if (dback->bytes != rec->nr) {
5401                                 err = 1;
5402                                 if (!print_errs)
5403                                         goto out;
5404                                 fprintf(stderr, "Backref bytes do not match "
5405                                         "extent backref, bytenr=%llu, ref "
5406                                         "bytes=%llu, backref bytes=%llu\n",
5407                                         (unsigned long long)rec->start,
5408                                         (unsigned long long)rec->nr,
5409                                         (unsigned long long)dback->bytes);
5410                         }
5411                 }
5412                 if (!back->is_data) {
5413                         found += 1;
5414                 } else {
5415                         dback = to_data_backref(back);
5416                         found += dback->found_ref;
5417                 }
5418         }
5419         if (found != rec->refs) {
5420                 err = 1;
5421                 if (!print_errs)
5422                         goto out;
5423                 fprintf(stderr, "Incorrect global backref count "
5424                         "on %llu found %llu wanted %llu\n",
5425                         (unsigned long long)rec->start,
5426                         (unsigned long long)found,
5427                         (unsigned long long)rec->refs);
5428         }
5429 out:
5430         return err;
5431 }
5432
5433 static int free_all_extent_backrefs(struct extent_record *rec)
5434 {
5435         struct extent_backref *back;
5436         struct list_head *cur;
5437         while (!list_empty(&rec->backrefs)) {
5438                 cur = rec->backrefs.next;
5439                 back = to_extent_backref(cur);
5440                 list_del(cur);
5441                 free(back);
5442         }
5443         return 0;
5444 }
5445
5446 static void free_extent_record_cache(struct cache_tree *extent_cache)
5447 {
5448         struct cache_extent *cache;
5449         struct extent_record *rec;
5450
5451         while (1) {
5452                 cache = first_cache_extent(extent_cache);
5453                 if (!cache)
5454                         break;
5455                 rec = container_of(cache, struct extent_record, cache);
5456                 remove_cache_extent(extent_cache, cache);
5457                 free_all_extent_backrefs(rec);
5458                 free(rec);
5459         }
5460 }
5461
5462 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5463                                  struct extent_record *rec)
5464 {
5465         if (rec->content_checked && rec->owner_ref_checked &&
5466             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5467             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5468             !rec->bad_full_backref && !rec->crossing_stripes &&
5469             !rec->wrong_chunk_type) {
5470                 remove_cache_extent(extent_cache, &rec->cache);
5471                 free_all_extent_backrefs(rec);
5472                 list_del_init(&rec->list);
5473                 free(rec);
5474         }
5475         return 0;
5476 }
5477
5478 static int check_owner_ref(struct btrfs_root *root,
5479                             struct extent_record *rec,
5480                             struct extent_buffer *buf)
5481 {
5482         struct extent_backref *node;
5483         struct tree_backref *back;
5484         struct btrfs_root *ref_root;
5485         struct btrfs_key key;
5486         struct btrfs_path path;
5487         struct extent_buffer *parent;
5488         int level;
5489         int found = 0;
5490         int ret;
5491
5492         list_for_each_entry(node, &rec->backrefs, list) {
5493                 if (node->is_data)
5494                         continue;
5495                 if (!node->found_ref)
5496                         continue;
5497                 if (node->full_backref)
5498                         continue;
5499                 back = to_tree_backref(node);
5500                 if (btrfs_header_owner(buf) == back->root)
5501                         return 0;
5502         }
5503         BUG_ON(rec->is_root);
5504
5505         /* try to find the block by search corresponding fs tree */
5506         key.objectid = btrfs_header_owner(buf);
5507         key.type = BTRFS_ROOT_ITEM_KEY;
5508         key.offset = (u64)-1;
5509
5510         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5511         if (IS_ERR(ref_root))
5512                 return 1;
5513
5514         level = btrfs_header_level(buf);
5515         if (level == 0)
5516                 btrfs_item_key_to_cpu(buf, &key, 0);
5517         else
5518                 btrfs_node_key_to_cpu(buf, &key, 0);
5519
5520         btrfs_init_path(&path);
5521         path.lowest_level = level + 1;
5522         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5523         if (ret < 0)
5524                 return 0;
5525
5526         parent = path.nodes[level + 1];
5527         if (parent && buf->start == btrfs_node_blockptr(parent,
5528                                                         path.slots[level + 1]))
5529                 found = 1;
5530
5531         btrfs_release_path(&path);
5532         return found ? 0 : 1;
5533 }
5534
5535 static int is_extent_tree_record(struct extent_record *rec)
5536 {
5537         struct list_head *cur = rec->backrefs.next;
5538         struct extent_backref *node;
5539         struct tree_backref *back;
5540         int is_extent = 0;
5541
5542         while(cur != &rec->backrefs) {
5543                 node = to_extent_backref(cur);
5544                 cur = cur->next;
5545                 if (node->is_data)
5546                         return 0;
5547                 back = to_tree_backref(node);
5548                 if (node->full_backref)
5549                         return 0;
5550                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5551                         is_extent = 1;
5552         }
5553         return is_extent;
5554 }
5555
5556
5557 static int record_bad_block_io(struct btrfs_fs_info *info,
5558                                struct cache_tree *extent_cache,
5559                                u64 start, u64 len)
5560 {
5561         struct extent_record *rec;
5562         struct cache_extent *cache;
5563         struct btrfs_key key;
5564
5565         cache = lookup_cache_extent(extent_cache, start, len);
5566         if (!cache)
5567                 return 0;
5568
5569         rec = container_of(cache, struct extent_record, cache);
5570         if (!is_extent_tree_record(rec))
5571                 return 0;
5572
5573         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5574         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5575 }
5576
5577 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5578                        struct extent_buffer *buf, int slot)
5579 {
5580         if (btrfs_header_level(buf)) {
5581                 struct btrfs_key_ptr ptr1, ptr2;
5582
5583                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5584                                    sizeof(struct btrfs_key_ptr));
5585                 read_extent_buffer(buf, &ptr2,
5586                                    btrfs_node_key_ptr_offset(slot + 1),
5587                                    sizeof(struct btrfs_key_ptr));
5588                 write_extent_buffer(buf, &ptr1,
5589                                     btrfs_node_key_ptr_offset(slot + 1),
5590                                     sizeof(struct btrfs_key_ptr));
5591                 write_extent_buffer(buf, &ptr2,
5592                                     btrfs_node_key_ptr_offset(slot),
5593                                     sizeof(struct btrfs_key_ptr));
5594                 if (slot == 0) {
5595                         struct btrfs_disk_key key;
5596                         btrfs_node_key(buf, &key, 0);
5597                         btrfs_fixup_low_keys(root, path, &key,
5598                                              btrfs_header_level(buf) + 1);
5599                 }
5600         } else {
5601                 struct btrfs_item *item1, *item2;
5602                 struct btrfs_key k1, k2;
5603                 char *item1_data, *item2_data;
5604                 u32 item1_offset, item2_offset, item1_size, item2_size;
5605
5606                 item1 = btrfs_item_nr(slot);
5607                 item2 = btrfs_item_nr(slot + 1);
5608                 btrfs_item_key_to_cpu(buf, &k1, slot);
5609                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5610                 item1_offset = btrfs_item_offset(buf, item1);
5611                 item2_offset = btrfs_item_offset(buf, item2);
5612                 item1_size = btrfs_item_size(buf, item1);
5613                 item2_size = btrfs_item_size(buf, item2);
5614
5615                 item1_data = malloc(item1_size);
5616                 if (!item1_data)
5617                         return -ENOMEM;
5618                 item2_data = malloc(item2_size);
5619                 if (!item2_data) {
5620                         free(item1_data);
5621                         return -ENOMEM;
5622                 }
5623
5624                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5625                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5626
5627                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5628                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5629                 free(item1_data);
5630                 free(item2_data);
5631
5632                 btrfs_set_item_offset(buf, item1, item2_offset);
5633                 btrfs_set_item_offset(buf, item2, item1_offset);
5634                 btrfs_set_item_size(buf, item1, item2_size);
5635                 btrfs_set_item_size(buf, item2, item1_size);
5636
5637                 path->slots[0] = slot;
5638                 btrfs_set_item_key_unsafe(root, path, &k2);
5639                 path->slots[0] = slot + 1;
5640                 btrfs_set_item_key_unsafe(root, path, &k1);
5641         }
5642         return 0;
5643 }
5644
5645 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5646 {
5647         struct extent_buffer *buf;
5648         struct btrfs_key k1, k2;
5649         int i;
5650         int level = path->lowest_level;
5651         int ret = -EIO;
5652
5653         buf = path->nodes[level];
5654         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5655                 if (level) {
5656                         btrfs_node_key_to_cpu(buf, &k1, i);
5657                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5658                 } else {
5659                         btrfs_item_key_to_cpu(buf, &k1, i);
5660                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5661                 }
5662                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5663                         continue;
5664                 ret = swap_values(root, path, buf, i);
5665                 if (ret)
5666                         break;
5667                 btrfs_mark_buffer_dirty(buf);
5668                 i = 0;
5669         }
5670         return ret;
5671 }
5672
5673 static int delete_bogus_item(struct btrfs_root *root,
5674                              struct btrfs_path *path,
5675                              struct extent_buffer *buf, int slot)
5676 {
5677         struct btrfs_key key;
5678         int nritems = btrfs_header_nritems(buf);
5679
5680         btrfs_item_key_to_cpu(buf, &key, slot);
5681
5682         /* These are all the keys we can deal with missing. */
5683         if (key.type != BTRFS_DIR_INDEX_KEY &&
5684             key.type != BTRFS_EXTENT_ITEM_KEY &&
5685             key.type != BTRFS_METADATA_ITEM_KEY &&
5686             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5687             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5688                 return -1;
5689
5690         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5691                (unsigned long long)key.objectid, key.type,
5692                (unsigned long long)key.offset, slot, buf->start);
5693         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5694                               btrfs_item_nr_offset(slot + 1),
5695                               sizeof(struct btrfs_item) *
5696                               (nritems - slot - 1));
5697         btrfs_set_header_nritems(buf, nritems - 1);
5698         if (slot == 0) {
5699                 struct btrfs_disk_key disk_key;
5700
5701                 btrfs_item_key(buf, &disk_key, 0);
5702                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5703         }
5704         btrfs_mark_buffer_dirty(buf);
5705         return 0;
5706 }
5707
5708 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5709 {
5710         struct extent_buffer *buf;
5711         int i;
5712         int ret = 0;
5713
5714         /* We should only get this for leaves */
5715         BUG_ON(path->lowest_level);
5716         buf = path->nodes[0];
5717 again:
5718         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5719                 unsigned int shift = 0, offset;
5720
5721                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5722                     BTRFS_LEAF_DATA_SIZE(root)) {
5723                         if (btrfs_item_end_nr(buf, i) >
5724                             BTRFS_LEAF_DATA_SIZE(root)) {
5725                                 ret = delete_bogus_item(root, path, buf, i);
5726                                 if (!ret)
5727                                         goto again;
5728                                 fprintf(stderr, "item is off the end of the "
5729                                         "leaf, can't fix\n");
5730                                 ret = -EIO;
5731                                 break;
5732                         }
5733                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5734                                 btrfs_item_end_nr(buf, i);
5735                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5736                            btrfs_item_offset_nr(buf, i - 1)) {
5737                         if (btrfs_item_end_nr(buf, i) >
5738                             btrfs_item_offset_nr(buf, i - 1)) {
5739                                 ret = delete_bogus_item(root, path, buf, i);
5740                                 if (!ret)
5741                                         goto again;
5742                                 fprintf(stderr, "items overlap, can't fix\n");
5743                                 ret = -EIO;
5744                                 break;
5745                         }
5746                         shift = btrfs_item_offset_nr(buf, i - 1) -
5747                                 btrfs_item_end_nr(buf, i);
5748                 }
5749                 if (!shift)
5750                         continue;
5751
5752                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5753                        i, shift, (unsigned long long)buf->start);
5754                 offset = btrfs_item_offset_nr(buf, i);
5755                 memmove_extent_buffer(buf,
5756                                       btrfs_leaf_data(buf) + offset + shift,
5757                                       btrfs_leaf_data(buf) + offset,
5758                                       btrfs_item_size_nr(buf, i));
5759                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5760                                       offset + shift);
5761                 btrfs_mark_buffer_dirty(buf);
5762         }
5763
5764         /*
5765          * We may have moved things, in which case we want to exit so we don't
5766          * write those changes out.  Once we have proper abort functionality in
5767          * progs this can be changed to something nicer.
5768          */
5769         BUG_ON(ret);
5770         return ret;
5771 }
5772
5773 /*
5774  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5775  * then just return -EIO.
5776  */
5777 static int try_to_fix_bad_block(struct btrfs_root *root,
5778                                 struct extent_buffer *buf,
5779                                 enum btrfs_tree_block_status status)
5780 {
5781         struct btrfs_trans_handle *trans;
5782         struct ulist *roots;
5783         struct ulist_node *node;
5784         struct btrfs_root *search_root;
5785         struct btrfs_path path;
5786         struct ulist_iterator iter;
5787         struct btrfs_key root_key, key;
5788         int ret;
5789
5790         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5791             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5792                 return -EIO;
5793
5794         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5795         if (ret)
5796                 return -EIO;
5797
5798         btrfs_init_path(&path);
5799         ULIST_ITER_INIT(&iter);
5800         while ((node = ulist_next(roots, &iter))) {
5801                 root_key.objectid = node->val;
5802                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5803                 root_key.offset = (u64)-1;
5804
5805                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5806                 if (IS_ERR(root)) {
5807                         ret = -EIO;
5808                         break;
5809                 }
5810
5811
5812                 trans = btrfs_start_transaction(search_root, 0);
5813                 if (IS_ERR(trans)) {
5814                         ret = PTR_ERR(trans);
5815                         break;
5816                 }
5817
5818                 path.lowest_level = btrfs_header_level(buf);
5819                 path.skip_check_block = 1;
5820                 if (path.lowest_level)
5821                         btrfs_node_key_to_cpu(buf, &key, 0);
5822                 else
5823                         btrfs_item_key_to_cpu(buf, &key, 0);
5824                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5825                 if (ret) {
5826                         ret = -EIO;
5827                         btrfs_commit_transaction(trans, search_root);
5828                         break;
5829                 }
5830                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5831                         ret = fix_key_order(search_root, &path);
5832                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5833                         ret = fix_item_offset(search_root, &path);
5834                 if (ret) {
5835                         btrfs_commit_transaction(trans, search_root);
5836                         break;
5837                 }
5838                 btrfs_release_path(&path);
5839                 btrfs_commit_transaction(trans, search_root);
5840         }
5841         ulist_free(roots);
5842         btrfs_release_path(&path);
5843         return ret;
5844 }
5845
5846 static int check_block(struct btrfs_root *root,
5847                        struct cache_tree *extent_cache,
5848                        struct extent_buffer *buf, u64 flags)
5849 {
5850         struct extent_record *rec;
5851         struct cache_extent *cache;
5852         struct btrfs_key key;
5853         enum btrfs_tree_block_status status;
5854         int ret = 0;
5855         int level;
5856
5857         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5858         if (!cache)
5859                 return 1;
5860         rec = container_of(cache, struct extent_record, cache);
5861         rec->generation = btrfs_header_generation(buf);
5862
5863         level = btrfs_header_level(buf);
5864         if (btrfs_header_nritems(buf) > 0) {
5865
5866                 if (level == 0)
5867                         btrfs_item_key_to_cpu(buf, &key, 0);
5868                 else
5869                         btrfs_node_key_to_cpu(buf, &key, 0);
5870
5871                 rec->info_objectid = key.objectid;
5872         }
5873         rec->info_level = level;
5874
5875         if (btrfs_is_leaf(buf))
5876                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5877         else
5878                 status = btrfs_check_node(root, &rec->parent_key, buf);
5879
5880         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5881                 if (repair)
5882                         status = try_to_fix_bad_block(root, buf, status);
5883                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5884                         ret = -EIO;
5885                         fprintf(stderr, "bad block %llu\n",
5886                                 (unsigned long long)buf->start);
5887                 } else {
5888                         /*
5889                          * Signal to callers we need to start the scan over
5890                          * again since we'll have cowed blocks.
5891                          */
5892                         ret = -EAGAIN;
5893                 }
5894         } else {
5895                 rec->content_checked = 1;
5896                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5897                         rec->owner_ref_checked = 1;
5898                 else {
5899                         ret = check_owner_ref(root, rec, buf);
5900                         if (!ret)
5901                                 rec->owner_ref_checked = 1;
5902                 }
5903         }
5904         if (!ret)
5905                 maybe_free_extent_rec(extent_cache, rec);
5906         return ret;
5907 }
5908
5909 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5910                                                 u64 parent, u64 root)
5911 {
5912         struct list_head *cur = rec->backrefs.next;
5913         struct extent_backref *node;
5914         struct tree_backref *back;
5915
5916         while(cur != &rec->backrefs) {
5917                 node = to_extent_backref(cur);
5918                 cur = cur->next;
5919                 if (node->is_data)
5920                         continue;
5921                 back = to_tree_backref(node);
5922                 if (parent > 0) {
5923                         if (!node->full_backref)
5924                                 continue;
5925                         if (parent == back->parent)
5926                                 return back;
5927                 } else {
5928                         if (node->full_backref)
5929                                 continue;
5930                         if (back->root == root)
5931                                 return back;
5932                 }
5933         }
5934         return NULL;
5935 }
5936
5937 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5938                                                 u64 parent, u64 root)
5939 {
5940         struct tree_backref *ref = malloc(sizeof(*ref));
5941
5942         if (!ref)
5943                 return NULL;
5944         memset(&ref->node, 0, sizeof(ref->node));
5945         if (parent > 0) {
5946                 ref->parent = parent;
5947                 ref->node.full_backref = 1;
5948         } else {
5949                 ref->root = root;
5950                 ref->node.full_backref = 0;
5951         }
5952         list_add_tail(&ref->node.list, &rec->backrefs);
5953
5954         return ref;
5955 }
5956
5957 static struct data_backref *find_data_backref(struct extent_record *rec,
5958                                                 u64 parent, u64 root,
5959                                                 u64 owner, u64 offset,
5960                                                 int found_ref,
5961                                                 u64 disk_bytenr, u64 bytes)
5962 {
5963         struct list_head *cur = rec->backrefs.next;
5964         struct extent_backref *node;
5965         struct data_backref *back;
5966
5967         while(cur != &rec->backrefs) {
5968                 node = to_extent_backref(cur);
5969                 cur = cur->next;
5970                 if (!node->is_data)
5971                         continue;
5972                 back = to_data_backref(node);
5973                 if (parent > 0) {
5974                         if (!node->full_backref)
5975                                 continue;
5976                         if (parent == back->parent)
5977                                 return back;
5978                 } else {
5979                         if (node->full_backref)
5980                                 continue;
5981                         if (back->root == root && back->owner == owner &&
5982                             back->offset == offset) {
5983                                 if (found_ref && node->found_ref &&
5984                                     (back->bytes != bytes ||
5985                                     back->disk_bytenr != disk_bytenr))
5986                                         continue;
5987                                 return back;
5988                         }
5989                 }
5990         }
5991         return NULL;
5992 }
5993
5994 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5995                                                 u64 parent, u64 root,
5996                                                 u64 owner, u64 offset,
5997                                                 u64 max_size)
5998 {
5999         struct data_backref *ref = malloc(sizeof(*ref));
6000
6001         if (!ref)
6002                 return NULL;
6003         memset(&ref->node, 0, sizeof(ref->node));
6004         ref->node.is_data = 1;
6005
6006         if (parent > 0) {
6007                 ref->parent = parent;
6008                 ref->owner = 0;
6009                 ref->offset = 0;
6010                 ref->node.full_backref = 1;
6011         } else {
6012                 ref->root = root;
6013                 ref->owner = owner;
6014                 ref->offset = offset;
6015                 ref->node.full_backref = 0;
6016         }
6017         ref->bytes = max_size;
6018         ref->found_ref = 0;
6019         ref->num_refs = 0;
6020         list_add_tail(&ref->node.list, &rec->backrefs);
6021         if (max_size > rec->max_size)
6022                 rec->max_size = max_size;
6023         return ref;
6024 }
6025
6026 /* Check if the type of extent matches with its chunk */
6027 static void check_extent_type(struct extent_record *rec)
6028 {
6029         struct btrfs_block_group_cache *bg_cache;
6030
6031         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6032         if (!bg_cache)
6033                 return;
6034
6035         /* data extent, check chunk directly*/
6036         if (!rec->metadata) {
6037                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6038                         rec->wrong_chunk_type = 1;
6039                 return;
6040         }
6041
6042         /* metadata extent, check the obvious case first */
6043         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6044                                  BTRFS_BLOCK_GROUP_METADATA))) {
6045                 rec->wrong_chunk_type = 1;
6046                 return;
6047         }
6048
6049         /*
6050          * Check SYSTEM extent, as it's also marked as metadata, we can only
6051          * make sure it's a SYSTEM extent by its backref
6052          */
6053         if (!list_empty(&rec->backrefs)) {
6054                 struct extent_backref *node;
6055                 struct tree_backref *tback;
6056                 u64 bg_type;
6057
6058                 node = to_extent_backref(rec->backrefs.next);
6059                 if (node->is_data) {
6060                         /* tree block shouldn't have data backref */
6061                         rec->wrong_chunk_type = 1;
6062                         return;
6063                 }
6064                 tback = container_of(node, struct tree_backref, node);
6065
6066                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6067                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6068                 else
6069                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6070                 if (!(bg_cache->flags & bg_type))
6071                         rec->wrong_chunk_type = 1;
6072         }
6073 }
6074
6075 /*
6076  * Allocate a new extent record, fill default values from @tmpl and insert int
6077  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6078  * the cache, otherwise it fails.
6079  */
6080 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6081                 struct extent_record *tmpl)
6082 {
6083         struct extent_record *rec;
6084         int ret = 0;
6085
6086         BUG_ON(tmpl->max_size == 0);
6087         rec = malloc(sizeof(*rec));
6088         if (!rec)
6089                 return -ENOMEM;
6090         rec->start = tmpl->start;
6091         rec->max_size = tmpl->max_size;
6092         rec->nr = max(tmpl->nr, tmpl->max_size);
6093         rec->found_rec = tmpl->found_rec;
6094         rec->content_checked = tmpl->content_checked;
6095         rec->owner_ref_checked = tmpl->owner_ref_checked;
6096         rec->num_duplicates = 0;
6097         rec->metadata = tmpl->metadata;
6098         rec->flag_block_full_backref = FLAG_UNSET;
6099         rec->bad_full_backref = 0;
6100         rec->crossing_stripes = 0;
6101         rec->wrong_chunk_type = 0;
6102         rec->is_root = tmpl->is_root;
6103         rec->refs = tmpl->refs;
6104         rec->extent_item_refs = tmpl->extent_item_refs;
6105         rec->parent_generation = tmpl->parent_generation;
6106         INIT_LIST_HEAD(&rec->backrefs);
6107         INIT_LIST_HEAD(&rec->dups);
6108         INIT_LIST_HEAD(&rec->list);
6109         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6110         rec->cache.start = tmpl->start;
6111         rec->cache.size = tmpl->nr;
6112         ret = insert_cache_extent(extent_cache, &rec->cache);
6113         if (ret) {
6114                 free(rec);
6115                 return ret;
6116         }
6117         bytes_used += rec->nr;
6118
6119         if (tmpl->metadata)
6120                 rec->crossing_stripes = check_crossing_stripes(global_info,
6121                                 rec->start, global_info->nodesize);
6122         check_extent_type(rec);
6123         return ret;
6124 }
6125
6126 /*
6127  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6128  * some are hints:
6129  * - refs              - if found, increase refs
6130  * - is_root           - if found, set
6131  * - content_checked   - if found, set
6132  * - owner_ref_checked - if found, set
6133  *
6134  * If not found, create a new one, initialize and insert.
6135  */
6136 static int add_extent_rec(struct cache_tree *extent_cache,
6137                 struct extent_record *tmpl)
6138 {
6139         struct extent_record *rec;
6140         struct cache_extent *cache;
6141         int ret = 0;
6142         int dup = 0;
6143
6144         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6145         if (cache) {
6146                 rec = container_of(cache, struct extent_record, cache);
6147                 if (tmpl->refs)
6148                         rec->refs++;
6149                 if (rec->nr == 1)
6150                         rec->nr = max(tmpl->nr, tmpl->max_size);
6151
6152                 /*
6153                  * We need to make sure to reset nr to whatever the extent
6154                  * record says was the real size, this way we can compare it to
6155                  * the backrefs.
6156                  */
6157                 if (tmpl->found_rec) {
6158                         if (tmpl->start != rec->start || rec->found_rec) {
6159                                 struct extent_record *tmp;
6160
6161                                 dup = 1;
6162                                 if (list_empty(&rec->list))
6163                                         list_add_tail(&rec->list,
6164                                                       &duplicate_extents);
6165
6166                                 /*
6167                                  * We have to do this song and dance in case we
6168                                  * find an extent record that falls inside of
6169                                  * our current extent record but does not have
6170                                  * the same objectid.
6171                                  */
6172                                 tmp = malloc(sizeof(*tmp));
6173                                 if (!tmp)
6174                                         return -ENOMEM;
6175                                 tmp->start = tmpl->start;
6176                                 tmp->max_size = tmpl->max_size;
6177                                 tmp->nr = tmpl->nr;
6178                                 tmp->found_rec = 1;
6179                                 tmp->metadata = tmpl->metadata;
6180                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6181                                 INIT_LIST_HEAD(&tmp->list);
6182                                 list_add_tail(&tmp->list, &rec->dups);
6183                                 rec->num_duplicates++;
6184                         } else {
6185                                 rec->nr = tmpl->nr;
6186                                 rec->found_rec = 1;
6187                         }
6188                 }
6189
6190                 if (tmpl->extent_item_refs && !dup) {
6191                         if (rec->extent_item_refs) {
6192                                 fprintf(stderr, "block %llu rec "
6193                                         "extent_item_refs %llu, passed %llu\n",
6194                                         (unsigned long long)tmpl->start,
6195                                         (unsigned long long)
6196                                                         rec->extent_item_refs,
6197                                         (unsigned long long)tmpl->extent_item_refs);
6198                         }
6199                         rec->extent_item_refs = tmpl->extent_item_refs;
6200                 }
6201                 if (tmpl->is_root)
6202                         rec->is_root = 1;
6203                 if (tmpl->content_checked)
6204                         rec->content_checked = 1;
6205                 if (tmpl->owner_ref_checked)
6206                         rec->owner_ref_checked = 1;
6207                 memcpy(&rec->parent_key, &tmpl->parent_key,
6208                                 sizeof(tmpl->parent_key));
6209                 if (tmpl->parent_generation)
6210                         rec->parent_generation = tmpl->parent_generation;
6211                 if (rec->max_size < tmpl->max_size)
6212                         rec->max_size = tmpl->max_size;
6213
6214                 /*
6215                  * A metadata extent can't cross stripe_len boundary, otherwise
6216                  * kernel scrub won't be able to handle it.
6217                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6218                  * it.
6219                  */
6220                 if (tmpl->metadata)
6221                         rec->crossing_stripes = check_crossing_stripes(
6222                                         global_info, rec->start,
6223                                         global_info->nodesize);
6224                 check_extent_type(rec);
6225                 maybe_free_extent_rec(extent_cache, rec);
6226                 return ret;
6227         }
6228
6229         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6230
6231         return ret;
6232 }
6233
6234 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6235                             u64 parent, u64 root, int found_ref)
6236 {
6237         struct extent_record *rec;
6238         struct tree_backref *back;
6239         struct cache_extent *cache;
6240         int ret;
6241
6242         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6243         if (!cache) {
6244                 struct extent_record tmpl;
6245
6246                 memset(&tmpl, 0, sizeof(tmpl));
6247                 tmpl.start = bytenr;
6248                 tmpl.nr = 1;
6249                 tmpl.metadata = 1;
6250                 tmpl.max_size = 1;
6251
6252                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6253                 if (ret)
6254                         return ret;
6255
6256                 /* really a bug in cache_extent implement now */
6257                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6258                 if (!cache)
6259                         return -ENOENT;
6260         }
6261
6262         rec = container_of(cache, struct extent_record, cache);
6263         if (rec->start != bytenr) {
6264                 /*
6265                  * Several cause, from unaligned bytenr to over lapping extents
6266                  */
6267                 return -EEXIST;
6268         }
6269
6270         back = find_tree_backref(rec, parent, root);
6271         if (!back) {
6272                 back = alloc_tree_backref(rec, parent, root);
6273                 if (!back)
6274                         return -ENOMEM;
6275         }
6276
6277         if (found_ref) {
6278                 if (back->node.found_ref) {
6279                         fprintf(stderr, "Extent back ref already exists "
6280                                 "for %llu parent %llu root %llu \n",
6281                                 (unsigned long long)bytenr,
6282                                 (unsigned long long)parent,
6283                                 (unsigned long long)root);
6284                 }
6285                 back->node.found_ref = 1;
6286         } else {
6287                 if (back->node.found_extent_tree) {
6288                         fprintf(stderr, "Extent back ref already exists "
6289                                 "for %llu parent %llu root %llu \n",
6290                                 (unsigned long long)bytenr,
6291                                 (unsigned long long)parent,
6292                                 (unsigned long long)root);
6293                 }
6294                 back->node.found_extent_tree = 1;
6295         }
6296         check_extent_type(rec);
6297         maybe_free_extent_rec(extent_cache, rec);
6298         return 0;
6299 }
6300
6301 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6302                             u64 parent, u64 root, u64 owner, u64 offset,
6303                             u32 num_refs, int found_ref, u64 max_size)
6304 {
6305         struct extent_record *rec;
6306         struct data_backref *back;
6307         struct cache_extent *cache;
6308         int ret;
6309
6310         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6311         if (!cache) {
6312                 struct extent_record tmpl;
6313
6314                 memset(&tmpl, 0, sizeof(tmpl));
6315                 tmpl.start = bytenr;
6316                 tmpl.nr = 1;
6317                 tmpl.max_size = max_size;
6318
6319                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6320                 if (ret)
6321                         return ret;
6322
6323                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6324                 if (!cache)
6325                         abort();
6326         }
6327
6328         rec = container_of(cache, struct extent_record, cache);
6329         if (rec->max_size < max_size)
6330                 rec->max_size = max_size;
6331
6332         /*
6333          * If found_ref is set then max_size is the real size and must match the
6334          * existing refs.  So if we have already found a ref then we need to
6335          * make sure that this ref matches the existing one, otherwise we need
6336          * to add a new backref so we can notice that the backrefs don't match
6337          * and we need to figure out who is telling the truth.  This is to
6338          * account for that awful fsync bug I introduced where we'd end up with
6339          * a btrfs_file_extent_item that would have its length include multiple
6340          * prealloc extents or point inside of a prealloc extent.
6341          */
6342         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6343                                  bytenr, max_size);
6344         if (!back) {
6345                 back = alloc_data_backref(rec, parent, root, owner, offset,
6346                                           max_size);
6347                 BUG_ON(!back);
6348         }
6349
6350         if (found_ref) {
6351                 BUG_ON(num_refs != 1);
6352                 if (back->node.found_ref)
6353                         BUG_ON(back->bytes != max_size);
6354                 back->node.found_ref = 1;
6355                 back->found_ref += 1;
6356                 back->bytes = max_size;
6357                 back->disk_bytenr = bytenr;
6358                 rec->refs += 1;
6359                 rec->content_checked = 1;
6360                 rec->owner_ref_checked = 1;
6361         } else {
6362                 if (back->node.found_extent_tree) {
6363                         fprintf(stderr, "Extent back ref already exists "
6364                                 "for %llu parent %llu root %llu "
6365                                 "owner %llu offset %llu num_refs %lu\n",
6366                                 (unsigned long long)bytenr,
6367                                 (unsigned long long)parent,
6368                                 (unsigned long long)root,
6369                                 (unsigned long long)owner,
6370                                 (unsigned long long)offset,
6371                                 (unsigned long)num_refs);
6372                 }
6373                 back->num_refs = num_refs;
6374                 back->node.found_extent_tree = 1;
6375         }
6376         maybe_free_extent_rec(extent_cache, rec);
6377         return 0;
6378 }
6379
6380 static int add_pending(struct cache_tree *pending,
6381                        struct cache_tree *seen, u64 bytenr, u32 size)
6382 {
6383         int ret;
6384         ret = add_cache_extent(seen, bytenr, size);
6385         if (ret)
6386                 return ret;
6387         add_cache_extent(pending, bytenr, size);
6388         return 0;
6389 }
6390
6391 static int pick_next_pending(struct cache_tree *pending,
6392                         struct cache_tree *reada,
6393                         struct cache_tree *nodes,
6394                         u64 last, struct block_info *bits, int bits_nr,
6395                         int *reada_bits)
6396 {
6397         unsigned long node_start = last;
6398         struct cache_extent *cache;
6399         int ret;
6400
6401         cache = search_cache_extent(reada, 0);
6402         if (cache) {
6403                 bits[0].start = cache->start;
6404                 bits[0].size = cache->size;
6405                 *reada_bits = 1;
6406                 return 1;
6407         }
6408         *reada_bits = 0;
6409         if (node_start > 32768)
6410                 node_start -= 32768;
6411
6412         cache = search_cache_extent(nodes, node_start);
6413         if (!cache)
6414                 cache = search_cache_extent(nodes, 0);
6415
6416         if (!cache) {
6417                  cache = search_cache_extent(pending, 0);
6418                  if (!cache)
6419                          return 0;
6420                  ret = 0;
6421                  do {
6422                          bits[ret].start = cache->start;
6423                          bits[ret].size = cache->size;
6424                          cache = next_cache_extent(cache);
6425                          ret++;
6426                  } while (cache && ret < bits_nr);
6427                  return ret;
6428         }
6429
6430         ret = 0;
6431         do {
6432                 bits[ret].start = cache->start;
6433                 bits[ret].size = cache->size;
6434                 cache = next_cache_extent(cache);
6435                 ret++;
6436         } while (cache && ret < bits_nr);
6437
6438         if (bits_nr - ret > 8) {
6439                 u64 lookup = bits[0].start + bits[0].size;
6440                 struct cache_extent *next;
6441                 next = search_cache_extent(pending, lookup);
6442                 while(next) {
6443                         if (next->start - lookup > 32768)
6444                                 break;
6445                         bits[ret].start = next->start;
6446                         bits[ret].size = next->size;
6447                         lookup = next->start + next->size;
6448                         ret++;
6449                         if (ret == bits_nr)
6450                                 break;
6451                         next = next_cache_extent(next);
6452                         if (!next)
6453                                 break;
6454                 }
6455         }
6456         return ret;
6457 }
6458
6459 static void free_chunk_record(struct cache_extent *cache)
6460 {
6461         struct chunk_record *rec;
6462
6463         rec = container_of(cache, struct chunk_record, cache);
6464         list_del_init(&rec->list);
6465         list_del_init(&rec->dextents);
6466         free(rec);
6467 }
6468
6469 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6470 {
6471         cache_tree_free_extents(chunk_cache, free_chunk_record);
6472 }
6473
6474 static void free_device_record(struct rb_node *node)
6475 {
6476         struct device_record *rec;
6477
6478         rec = container_of(node, struct device_record, node);
6479         free(rec);
6480 }
6481
6482 FREE_RB_BASED_TREE(device_cache, free_device_record);
6483
6484 int insert_block_group_record(struct block_group_tree *tree,
6485                               struct block_group_record *bg_rec)
6486 {
6487         int ret;
6488
6489         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6490         if (ret)
6491                 return ret;
6492
6493         list_add_tail(&bg_rec->list, &tree->block_groups);
6494         return 0;
6495 }
6496
6497 static void free_block_group_record(struct cache_extent *cache)
6498 {
6499         struct block_group_record *rec;
6500
6501         rec = container_of(cache, struct block_group_record, cache);
6502         list_del_init(&rec->list);
6503         free(rec);
6504 }
6505
6506 void free_block_group_tree(struct block_group_tree *tree)
6507 {
6508         cache_tree_free_extents(&tree->tree, free_block_group_record);
6509 }
6510
6511 int insert_device_extent_record(struct device_extent_tree *tree,
6512                                 struct device_extent_record *de_rec)
6513 {
6514         int ret;
6515
6516         /*
6517          * Device extent is a bit different from the other extents, because
6518          * the extents which belong to the different devices may have the
6519          * same start and size, so we need use the special extent cache
6520          * search/insert functions.
6521          */
6522         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6523         if (ret)
6524                 return ret;
6525
6526         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6527         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6528         return 0;
6529 }
6530
6531 static void free_device_extent_record(struct cache_extent *cache)
6532 {
6533         struct device_extent_record *rec;
6534
6535         rec = container_of(cache, struct device_extent_record, cache);
6536         if (!list_empty(&rec->chunk_list))
6537                 list_del_init(&rec->chunk_list);
6538         if (!list_empty(&rec->device_list))
6539                 list_del_init(&rec->device_list);
6540         free(rec);
6541 }
6542
6543 void free_device_extent_tree(struct device_extent_tree *tree)
6544 {
6545         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6546 }
6547
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * The item key supplies the extent start (objectid) and the parent (offset).
 *
 * NOTE(review): the data path passes a max_size of 0, which
 * add_extent_rec_nolookup() rejects with BUG_ON when no record exists yet.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6568
6569 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6570                                             struct btrfs_key *key,
6571                                             int slot)
6572 {
6573         struct btrfs_chunk *ptr;
6574         struct chunk_record *rec;
6575         int num_stripes, i;
6576
6577         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6578         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6579
6580         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6581         if (!rec) {
6582                 fprintf(stderr, "memory allocation failed\n");
6583                 exit(-1);
6584         }
6585
6586         INIT_LIST_HEAD(&rec->list);
6587         INIT_LIST_HEAD(&rec->dextents);
6588         rec->bg_rec = NULL;
6589
6590         rec->cache.start = key->offset;
6591         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6592
6593         rec->generation = btrfs_header_generation(leaf);
6594
6595         rec->objectid = key->objectid;
6596         rec->type = key->type;
6597         rec->offset = key->offset;
6598
6599         rec->length = rec->cache.size;
6600         rec->owner = btrfs_chunk_owner(leaf, ptr);
6601         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6602         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6603         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6604         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6605         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6606         rec->num_stripes = num_stripes;
6607         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6608
6609         for (i = 0; i < rec->num_stripes; ++i) {
6610                 rec->stripes[i].devid =
6611                         btrfs_stripe_devid_nr(leaf, ptr, i);
6612                 rec->stripes[i].offset =
6613                         btrfs_stripe_offset_nr(leaf, ptr, i);
6614                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6615                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6616                                 BTRFS_UUID_SIZE);
6617         }
6618
6619         return rec;
6620 }
6621
6622 static int process_chunk_item(struct cache_tree *chunk_cache,
6623                               struct btrfs_key *key, struct extent_buffer *eb,
6624                               int slot)
6625 {
6626         struct chunk_record *rec;
6627         struct btrfs_chunk *chunk;
6628         int ret = 0;
6629
6630         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6631         /*
6632          * Do extra check for this chunk item,
6633          *
6634          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6635          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6636          * and owner<->key_type check.
6637          */
6638         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6639                                       key->offset);
6640         if (ret < 0) {
6641                 error("chunk(%llu, %llu) is not valid, ignore it",
6642                       key->offset, btrfs_chunk_length(eb, chunk));
6643                 return 0;
6644         }
6645         rec = btrfs_new_chunk_record(eb, key, slot);
6646         ret = insert_cache_extent(chunk_cache, &rec->cache);
6647         if (ret) {
6648                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6649                         rec->offset, rec->length);
6650                 free(rec);
6651         }
6652
6653         return ret;
6654 }
6655
6656 static int process_device_item(struct rb_root *dev_cache,
6657                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6658 {
6659         struct btrfs_dev_item *ptr;
6660         struct device_record *rec;
6661         int ret = 0;
6662
6663         ptr = btrfs_item_ptr(eb,
6664                 slot, struct btrfs_dev_item);
6665
6666         rec = malloc(sizeof(*rec));
6667         if (!rec) {
6668                 fprintf(stderr, "memory allocation failed\n");
6669                 return -ENOMEM;
6670         }
6671
6672         rec->devid = key->offset;
6673         rec->generation = btrfs_header_generation(eb);
6674
6675         rec->objectid = key->objectid;
6676         rec->type = key->type;
6677         rec->offset = key->offset;
6678
6679         rec->devid = btrfs_device_id(eb, ptr);
6680         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6681         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6682
6683         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6684         if (ret) {
6685                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6686                 free(rec);
6687         }
6688
6689         return ret;
6690 }
6691
6692 struct block_group_record *
6693 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6694                              int slot)
6695 {
6696         struct btrfs_block_group_item *ptr;
6697         struct block_group_record *rec;
6698
6699         rec = calloc(1, sizeof(*rec));
6700         if (!rec) {
6701                 fprintf(stderr, "memory allocation failed\n");
6702                 exit(-1);
6703         }
6704
6705         rec->cache.start = key->objectid;
6706         rec->cache.size = key->offset;
6707
6708         rec->generation = btrfs_header_generation(leaf);
6709
6710         rec->objectid = key->objectid;
6711         rec->type = key->type;
6712         rec->offset = key->offset;
6713
6714         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6715         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6716
6717         INIT_LIST_HEAD(&rec->list);
6718
6719         return rec;
6720 }
6721
6722 static int process_block_group_item(struct block_group_tree *block_group_cache,
6723                                     struct btrfs_key *key,
6724                                     struct extent_buffer *eb, int slot)
6725 {
6726         struct block_group_record *rec;
6727         int ret = 0;
6728
6729         rec = btrfs_new_block_group_record(eb, key, slot);
6730         ret = insert_block_group_record(block_group_cache, rec);
6731         if (ret) {
6732                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6733                         rec->objectid, rec->offset);
6734                 free(rec);
6735         }
6736
6737         return ret;
6738 }
6739
6740 struct device_extent_record *
6741 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6742                                struct btrfs_key *key, int slot)
6743 {
6744         struct device_extent_record *rec;
6745         struct btrfs_dev_extent *ptr;
6746
6747         rec = calloc(1, sizeof(*rec));
6748         if (!rec) {
6749                 fprintf(stderr, "memory allocation failed\n");
6750                 exit(-1);
6751         }
6752
6753         rec->cache.objectid = key->objectid;
6754         rec->cache.start = key->offset;
6755
6756         rec->generation = btrfs_header_generation(leaf);
6757
6758         rec->objectid = key->objectid;
6759         rec->type = key->type;
6760         rec->offset = key->offset;
6761
6762         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6763         rec->chunk_objecteid =
6764                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6765         rec->chunk_offset =
6766                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6767         rec->length = btrfs_dev_extent_length(leaf, ptr);
6768         rec->cache.size = rec->length;
6769
6770         INIT_LIST_HEAD(&rec->chunk_list);
6771         INIT_LIST_HEAD(&rec->device_list);
6772
6773         return rec;
6774 }
6775
6776 static int
6777 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6778                            struct btrfs_key *key, struct extent_buffer *eb,
6779                            int slot)
6780 {
6781         struct device_extent_record *rec;
6782         int ret;
6783
6784         rec = btrfs_new_device_extent_record(eb, key, slot);
6785         ret = insert_device_extent_record(dev_extent_cache, rec);
6786         if (ret) {
6787                 fprintf(stderr,
6788                         "Device extent[%llu, %llu, %llu] existed.\n",
6789                         rec->objectid, rec->offset, rec->length);
6790                 free(rec);
6791         }
6792
6793         return ret;
6794 }
6795
6796 static int process_extent_item(struct btrfs_root *root,
6797                                struct cache_tree *extent_cache,
6798                                struct extent_buffer *eb, int slot)
6799 {
6800         struct btrfs_extent_item *ei;
6801         struct btrfs_extent_inline_ref *iref;
6802         struct btrfs_extent_data_ref *dref;
6803         struct btrfs_shared_data_ref *sref;
6804         struct btrfs_key key;
6805         struct extent_record tmpl;
6806         unsigned long end;
6807         unsigned long ptr;
6808         int ret;
6809         int type;
6810         u32 item_size = btrfs_item_size_nr(eb, slot);
6811         u64 refs = 0;
6812         u64 offset;
6813         u64 num_bytes;
6814         int metadata = 0;
6815
6816         btrfs_item_key_to_cpu(eb, &key, slot);
6817
6818         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6819                 metadata = 1;
6820                 num_bytes = root->fs_info->nodesize;
6821         } else {
6822                 num_bytes = key.offset;
6823         }
6824
6825         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6826                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6827                       key.objectid, root->fs_info->sectorsize);
6828                 return -EIO;
6829         }
6830         if (item_size < sizeof(*ei)) {
6831 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6832                 struct btrfs_extent_item_v0 *ei0;
6833                 BUG_ON(item_size != sizeof(*ei0));
6834                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6835                 refs = btrfs_extent_refs_v0(eb, ei0);
6836 #else
6837                 BUG();
6838 #endif
6839                 memset(&tmpl, 0, sizeof(tmpl));
6840                 tmpl.start = key.objectid;
6841                 tmpl.nr = num_bytes;
6842                 tmpl.extent_item_refs = refs;
6843                 tmpl.metadata = metadata;
6844                 tmpl.found_rec = 1;
6845                 tmpl.max_size = num_bytes;
6846
6847                 return add_extent_rec(extent_cache, &tmpl);
6848         }
6849
6850         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6851         refs = btrfs_extent_refs(eb, ei);
6852         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6853                 metadata = 1;
6854         else
6855                 metadata = 0;
6856         if (metadata && num_bytes != root->fs_info->nodesize) {
6857                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6858                       num_bytes, root->fs_info->nodesize);
6859                 return -EIO;
6860         }
6861         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6862                 error("ignore invalid data extent, length %llu is not aligned to %u",
6863                       num_bytes, root->fs_info->sectorsize);
6864                 return -EIO;
6865         }
6866
6867         memset(&tmpl, 0, sizeof(tmpl));
6868         tmpl.start = key.objectid;
6869         tmpl.nr = num_bytes;
6870         tmpl.extent_item_refs = refs;
6871         tmpl.metadata = metadata;
6872         tmpl.found_rec = 1;
6873         tmpl.max_size = num_bytes;
6874         add_extent_rec(extent_cache, &tmpl);
6875
6876         ptr = (unsigned long)(ei + 1);
6877         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6878             key.type == BTRFS_EXTENT_ITEM_KEY)
6879                 ptr += sizeof(struct btrfs_tree_block_info);
6880
6881         end = (unsigned long)ei + item_size;
6882         while (ptr < end) {
6883                 iref = (struct btrfs_extent_inline_ref *)ptr;
6884                 type = btrfs_extent_inline_ref_type(eb, iref);
6885                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6886                 switch (type) {
6887                 case BTRFS_TREE_BLOCK_REF_KEY:
6888                         ret = add_tree_backref(extent_cache, key.objectid,
6889                                         0, offset, 0);
6890                         if (ret < 0)
6891                                 error(
6892                         "add_tree_backref failed (extent items tree block): %s",
6893                                       strerror(-ret));
6894                         break;
6895                 case BTRFS_SHARED_BLOCK_REF_KEY:
6896                         ret = add_tree_backref(extent_cache, key.objectid,
6897                                         offset, 0, 0);
6898                         if (ret < 0)
6899                                 error(
6900                         "add_tree_backref failed (extent items shared block): %s",
6901                                       strerror(-ret));
6902                         break;
6903                 case BTRFS_EXTENT_DATA_REF_KEY:
6904                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6905                         add_data_backref(extent_cache, key.objectid, 0,
6906                                         btrfs_extent_data_ref_root(eb, dref),
6907                                         btrfs_extent_data_ref_objectid(eb,
6908                                                                        dref),
6909                                         btrfs_extent_data_ref_offset(eb, dref),
6910                                         btrfs_extent_data_ref_count(eb, dref),
6911                                         0, num_bytes);
6912                         break;
6913                 case BTRFS_SHARED_DATA_REF_KEY:
6914                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6915                         add_data_backref(extent_cache, key.objectid, offset,
6916                                         0, 0, 0,
6917                                         btrfs_shared_data_ref_count(eb, sref),
6918                                         0, num_bytes);
6919                         break;
6920                 default:
6921                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6922                                 key.objectid, key.type, num_bytes);
6923                         goto out;
6924                 }
6925                 ptr += btrfs_extent_inline_ref_size(type);
6926         }
6927         WARN_ON(ptr > end);
6928 out:
6929         return 0;
6930 }
6931
6932 static int check_cache_range(struct btrfs_root *root,
6933                              struct btrfs_block_group_cache *cache,
6934                              u64 offset, u64 bytes)
6935 {
6936         struct btrfs_free_space *entry;
6937         u64 *logical;
6938         u64 bytenr;
6939         int stripe_len;
6940         int i, nr, ret;
6941
6942         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6943                 bytenr = btrfs_sb_offset(i);
6944                 ret = btrfs_rmap_block(root->fs_info,
6945                                        cache->key.objectid, bytenr, 0,
6946                                        &logical, &nr, &stripe_len);
6947                 if (ret)
6948                         return ret;
6949
6950                 while (nr--) {
6951                         if (logical[nr] + stripe_len <= offset)
6952                                 continue;
6953                         if (offset + bytes <= logical[nr])
6954                                 continue;
6955                         if (logical[nr] == offset) {
6956                                 if (stripe_len >= bytes) {
6957                                         free(logical);
6958                                         return 0;
6959                                 }
6960                                 bytes -= stripe_len;
6961                                 offset += stripe_len;
6962                         } else if (logical[nr] < offset) {
6963                                 if (logical[nr] + stripe_len >=
6964                                     offset + bytes) {
6965                                         free(logical);
6966                                         return 0;
6967                                 }
6968                                 bytes = (offset + bytes) -
6969                                         (logical[nr] + stripe_len);
6970                                 offset = logical[nr] + stripe_len;
6971                         } else {
6972                                 /*
6973                                  * Could be tricky, the super may land in the
6974                                  * middle of the area we're checking.  First
6975                                  * check the easiest case, it's at the end.
6976                                  */
6977                                 if (logical[nr] + stripe_len >=
6978                                     bytes + offset) {
6979                                         bytes = logical[nr] - offset;
6980                                         continue;
6981                                 }
6982
6983                                 /* Check the left side */
6984                                 ret = check_cache_range(root, cache,
6985                                                         offset,
6986                                                         logical[nr] - offset);
6987                                 if (ret) {
6988                                         free(logical);
6989                                         return ret;
6990                                 }
6991
6992                                 /* Now we continue with the right side */
6993                                 bytes = (offset + bytes) -
6994                                         (logical[nr] + stripe_len);
6995                                 offset = logical[nr] + stripe_len;
6996                         }
6997                 }
6998
6999                 free(logical);
7000         }
7001
7002         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7003         if (!entry) {
7004                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7005                         offset, offset+bytes);
7006                 return -EINVAL;
7007         }
7008
7009         if (entry->offset != offset) {
7010                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7011                         entry->offset);
7012                 return -EINVAL;
7013         }
7014
7015         if (entry->bytes != bytes) {
7016                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7017                         bytes, entry->bytes, offset);
7018                 return -EINVAL;
7019         }
7020
7021         unlink_free_space(cache->free_space_ctl, entry);
7022         free(entry);
7023         return 0;
7024 }
7025
7026 static int verify_space_cache(struct btrfs_root *root,
7027                               struct btrfs_block_group_cache *cache)
7028 {
7029         struct btrfs_path path;
7030         struct extent_buffer *leaf;
7031         struct btrfs_key key;
7032         u64 last;
7033         int ret = 0;
7034
7035         root = root->fs_info->extent_root;
7036
7037         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7038
7039         btrfs_init_path(&path);
7040         key.objectid = last;
7041         key.offset = 0;
7042         key.type = BTRFS_EXTENT_ITEM_KEY;
7043         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7044         if (ret < 0)
7045                 goto out;
7046         ret = 0;
7047         while (1) {
7048                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7049                         ret = btrfs_next_leaf(root, &path);
7050                         if (ret < 0)
7051                                 goto out;
7052                         if (ret > 0) {
7053                                 ret = 0;
7054                                 break;
7055                         }
7056                 }
7057                 leaf = path.nodes[0];
7058                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7059                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7060                         break;
7061                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7062                     key.type != BTRFS_METADATA_ITEM_KEY) {
7063                         path.slots[0]++;
7064                         continue;
7065                 }
7066
7067                 if (last == key.objectid) {
7068                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7069                                 last = key.objectid + key.offset;
7070                         else
7071                                 last = key.objectid + root->fs_info->nodesize;
7072                         path.slots[0]++;
7073                         continue;
7074                 }
7075
7076                 ret = check_cache_range(root, cache, last,
7077                                         key.objectid - last);
7078                 if (ret)
7079                         break;
7080                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7081                         last = key.objectid + key.offset;
7082                 else
7083                         last = key.objectid + root->fs_info->nodesize;
7084                 path.slots[0]++;
7085         }
7086
7087         if (last < cache->key.objectid + cache->key.offset)
7088                 ret = check_cache_range(root, cache, last,
7089                                         cache->key.objectid +
7090                                         cache->key.offset - last);
7091
7092 out:
7093         btrfs_release_path(&path);
7094
7095         if (!ret &&
7096             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7097                 fprintf(stderr, "There are still entries left in the space "
7098                         "cache\n");
7099                 ret = -EINVAL;
7100         }
7101
7102         return ret;
7103 }
7104
7105 static int check_space_cache(struct btrfs_root *root)
7106 {
7107         struct btrfs_block_group_cache *cache;
7108         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7109         int ret;
7110         int error = 0;
7111
7112         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7113             btrfs_super_generation(root->fs_info->super_copy) !=
7114             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7115                 printf("cache and super generation don't match, space cache "
7116                        "will be invalidated\n");
7117                 return 0;
7118         }
7119
7120         if (ctx.progress_enabled) {
7121                 ctx.tp = TASK_FREE_SPACE;
7122                 task_start(ctx.info);
7123         }
7124
7125         while (1) {
7126                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7127                 if (!cache)
7128                         break;
7129
7130                 start = cache->key.objectid + cache->key.offset;
7131                 if (!cache->free_space_ctl) {
7132                         if (btrfs_init_free_space_ctl(cache,
7133                                                 root->fs_info->sectorsize)) {
7134                                 ret = -ENOMEM;
7135                                 break;
7136                         }
7137                 } else {
7138                         btrfs_remove_free_space_cache(cache);
7139                 }
7140
7141                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7142                         ret = exclude_super_stripes(root, cache);
7143                         if (ret) {
7144                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7145                                         strerror(-ret));
7146                                 error++;
7147                                 continue;
7148                         }
7149                         ret = load_free_space_tree(root->fs_info, cache);
7150                         free_excluded_extents(root, cache);
7151                         if (ret < 0) {
7152                                 fprintf(stderr, "could not load free space tree: %s\n",
7153                                         strerror(-ret));
7154                                 error++;
7155                                 continue;
7156                         }
7157                         error += ret;
7158                 } else {
7159                         ret = load_free_space_cache(root->fs_info, cache);
7160                         if (!ret)
7161                                 continue;
7162                 }
7163
7164                 ret = verify_space_cache(root, cache);
7165                 if (ret) {
7166                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7167                                 cache->key.objectid);
7168                         error++;
7169                 }
7170         }
7171
7172         task_stop(ctx.info);
7173
7174         return error ? -EINVAL : 0;
7175 }
7176
7177 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7178                         u64 num_bytes, unsigned long leaf_offset,
7179                         struct extent_buffer *eb) {
7180
7181         struct btrfs_fs_info *fs_info = root->fs_info;
7182         u64 offset = 0;
7183         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7184         char *data;
7185         unsigned long csum_offset;
7186         u32 csum;
7187         u32 csum_expected;
7188         u64 read_len;
7189         u64 data_checked = 0;
7190         u64 tmp;
7191         int ret = 0;
7192         int mirror;
7193         int num_copies;
7194
7195         if (num_bytes % fs_info->sectorsize)
7196                 return -EINVAL;
7197
7198         data = malloc(num_bytes);
7199         if (!data)
7200                 return -ENOMEM;
7201
7202         while (offset < num_bytes) {
7203                 mirror = 0;
7204 again:
7205                 read_len = num_bytes - offset;
7206                 /* read as much space once a time */
7207                 ret = read_extent_data(fs_info, data + offset,
7208                                 bytenr + offset, &read_len, mirror);
7209                 if (ret)
7210                         goto out;
7211                 data_checked = 0;
7212                 /* verify every 4k data's checksum */
7213                 while (data_checked < read_len) {
7214                         csum = ~(u32)0;
7215                         tmp = offset + data_checked;
7216
7217                         csum = btrfs_csum_data((char *)data + tmp,
7218                                                csum, fs_info->sectorsize);
7219                         btrfs_csum_final(csum, (u8 *)&csum);
7220
7221                         csum_offset = leaf_offset +
7222                                  tmp / fs_info->sectorsize * csum_size;
7223                         read_extent_buffer(eb, (char *)&csum_expected,
7224                                            csum_offset, csum_size);
7225                         /* try another mirror */
7226                         if (csum != csum_expected) {
7227                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7228                                                 mirror, bytenr + tmp,
7229                                                 csum, csum_expected);
7230                                 num_copies = btrfs_num_copies(root->fs_info,
7231                                                 bytenr, num_bytes);
7232                                 if (mirror < num_copies - 1) {
7233                                         mirror += 1;
7234                                         goto again;
7235                                 }
7236                         }
7237                         data_checked += fs_info->sectorsize;
7238                 }
7239                 offset += read_len;
7240         }
7241 out:
7242         free(data);
7243         return ret;
7244 }
7245
7246 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7247                                u64 num_bytes)
7248 {
7249         struct btrfs_path path;
7250         struct extent_buffer *leaf;
7251         struct btrfs_key key;
7252         int ret;
7253
7254         btrfs_init_path(&path);
7255         key.objectid = bytenr;
7256         key.type = BTRFS_EXTENT_ITEM_KEY;
7257         key.offset = (u64)-1;
7258
7259 again:
7260         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7261                                 0, 0);
7262         if (ret < 0) {
7263                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7264                 btrfs_release_path(&path);
7265                 return ret;
7266         } else if (ret) {
7267                 if (path.slots[0] > 0) {
7268                         path.slots[0]--;
7269                 } else {
7270                         ret = btrfs_prev_leaf(root, &path);
7271                         if (ret < 0) {
7272                                 goto out;
7273                         } else if (ret > 0) {
7274                                 ret = 0;
7275                                 goto out;
7276                         }
7277                 }
7278         }
7279
7280         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7281
7282         /*
7283          * Block group items come before extent items if they have the same
7284          * bytenr, so walk back one more just in case.  Dear future traveller,
7285          * first congrats on mastering time travel.  Now if it's not too much
7286          * trouble could you go back to 2006 and tell Chris to make the
7287          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7288          * EXTENT_ITEM_KEY please?
7289          */
7290         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7291                 if (path.slots[0] > 0) {
7292                         path.slots[0]--;
7293                 } else {
7294                         ret = btrfs_prev_leaf(root, &path);
7295                         if (ret < 0) {
7296                                 goto out;
7297                         } else if (ret > 0) {
7298                                 ret = 0;
7299                                 goto out;
7300                         }
7301                 }
7302                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7303         }
7304
7305         while (num_bytes) {
7306                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7307                         ret = btrfs_next_leaf(root, &path);
7308                         if (ret < 0) {
7309                                 fprintf(stderr, "Error going to next leaf "
7310                                         "%d\n", ret);
7311                                 btrfs_release_path(&path);
7312                                 return ret;
7313                         } else if (ret) {
7314                                 break;
7315                         }
7316                 }
7317                 leaf = path.nodes[0];
7318                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7319                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7320                         path.slots[0]++;
7321                         continue;
7322                 }
7323                 if (key.objectid + key.offset < bytenr) {
7324                         path.slots[0]++;
7325                         continue;
7326                 }
7327                 if (key.objectid > bytenr + num_bytes)
7328                         break;
7329
7330                 if (key.objectid == bytenr) {
7331                         if (key.offset >= num_bytes) {
7332                                 num_bytes = 0;
7333                                 break;
7334                         }
7335                         num_bytes -= key.offset;
7336                         bytenr += key.offset;
7337                 } else if (key.objectid < bytenr) {
7338                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7339                                 num_bytes = 0;
7340                                 break;
7341                         }
7342                         num_bytes = (bytenr + num_bytes) -
7343                                 (key.objectid + key.offset);
7344                         bytenr = key.objectid + key.offset;
7345                 } else {
7346                         if (key.objectid + key.offset < bytenr + num_bytes) {
7347                                 u64 new_start = key.objectid + key.offset;
7348                                 u64 new_bytes = bytenr + num_bytes - new_start;
7349
7350                                 /*
7351                                  * Weird case, the extent is in the middle of
7352                                  * our range, we'll have to search one side
7353                                  * and then the other.  Not sure if this happens
7354                                  * in real life, but no harm in coding it up
7355                                  * anyway just in case.
7356                                  */
7357                                 btrfs_release_path(&path);
7358                                 ret = check_extent_exists(root, new_start,
7359                                                           new_bytes);
7360                                 if (ret) {
7361                                         fprintf(stderr, "Right section didn't "
7362                                                 "have a record\n");
7363                                         break;
7364                                 }
7365                                 num_bytes = key.objectid - bytenr;
7366                                 goto again;
7367                         }
7368                         num_bytes = key.objectid - bytenr;
7369                 }
7370                 path.slots[0]++;
7371         }
7372         ret = 0;
7373
7374 out:
7375         if (num_bytes && !ret) {
7376                 fprintf(stderr, "There are no extents for csum range "
7377                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7378                 ret = 1;
7379         }
7380
7381         btrfs_release_path(&path);
7382         return ret;
7383 }
7384
7385 static int check_csums(struct btrfs_root *root)
7386 {
7387         struct btrfs_path path;
7388         struct extent_buffer *leaf;
7389         struct btrfs_key key;
7390         u64 offset = 0, num_bytes = 0;
7391         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7392         int errors = 0;
7393         int ret;
7394         u64 data_len;
7395         unsigned long leaf_offset;
7396
7397         root = root->fs_info->csum_root;
7398         if (!extent_buffer_uptodate(root->node)) {
7399                 fprintf(stderr, "No valid csum tree found\n");
7400                 return -ENOENT;
7401         }
7402
7403         btrfs_init_path(&path);
7404         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7405         key.type = BTRFS_EXTENT_CSUM_KEY;
7406         key.offset = 0;
7407         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7408         if (ret < 0) {
7409                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7410                 btrfs_release_path(&path);
7411                 return ret;
7412         }
7413
7414         if (ret > 0 && path.slots[0])
7415                 path.slots[0]--;
7416         ret = 0;
7417
7418         while (1) {
7419                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7420                         ret = btrfs_next_leaf(root, &path);
7421                         if (ret < 0) {
7422                                 fprintf(stderr, "Error going to next leaf "
7423                                         "%d\n", ret);
7424                                 break;
7425                         }
7426                         if (ret)
7427                                 break;
7428                 }
7429                 leaf = path.nodes[0];
7430
7431                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7432                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7433                         path.slots[0]++;
7434                         continue;
7435                 }
7436
7437                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7438                               csum_size) * root->fs_info->sectorsize;
7439                 if (!check_data_csum)
7440                         goto skip_csum_check;
7441                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7442                 ret = check_extent_csums(root, key.offset, data_len,
7443                                          leaf_offset, leaf);
7444                 if (ret)
7445                         break;
7446 skip_csum_check:
7447                 if (!num_bytes) {
7448                         offset = key.offset;
7449                 } else if (key.offset != offset + num_bytes) {
7450                         ret = check_extent_exists(root, offset, num_bytes);
7451                         if (ret) {
7452                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7453                                         "there is no extent record\n",
7454                                         offset, offset+num_bytes);
7455                                 errors++;
7456                         }
7457                         offset = key.offset;
7458                         num_bytes = 0;
7459                 }
7460                 num_bytes += data_len;
7461                 path.slots[0]++;
7462         }
7463
7464         btrfs_release_path(&path);
7465         return errors;
7466 }
7467
7468 static int is_dropped_key(struct btrfs_key *key,
7469                           struct btrfs_key *drop_key) {
7470         if (key->objectid < drop_key->objectid)
7471                 return 1;
7472         else if (key->objectid == drop_key->objectid) {
7473                 if (key->type < drop_key->type)
7474                         return 1;
7475                 else if (key->type == drop_key->type) {
7476                         if (key->offset < drop_key->offset)
7477                                 return 1;
7478                 }
7479         }
7480         return 0;
7481 }
7482
7483 /*
7484  * Here are the rules for FULL_BACKREF.
7485  *
7486  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7487  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7488  *      FULL_BACKREF set.
7489  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7490  *    if it happened after the relocation occurred since we'll have dropped the
7491  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7492  *    have no real way to know for sure.
7493  *
7494  * We process the blocks one root at a time, and we start from the lowest root
7495  * objectid and go to the highest.  So we can just lookup the owner backref for
7496  * the record and if we don't find it then we know it doesn't exist and we have
7497  * a FULL BACKREF.
7498  *
7499  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7500  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7501  * be set or not and then we can check later once we've gathered all the refs.
7502  */
7503 static int calc_extent_flag(struct cache_tree *extent_cache,
7504                            struct extent_buffer *buf,
7505                            struct root_item_record *ri,
7506                            u64 *flags)
7507 {
7508         struct extent_record *rec;
7509         struct cache_extent *cache;
7510         struct tree_backref *tback;
7511         u64 owner = 0;
7512
7513         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7514         /* we have added this extent before */
7515         if (!cache)
7516                 return -ENOENT;
7517
7518         rec = container_of(cache, struct extent_record, cache);
7519
7520         /*
7521          * Except file/reloc tree, we can not have
7522          * FULL BACKREF MODE
7523          */
7524         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7525                 goto normal;
7526         /*
7527          * root node
7528          */
7529         if (buf->start == ri->bytenr)
7530                 goto normal;
7531
7532         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7533                 goto full_backref;
7534
7535         owner = btrfs_header_owner(buf);
7536         if (owner == ri->objectid)
7537                 goto normal;
7538
7539         tback = find_tree_backref(rec, 0, owner);
7540         if (!tback)
7541                 goto full_backref;
7542 normal:
7543         *flags = 0;
7544         if (rec->flag_block_full_backref != FLAG_UNSET &&
7545             rec->flag_block_full_backref != 0)
7546                 rec->bad_full_backref = 1;
7547         return 0;
7548 full_backref:
7549         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7550         if (rec->flag_block_full_backref != FLAG_UNSET &&
7551             rec->flag_block_full_backref != 1)
7552                 rec->bad_full_backref = 1;
7553         return 0;
7554 }
7555
7556 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7557 {
7558         fprintf(stderr, "Invalid key type(");
7559         print_key_type(stderr, 0, key_type);
7560         fprintf(stderr, ") found in root(");
7561         print_objectid(stderr, rootid, 0);
7562         fprintf(stderr, ")\n");
7563 }
7564
7565 /*
7566  * Check if the key is valid with its extent buffer.
7567  *
7568  * This is a early check in case invalid key exists in a extent buffer
7569  * This is not comprehensive yet, but should prevent wrong key/item passed
7570  * further
7571  */
7572 static int check_type_with_root(u64 rootid, u8 key_type)
7573 {
7574         switch (key_type) {
7575         /* Only valid in chunk tree */
7576         case BTRFS_DEV_ITEM_KEY:
7577         case BTRFS_CHUNK_ITEM_KEY:
7578                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7579                         goto err;
7580                 break;
7581         /* valid in csum and log tree */
7582         case BTRFS_CSUM_TREE_OBJECTID:
7583                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7584                       is_fstree(rootid)))
7585                         goto err;
7586                 break;
7587         case BTRFS_EXTENT_ITEM_KEY:
7588         case BTRFS_METADATA_ITEM_KEY:
7589         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7590                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7591                         goto err;
7592                 break;
7593         case BTRFS_ROOT_ITEM_KEY:
7594                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7595                         goto err;
7596                 break;
7597         case BTRFS_DEV_EXTENT_KEY:
7598                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7599                         goto err;
7600                 break;
7601         }
7602         return 0;
7603 err:
7604         report_mismatch_key_root(key_type, rootid);
7605         return -EINVAL;
7606 }
7607
7608 static int run_next_block(struct btrfs_root *root,
7609                           struct block_info *bits,
7610                           int bits_nr,
7611                           u64 *last,
7612                           struct cache_tree *pending,
7613                           struct cache_tree *seen,
7614                           struct cache_tree *reada,
7615                           struct cache_tree *nodes,
7616                           struct cache_tree *extent_cache,
7617                           struct cache_tree *chunk_cache,
7618                           struct rb_root *dev_cache,
7619                           struct block_group_tree *block_group_cache,
7620                           struct device_extent_tree *dev_extent_cache,
7621                           struct root_item_record *ri)
7622 {
7623         struct btrfs_fs_info *fs_info = root->fs_info;
7624         struct extent_buffer *buf;
7625         struct extent_record *rec = NULL;
7626         u64 bytenr;
7627         u32 size;
7628         u64 parent;
7629         u64 owner;
7630         u64 flags;
7631         u64 ptr;
7632         u64 gen = 0;
7633         int ret = 0;
7634         int i;
7635         int nritems;
7636         struct btrfs_key key;
7637         struct cache_extent *cache;
7638         int reada_bits;
7639
7640         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7641                                     bits_nr, &reada_bits);
7642         if (nritems == 0)
7643                 return 1;
7644
7645         if (!reada_bits) {
7646                 for(i = 0; i < nritems; i++) {
7647                         ret = add_cache_extent(reada, bits[i].start,
7648                                                bits[i].size);
7649                         if (ret == -EEXIST)
7650                                 continue;
7651
7652                         /* fixme, get the parent transid */
7653                         readahead_tree_block(fs_info, bits[i].start,
7654                                              bits[i].size, 0);
7655                 }
7656         }
7657         *last = bits[0].start;
7658         bytenr = bits[0].start;
7659         size = bits[0].size;
7660
7661         cache = lookup_cache_extent(pending, bytenr, size);
7662         if (cache) {
7663                 remove_cache_extent(pending, cache);
7664                 free(cache);
7665         }
7666         cache = lookup_cache_extent(reada, bytenr, size);
7667         if (cache) {
7668                 remove_cache_extent(reada, cache);
7669                 free(cache);
7670         }
7671         cache = lookup_cache_extent(nodes, bytenr, size);
7672         if (cache) {
7673                 remove_cache_extent(nodes, cache);
7674                 free(cache);
7675         }
7676         cache = lookup_cache_extent(extent_cache, bytenr, size);
7677         if (cache) {
7678                 rec = container_of(cache, struct extent_record, cache);
7679                 gen = rec->parent_generation;
7680         }
7681
7682         /* fixme, get the real parent transid */
7683         buf = read_tree_block(root->fs_info, bytenr, gen);
7684         if (!extent_buffer_uptodate(buf)) {
7685                 record_bad_block_io(root->fs_info,
7686                                     extent_cache, bytenr, size);
7687                 goto out;
7688         }
7689
7690         nritems = btrfs_header_nritems(buf);
7691
7692         flags = 0;
7693         if (!init_extent_tree) {
7694                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7695                                        btrfs_header_level(buf), 1, NULL,
7696                                        &flags);
7697                 if (ret < 0) {
7698                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7699                         if (ret < 0) {
7700                                 fprintf(stderr, "Couldn't calc extent flags\n");
7701                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7702                         }
7703                 }
7704         } else {
7705                 flags = 0;
7706                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7707                 if (ret < 0) {
7708                         fprintf(stderr, "Couldn't calc extent flags\n");
7709                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7710                 }
7711         }
7712
7713         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7714                 if (ri != NULL &&
7715                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7716                     ri->objectid == btrfs_header_owner(buf)) {
7717                         /*
7718                          * Ok we got to this block from it's original owner and
7719                          * we have FULL_BACKREF set.  Relocation can leave
7720                          * converted blocks over so this is altogether possible,
7721                          * however it's not possible if the generation > the
7722                          * last snapshot, so check for this case.
7723                          */
7724                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7725                             btrfs_header_generation(buf) > ri->last_snapshot) {
7726                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7727                                 rec->bad_full_backref = 1;
7728                         }
7729                 }
7730         } else {
7731                 if (ri != NULL &&
7732                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7733                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7734                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7735                         rec->bad_full_backref = 1;
7736                 }
7737         }
7738
7739         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7740                 rec->flag_block_full_backref = 1;
7741                 parent = bytenr;
7742                 owner = 0;
7743         } else {
7744                 rec->flag_block_full_backref = 0;
7745                 parent = 0;
7746                 owner = btrfs_header_owner(buf);
7747         }
7748
7749         ret = check_block(root, extent_cache, buf, flags);
7750         if (ret)
7751                 goto out;
7752
7753         if (btrfs_is_leaf(buf)) {
7754                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7755                 for (i = 0; i < nritems; i++) {
7756                         struct btrfs_file_extent_item *fi;
7757                         btrfs_item_key_to_cpu(buf, &key, i);
7758                         /*
7759                          * Check key type against the leaf owner.
7760                          * Could filter quite a lot of early error if
7761                          * owner is correct
7762                          */
7763                         if (check_type_with_root(btrfs_header_owner(buf),
7764                                                  key.type)) {
7765                                 fprintf(stderr, "ignoring invalid key\n");
7766                                 continue;
7767                         }
7768                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7769                                 process_extent_item(root, extent_cache, buf,
7770                                                     i);
7771                                 continue;
7772                         }
7773                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7774                                 process_extent_item(root, extent_cache, buf,
7775                                                     i);
7776                                 continue;
7777                         }
7778                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7779                                 total_csum_bytes +=
7780                                         btrfs_item_size_nr(buf, i);
7781                                 continue;
7782                         }
7783                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7784                                 process_chunk_item(chunk_cache, &key, buf, i);
7785                                 continue;
7786                         }
7787                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7788                                 process_device_item(dev_cache, &key, buf, i);
7789                                 continue;
7790                         }
7791                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7792                                 process_block_group_item(block_group_cache,
7793                                         &key, buf, i);
7794                                 continue;
7795                         }
7796                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7797                                 process_device_extent_item(dev_extent_cache,
7798                                         &key, buf, i);
7799                                 continue;
7800
7801                         }
7802                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7803 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7804                                 process_extent_ref_v0(extent_cache, buf, i);
7805 #else
7806                                 BUG();
7807 #endif
7808                                 continue;
7809                         }
7810
7811                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7812                                 ret = add_tree_backref(extent_cache,
7813                                                 key.objectid, 0, key.offset, 0);
7814                                 if (ret < 0)
7815                                         error(
7816                                 "add_tree_backref failed (leaf tree block): %s",
7817                                               strerror(-ret));
7818                                 continue;
7819                         }
7820                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7821                                 ret = add_tree_backref(extent_cache,
7822                                                 key.objectid, key.offset, 0, 0);
7823                                 if (ret < 0)
7824                                         error(
7825                                 "add_tree_backref failed (leaf shared block): %s",
7826                                               strerror(-ret));
7827                                 continue;
7828                         }
7829                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7830                                 struct btrfs_extent_data_ref *ref;
7831                                 ref = btrfs_item_ptr(buf, i,
7832                                                 struct btrfs_extent_data_ref);
7833                                 add_data_backref(extent_cache,
7834                                         key.objectid, 0,
7835                                         btrfs_extent_data_ref_root(buf, ref),
7836                                         btrfs_extent_data_ref_objectid(buf,
7837                                                                        ref),
7838                                         btrfs_extent_data_ref_offset(buf, ref),
7839                                         btrfs_extent_data_ref_count(buf, ref),
7840                                         0, root->fs_info->sectorsize);
7841                                 continue;
7842                         }
7843                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7844                                 struct btrfs_shared_data_ref *ref;
7845                                 ref = btrfs_item_ptr(buf, i,
7846                                                 struct btrfs_shared_data_ref);
7847                                 add_data_backref(extent_cache,
7848                                         key.objectid, key.offset, 0, 0, 0,
7849                                         btrfs_shared_data_ref_count(buf, ref),
7850                                         0, root->fs_info->sectorsize);
7851                                 continue;
7852                         }
7853                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7854                                 struct bad_item *bad;
7855
7856                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7857                                         continue;
7858                                 if (!owner)
7859                                         continue;
7860                                 bad = malloc(sizeof(struct bad_item));
7861                                 if (!bad)
7862                                         continue;
7863                                 INIT_LIST_HEAD(&bad->list);
7864                                 memcpy(&bad->key, &key,
7865                                        sizeof(struct btrfs_key));
7866                                 bad->root_id = owner;
7867                                 list_add_tail(&bad->list, &delete_items);
7868                                 continue;
7869                         }
7870                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7871                                 continue;
7872                         fi = btrfs_item_ptr(buf, i,
7873                                             struct btrfs_file_extent_item);
7874                         if (btrfs_file_extent_type(buf, fi) ==
7875                             BTRFS_FILE_EXTENT_INLINE)
7876                                 continue;
7877                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7878                                 continue;
7879
7880                         data_bytes_allocated +=
7881                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7882                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7883                                 abort();
7884                         }
7885                         data_bytes_referenced +=
7886                                 btrfs_file_extent_num_bytes(buf, fi);
7887                         add_data_backref(extent_cache,
7888                                 btrfs_file_extent_disk_bytenr(buf, fi),
7889                                 parent, owner, key.objectid, key.offset -
7890                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7891                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7892                 }
7893         } else {
7894                 int level;
7895                 struct btrfs_key first_key;
7896
7897                 first_key.objectid = 0;
7898
7899                 if (nritems > 0)
7900                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7901                 level = btrfs_header_level(buf);
7902                 for (i = 0; i < nritems; i++) {
7903                         struct extent_record tmpl;
7904
7905                         ptr = btrfs_node_blockptr(buf, i);
7906                         size = root->fs_info->nodesize;
7907                         btrfs_node_key_to_cpu(buf, &key, i);
7908                         if (ri != NULL) {
7909                                 if ((level == ri->drop_level)
7910                                     && is_dropped_key(&key, &ri->drop_key)) {
7911                                         continue;
7912                                 }
7913                         }
7914
7915                         memset(&tmpl, 0, sizeof(tmpl));
7916                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7917                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7918                         tmpl.start = ptr;
7919                         tmpl.nr = size;
7920                         tmpl.refs = 1;
7921                         tmpl.metadata = 1;
7922                         tmpl.max_size = size;
7923                         ret = add_extent_rec(extent_cache, &tmpl);
7924                         if (ret < 0)
7925                                 goto out;
7926
7927                         ret = add_tree_backref(extent_cache, ptr, parent,
7928                                         owner, 1);
7929                         if (ret < 0) {
7930                                 error(
7931                                 "add_tree_backref failed (non-leaf block): %s",
7932                                       strerror(-ret));
7933                                 continue;
7934                         }
7935
7936                         if (level > 1) {
7937                                 add_pending(nodes, seen, ptr, size);
7938                         } else {
7939                                 add_pending(pending, seen, ptr, size);
7940                         }
7941                 }
7942                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7943                                       nritems) * sizeof(struct btrfs_key_ptr);
7944         }
7945         total_btree_bytes += buf->len;
7946         if (fs_root_objectid(btrfs_header_owner(buf)))
7947                 total_fs_tree_bytes += buf->len;
7948         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7949                 total_extent_tree_bytes += buf->len;
7950         if (!found_old_backref &&
7951             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7952             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7953             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7954                 found_old_backref = 1;
7955 out:
7956         free_extent_buffer(buf);
7957         return ret;
7958 }
7959
7960 static int add_root_to_pending(struct extent_buffer *buf,
7961                                struct cache_tree *extent_cache,
7962                                struct cache_tree *pending,
7963                                struct cache_tree *seen,
7964                                struct cache_tree *nodes,
7965                                u64 objectid)
7966 {
7967         struct extent_record tmpl;
7968         int ret;
7969
7970         if (btrfs_header_level(buf) > 0)
7971                 add_pending(nodes, seen, buf->start, buf->len);
7972         else
7973                 add_pending(pending, seen, buf->start, buf->len);
7974
7975         memset(&tmpl, 0, sizeof(tmpl));
7976         tmpl.start = buf->start;
7977         tmpl.nr = buf->len;
7978         tmpl.is_root = 1;
7979         tmpl.refs = 1;
7980         tmpl.metadata = 1;
7981         tmpl.max_size = buf->len;
7982         add_extent_rec(extent_cache, &tmpl);
7983
7984         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7985             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7986                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7987                                 0, 1);
7988         else
7989                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7990                                 1);
7991         return ret;
7992 }
7993
7994 /* as we fix the tree, we might be deleting blocks that
7995  * we're tracking for repair.  This hook makes sure we
7996  * remove any backrefs for blocks as we are fixing them.
7997  */
7998 static int free_extent_hook(struct btrfs_trans_handle *trans,
7999                             struct btrfs_root *root,
8000                             u64 bytenr, u64 num_bytes, u64 parent,
8001                             u64 root_objectid, u64 owner, u64 offset,
8002                             int refs_to_drop)
8003 {
8004         struct extent_record *rec;
8005         struct cache_extent *cache;
8006         int is_data;
8007         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8008
8009         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8010         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8011         if (!cache)
8012                 return 0;
8013
8014         rec = container_of(cache, struct extent_record, cache);
8015         if (is_data) {
8016                 struct data_backref *back;
8017                 back = find_data_backref(rec, parent, root_objectid, owner,
8018                                          offset, 1, bytenr, num_bytes);
8019                 if (!back)
8020                         goto out;
8021                 if (back->node.found_ref) {
8022                         back->found_ref -= refs_to_drop;
8023                         if (rec->refs)
8024                                 rec->refs -= refs_to_drop;
8025                 }
8026                 if (back->node.found_extent_tree) {
8027                         back->num_refs -= refs_to_drop;
8028                         if (rec->extent_item_refs)
8029                                 rec->extent_item_refs -= refs_to_drop;
8030                 }
8031                 if (back->found_ref == 0)
8032                         back->node.found_ref = 0;
8033                 if (back->num_refs == 0)
8034                         back->node.found_extent_tree = 0;
8035
8036                 if (!back->node.found_extent_tree && back->node.found_ref) {
8037                         list_del(&back->node.list);
8038                         free(back);
8039                 }
8040         } else {
8041                 struct tree_backref *back;
8042                 back = find_tree_backref(rec, parent, root_objectid);
8043                 if (!back)
8044                         goto out;
8045                 if (back->node.found_ref) {
8046                         if (rec->refs)
8047                                 rec->refs--;
8048                         back->node.found_ref = 0;
8049                 }
8050                 if (back->node.found_extent_tree) {
8051                         if (rec->extent_item_refs)
8052                                 rec->extent_item_refs--;
8053                         back->node.found_extent_tree = 0;
8054                 }
8055                 if (!back->node.found_extent_tree && back->node.found_ref) {
8056                         list_del(&back->node.list);
8057                         free(back);
8058                 }
8059         }
8060         maybe_free_extent_rec(extent_cache, rec);
8061 out:
8062         return 0;
8063 }
8064
8065 static int delete_extent_records(struct btrfs_trans_handle *trans,
8066                                  struct btrfs_root *root,
8067                                  struct btrfs_path *path,
8068                                  u64 bytenr)
8069 {
8070         struct btrfs_key key;
8071         struct btrfs_key found_key;
8072         struct extent_buffer *leaf;
8073         int ret;
8074         int slot;
8075
8076
8077         key.objectid = bytenr;
8078         key.type = (u8)-1;
8079         key.offset = (u64)-1;
8080
8081         while(1) {
8082                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8083                                         &key, path, 0, 1);
8084                 if (ret < 0)
8085                         break;
8086
8087                 if (ret > 0) {
8088                         ret = 0;
8089                         if (path->slots[0] == 0)
8090                                 break;
8091                         path->slots[0]--;
8092                 }
8093                 ret = 0;
8094
8095                 leaf = path->nodes[0];
8096                 slot = path->slots[0];
8097
8098                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8099                 if (found_key.objectid != bytenr)
8100                         break;
8101
8102                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8103                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8104                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8105                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8106                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8107                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8108                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8109                         btrfs_release_path(path);
8110                         if (found_key.type == 0) {
8111                                 if (found_key.offset == 0)
8112                                         break;
8113                                 key.offset = found_key.offset - 1;
8114                                 key.type = found_key.type;
8115                         }
8116                         key.type = found_key.type - 1;
8117                         key.offset = (u64)-1;
8118                         continue;
8119                 }
8120
8121                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8122                         found_key.objectid, found_key.type, found_key.offset);
8123
8124                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8125                 if (ret)
8126                         break;
8127                 btrfs_release_path(path);
8128
8129                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8130                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8131                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8132                                 found_key.offset : root->fs_info->nodesize;
8133
8134                         ret = btrfs_update_block_group(trans, root, bytenr,
8135                                                        bytes, 0, 0);
8136                         if (ret)
8137                                 break;
8138                 }
8139         }
8140
8141         btrfs_release_path(path);
8142         return ret;
8143 }
8144
8145 /*
8146  * for a single backref, this will allocate a new extent
8147  * and add the backref to it.
8148  */
8149 static int record_extent(struct btrfs_trans_handle *trans,
8150                          struct btrfs_fs_info *info,
8151                          struct btrfs_path *path,
8152                          struct extent_record *rec,
8153                          struct extent_backref *back,
8154                          int allocated, u64 flags)
8155 {
8156         int ret = 0;
8157         struct btrfs_root *extent_root = info->extent_root;
8158         struct extent_buffer *leaf;
8159         struct btrfs_key ins_key;
8160         struct btrfs_extent_item *ei;
8161         struct data_backref *dback;
8162         struct btrfs_tree_block_info *bi;
8163
8164         if (!back->is_data)
8165                 rec->max_size = max_t(u64, rec->max_size,
8166                                     info->nodesize);
8167
8168         if (!allocated) {
8169                 u32 item_size = sizeof(*ei);
8170
8171                 if (!back->is_data)
8172                         item_size += sizeof(*bi);
8173
8174                 ins_key.objectid = rec->start;
8175                 ins_key.offset = rec->max_size;
8176                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8177
8178                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8179                                         &ins_key, item_size);
8180                 if (ret)
8181                         goto fail;
8182
8183                 leaf = path->nodes[0];
8184                 ei = btrfs_item_ptr(leaf, path->slots[0],
8185                                     struct btrfs_extent_item);
8186
8187                 btrfs_set_extent_refs(leaf, ei, 0);
8188                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8189
8190                 if (back->is_data) {
8191                         btrfs_set_extent_flags(leaf, ei,
8192                                                BTRFS_EXTENT_FLAG_DATA);
8193                 } else {
8194                         struct btrfs_disk_key copy_key;;
8195
8196                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8197                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8198                                              sizeof(*bi));
8199
8200                         btrfs_set_disk_key_objectid(&copy_key,
8201                                                     rec->info_objectid);
8202                         btrfs_set_disk_key_type(&copy_key, 0);
8203                         btrfs_set_disk_key_offset(&copy_key, 0);
8204
8205                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8206                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8207
8208                         btrfs_set_extent_flags(leaf, ei,
8209                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8210                 }
8211
8212                 btrfs_mark_buffer_dirty(leaf);
8213                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8214                                                rec->max_size, 1, 0);
8215                 if (ret)
8216                         goto fail;
8217                 btrfs_release_path(path);
8218         }
8219
8220         if (back->is_data) {
8221                 u64 parent;
8222                 int i;
8223
8224                 dback = to_data_backref(back);
8225                 if (back->full_backref)
8226                         parent = dback->parent;
8227                 else
8228                         parent = 0;
8229
8230                 for (i = 0; i < dback->found_ref; i++) {
8231                         /* if parent != 0, we're doing a full backref
8232                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8233                          * just makes the backref allocator create a data
8234                          * backref
8235                          */
8236                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8237                                                    rec->start, rec->max_size,
8238                                                    parent,
8239                                                    dback->root,
8240                                                    parent ?
8241                                                    BTRFS_FIRST_FREE_OBJECTID :
8242                                                    dback->owner,
8243                                                    dback->offset);
8244                         if (ret)
8245                                 break;
8246                 }
8247                 fprintf(stderr, "adding new data backref"
8248                                 " on %llu %s %llu owner %llu"
8249                                 " offset %llu found %d\n",
8250                                 (unsigned long long)rec->start,
8251                                 back->full_backref ?
8252                                 "parent" : "root",
8253                                 back->full_backref ?
8254                                 (unsigned long long)parent :
8255                                 (unsigned long long)dback->root,
8256                                 (unsigned long long)dback->owner,
8257                                 (unsigned long long)dback->offset,
8258                                 dback->found_ref);
8259         } else {
8260                 u64 parent;
8261                 struct tree_backref *tback;
8262
8263                 tback = to_tree_backref(back);
8264                 if (back->full_backref)
8265                         parent = tback->parent;
8266                 else
8267                         parent = 0;
8268
8269                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8270                                            rec->start, rec->max_size,
8271                                            parent, tback->root, 0, 0);
8272                 fprintf(stderr, "adding new tree backref on "
8273                         "start %llu len %llu parent %llu root %llu\n",
8274                         rec->start, rec->max_size, parent, tback->root);
8275         }
8276 fail:
8277         btrfs_release_path(path);
8278         return ret;
8279 }
8280
8281 static struct extent_entry *find_entry(struct list_head *entries,
8282                                        u64 bytenr, u64 bytes)
8283 {
8284         struct extent_entry *entry = NULL;
8285
8286         list_for_each_entry(entry, entries, list) {
8287                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8288                         return entry;
8289         }
8290
8291         return NULL;
8292 }
8293
8294 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8295 {
8296         struct extent_entry *entry, *best = NULL, *prev = NULL;
8297
8298         list_for_each_entry(entry, entries, list) {
8299                 /*
8300                  * If there are as many broken entries as entries then we know
8301                  * not to trust this particular entry.
8302                  */
8303                 if (entry->broken == entry->count)
8304                         continue;
8305
8306                 /*
8307                  * Special case, when there are only two entries and 'best' is
8308                  * the first one
8309                  */
8310                 if (!prev) {
8311                         best = entry;
8312                         prev = entry;
8313                         continue;
8314                 }
8315
8316                 /*
8317                  * If our current entry == best then we can't be sure our best
8318                  * is really the best, so we need to keep searching.
8319                  */
8320                 if (best && best->count == entry->count) {
8321                         prev = entry;
8322                         best = NULL;
8323                         continue;
8324                 }
8325
8326                 /* Prev == entry, not good enough, have to keep searching */
8327                 if (!prev->broken && prev->count == entry->count)
8328                         continue;
8329
8330                 if (!best)
8331                         best = (prev->count > entry->count) ? prev : entry;
8332                 else if (best->count < entry->count)
8333                         best = entry;
8334                 prev = entry;
8335         }
8336
8337         return best;
8338 }
8339
8340 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8341                       struct data_backref *dback, struct extent_entry *entry)
8342 {
8343         struct btrfs_trans_handle *trans;
8344         struct btrfs_root *root;
8345         struct btrfs_file_extent_item *fi;
8346         struct extent_buffer *leaf;
8347         struct btrfs_key key;
8348         u64 bytenr, bytes;
8349         int ret, err;
8350
8351         key.objectid = dback->root;
8352         key.type = BTRFS_ROOT_ITEM_KEY;
8353         key.offset = (u64)-1;
8354         root = btrfs_read_fs_root(info, &key);
8355         if (IS_ERR(root)) {
8356                 fprintf(stderr, "Couldn't find root for our ref\n");
8357                 return -EINVAL;
8358         }
8359
8360         /*
8361          * The backref points to the original offset of the extent if it was
8362          * split, so we need to search down to the offset we have and then walk
8363          * forward until we find the backref we're looking for.
8364          */
8365         key.objectid = dback->owner;
8366         key.type = BTRFS_EXTENT_DATA_KEY;
8367         key.offset = dback->offset;
8368         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8369         if (ret < 0) {
8370                 fprintf(stderr, "Error looking up ref %d\n", ret);
8371                 return ret;
8372         }
8373
8374         while (1) {
8375                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8376                         ret = btrfs_next_leaf(root, path);
8377                         if (ret) {
8378                                 fprintf(stderr, "Couldn't find our ref, next\n");
8379                                 return -EINVAL;
8380                         }
8381                 }
8382                 leaf = path->nodes[0];
8383                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8384                 if (key.objectid != dback->owner ||
8385                     key.type != BTRFS_EXTENT_DATA_KEY) {
8386                         fprintf(stderr, "Couldn't find our ref, search\n");
8387                         return -EINVAL;
8388                 }
8389                 fi = btrfs_item_ptr(leaf, path->slots[0],
8390                                     struct btrfs_file_extent_item);
8391                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8392                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8393
8394                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8395                         break;
8396                 path->slots[0]++;
8397         }
8398
8399         btrfs_release_path(path);
8400
8401         trans = btrfs_start_transaction(root, 1);
8402         if (IS_ERR(trans))
8403                 return PTR_ERR(trans);
8404
8405         /*
8406          * Ok we have the key of the file extent we want to fix, now we can cow
8407          * down to the thing and fix it.
8408          */
8409         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8410         if (ret < 0) {
8411                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8412                         key.objectid, key.type, key.offset, ret);
8413                 goto out;
8414         }
8415         if (ret > 0) {
8416                 fprintf(stderr, "Well that's odd, we just found this key "
8417                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8418                         key.offset);
8419                 ret = -EINVAL;
8420                 goto out;
8421         }
8422         leaf = path->nodes[0];
8423         fi = btrfs_item_ptr(leaf, path->slots[0],
8424                             struct btrfs_file_extent_item);
8425
8426         if (btrfs_file_extent_compression(leaf, fi) &&
8427             dback->disk_bytenr != entry->bytenr) {
8428                 fprintf(stderr, "Ref doesn't match the record start and is "
8429                         "compressed, please take a btrfs-image of this file "
8430                         "system and send it to a btrfs developer so they can "
8431                         "complete this functionality for bytenr %Lu\n",
8432                         dback->disk_bytenr);
8433                 ret = -EINVAL;
8434                 goto out;
8435         }
8436
8437         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8438                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8439         } else if (dback->disk_bytenr > entry->bytenr) {
8440                 u64 off_diff, offset;
8441
8442                 off_diff = dback->disk_bytenr - entry->bytenr;
8443                 offset = btrfs_file_extent_offset(leaf, fi);
8444                 if (dback->disk_bytenr + offset +
8445                     btrfs_file_extent_num_bytes(leaf, fi) >
8446                     entry->bytenr + entry->bytes) {
8447                         fprintf(stderr, "Ref is past the entry end, please "
8448                                 "take a btrfs-image of this file system and "
8449                                 "send it to a btrfs developer, ref %Lu\n",
8450                                 dback->disk_bytenr);
8451                         ret = -EINVAL;
8452                         goto out;
8453                 }
8454                 offset += off_diff;
8455                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8456                 btrfs_set_file_extent_offset(leaf, fi, offset);
8457         } else if (dback->disk_bytenr < entry->bytenr) {
8458                 u64 offset;
8459
8460                 offset = btrfs_file_extent_offset(leaf, fi);
8461                 if (dback->disk_bytenr + offset < entry->bytenr) {
8462                         fprintf(stderr, "Ref is before the entry start, please"
8463                                 " take a btrfs-image of this file system and "
8464                                 "send it to a btrfs developer, ref %Lu\n",
8465                                 dback->disk_bytenr);
8466                         ret = -EINVAL;
8467                         goto out;
8468                 }
8469
8470                 offset += dback->disk_bytenr;
8471                 offset -= entry->bytenr;
8472                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8473                 btrfs_set_file_extent_offset(leaf, fi, offset);
8474         }
8475
8476         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8477
8478         /*
8479          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8480          * only do this if we aren't using compression, otherwise it's a
8481          * trickier case.
8482          */
8483         if (!btrfs_file_extent_compression(leaf, fi))
8484                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8485         else
8486                 printf("ram bytes may be wrong?\n");
8487         btrfs_mark_buffer_dirty(leaf);
8488 out:
8489         err = btrfs_commit_transaction(trans, root);
8490         btrfs_release_path(path);
8491         return ret ? ret : err;
8492 }
8493
8494 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8495                            struct extent_record *rec)
8496 {
8497         struct extent_backref *back;
8498         struct data_backref *dback;
8499         struct extent_entry *entry, *best = NULL;
8500         LIST_HEAD(entries);
8501         int nr_entries = 0;
8502         int broken_entries = 0;
8503         int ret = 0;
8504         short mismatch = 0;
8505
8506         /*
8507          * Metadata is easy and the backrefs should always agree on bytenr and
8508          * size, if not we've got bigger issues.
8509          */
8510         if (rec->metadata)
8511                 return 0;
8512
8513         list_for_each_entry(back, &rec->backrefs, list) {
8514                 if (back->full_backref || !back->is_data)
8515                         continue;
8516
8517                 dback = to_data_backref(back);
8518
8519                 /*
8520                  * We only pay attention to backrefs that we found a real
8521                  * backref for.
8522                  */
8523                 if (dback->found_ref == 0)
8524                         continue;
8525
8526                 /*
8527                  * For now we only catch when the bytes don't match, not the
8528                  * bytenr.  We can easily do this at the same time, but I want
8529                  * to have a fs image to test on before we just add repair
8530                  * functionality willy-nilly so we know we won't screw up the
8531                  * repair.
8532                  */
8533
8534                 entry = find_entry(&entries, dback->disk_bytenr,
8535                                    dback->bytes);
8536                 if (!entry) {
8537                         entry = malloc(sizeof(struct extent_entry));
8538                         if (!entry) {
8539                                 ret = -ENOMEM;
8540                                 goto out;
8541                         }
8542                         memset(entry, 0, sizeof(*entry));
8543                         entry->bytenr = dback->disk_bytenr;
8544                         entry->bytes = dback->bytes;
8545                         list_add_tail(&entry->list, &entries);
8546                         nr_entries++;
8547                 }
8548
8549                 /*
8550                  * If we only have on entry we may think the entries agree when
8551                  * in reality they don't so we have to do some extra checking.
8552                  */
8553                 if (dback->disk_bytenr != rec->start ||
8554                     dback->bytes != rec->nr || back->broken)
8555                         mismatch = 1;
8556
8557                 if (back->broken) {
8558                         entry->broken++;
8559                         broken_entries++;
8560                 }
8561
8562                 entry->count++;
8563         }
8564
8565         /* Yay all the backrefs agree, carry on good sir */
8566         if (nr_entries <= 1 && !mismatch)
8567                 goto out;
8568
8569         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8570                 "%Lu\n", rec->start);
8571
8572         /*
8573          * First we want to see if the backrefs can agree amongst themselves who
8574          * is right, so figure out which one of the entries has the highest
8575          * count.
8576          */
8577         best = find_most_right_entry(&entries);
8578
8579         /*
8580          * Ok so we may have an even split between what the backrefs think, so
8581          * this is where we use the extent ref to see what it thinks.
8582          */
8583         if (!best) {
8584                 entry = find_entry(&entries, rec->start, rec->nr);
8585                 if (!entry && (!broken_entries || !rec->found_rec)) {
8586                         fprintf(stderr, "Backrefs don't agree with each other "
8587                                 "and extent record doesn't agree with anybody,"
8588                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8589                                 rec->start, rec->nr);
8590                         ret = -EINVAL;
8591                         goto out;
8592                 } else if (!entry) {
8593                         /*
8594                          * Ok our backrefs were broken, we'll assume this is the
8595                          * correct value and add an entry for this range.
8596                          */
8597                         entry = malloc(sizeof(struct extent_entry));
8598                         if (!entry) {
8599                                 ret = -ENOMEM;
8600                                 goto out;
8601                         }
8602                         memset(entry, 0, sizeof(*entry));
8603                         entry->bytenr = rec->start;
8604                         entry->bytes = rec->nr;
8605                         list_add_tail(&entry->list, &entries);
8606                         nr_entries++;
8607                 }
8608                 entry->count++;
8609                 best = find_most_right_entry(&entries);
8610                 if (!best) {
8611                         fprintf(stderr, "Backrefs and extent record evenly "
8612                                 "split on who is right, this is going to "
8613                                 "require user input to fix bytenr %Lu bytes "
8614                                 "%Lu\n", rec->start, rec->nr);
8615                         ret = -EINVAL;
8616                         goto out;
8617                 }
8618         }
8619
8620         /*
8621          * I don't think this can happen currently as we'll abort() if we catch
8622          * this case higher up, but in case somebody removes that we still can't
8623          * deal with it properly here yet, so just bail out of that's the case.
8624          */
8625         if (best->bytenr != rec->start) {
8626                 fprintf(stderr, "Extent start and backref starts don't match, "
8627                         "please use btrfs-image on this file system and send "
8628                         "it to a btrfs developer so they can make fsck fix "
8629                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8630                         rec->start, rec->nr);
8631                 ret = -EINVAL;
8632                 goto out;
8633         }
8634
8635         /*
8636          * Ok great we all agreed on an extent record, let's go find the real
8637          * references and fix up the ones that don't match.
8638          */
8639         list_for_each_entry(back, &rec->backrefs, list) {
8640                 if (back->full_backref || !back->is_data)
8641                         continue;
8642
8643                 dback = to_data_backref(back);
8644
8645                 /*
8646                  * Still ignoring backrefs that don't have a real ref attached
8647                  * to them.
8648                  */
8649                 if (dback->found_ref == 0)
8650                         continue;
8651
8652                 if (dback->bytes == best->bytes &&
8653                     dback->disk_bytenr == best->bytenr)
8654                         continue;
8655
8656                 ret = repair_ref(info, path, dback, best);
8657                 if (ret)
8658                         goto out;
8659         }
8660
8661         /*
8662          * Ok we messed with the actual refs, which means we need to drop our
8663          * entire cache and go back and rescan.  I know this is a huge pain and
8664          * adds a lot of extra work, but it's the only way to be safe.  Once all
8665          * the backrefs agree we may not need to do anything to the extent
8666          * record itself.
8667          */
8668         ret = -EAGAIN;
8669 out:
8670         while (!list_empty(&entries)) {
8671                 entry = list_entry(entries.next, struct extent_entry, list);
8672                 list_del_init(&entry->list);
8673                 free(entry);
8674         }
8675         return ret;
8676 }
8677
8678 static int process_duplicates(struct cache_tree *extent_cache,
8679                               struct extent_record *rec)
8680 {
8681         struct extent_record *good, *tmp;
8682         struct cache_extent *cache;
8683         int ret;
8684
8685         /*
8686          * If we found a extent record for this extent then return, or if we
8687          * have more than one duplicate we are likely going to need to delete
8688          * something.
8689          */
8690         if (rec->found_rec || rec->num_duplicates > 1)
8691                 return 0;
8692
8693         /* Shouldn't happen but just in case */
8694         BUG_ON(!rec->num_duplicates);
8695
8696         /*
8697          * So this happens if we end up with a backref that doesn't match the
8698          * actual extent entry.  So either the backref is bad or the extent
8699          * entry is bad.  Either way we want to have the extent_record actually
8700          * reflect what we found in the extent_tree, so we need to take the
8701          * duplicate out and use that as the extent_record since the only way we
8702          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8703          */
8704         remove_cache_extent(extent_cache, &rec->cache);
8705
8706         good = to_extent_record(rec->dups.next);
8707         list_del_init(&good->list);
8708         INIT_LIST_HEAD(&good->backrefs);
8709         INIT_LIST_HEAD(&good->dups);
8710         good->cache.start = good->start;
8711         good->cache.size = good->nr;
8712         good->content_checked = 0;
8713         good->owner_ref_checked = 0;
8714         good->num_duplicates = 0;
8715         good->refs = rec->refs;
8716         list_splice_init(&rec->backrefs, &good->backrefs);
8717         while (1) {
8718                 cache = lookup_cache_extent(extent_cache, good->start,
8719                                             good->nr);
8720                 if (!cache)
8721                         break;
8722                 tmp = container_of(cache, struct extent_record, cache);
8723
8724                 /*
8725                  * If we find another overlapping extent and it's found_rec is
8726                  * set then it's a duplicate and we need to try and delete
8727                  * something.
8728                  */
8729                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8730                         if (list_empty(&good->list))
8731                                 list_add_tail(&good->list,
8732                                               &duplicate_extents);
8733                         good->num_duplicates += tmp->num_duplicates + 1;
8734                         list_splice_init(&tmp->dups, &good->dups);
8735                         list_del_init(&tmp->list);
8736                         list_add_tail(&tmp->list, &good->dups);
8737                         remove_cache_extent(extent_cache, &tmp->cache);
8738                         continue;
8739                 }
8740
8741                 /*
8742                  * Ok we have another non extent item backed extent rec, so lets
8743                  * just add it to this extent and carry on like we did above.
8744                  */
8745                 good->refs += tmp->refs;
8746                 list_splice_init(&tmp->backrefs, &good->backrefs);
8747                 remove_cache_extent(extent_cache, &tmp->cache);
8748                 free(tmp);
8749         }
8750         ret = insert_cache_extent(extent_cache, &good->cache);
8751         BUG_ON(ret);
8752         free(rec);
8753         return good->num_duplicates ? 0 : 1;
8754 }
8755
8756 static int delete_duplicate_records(struct btrfs_root *root,
8757                                     struct extent_record *rec)
8758 {
8759         struct btrfs_trans_handle *trans;
8760         LIST_HEAD(delete_list);
8761         struct btrfs_path path;
8762         struct extent_record *tmp, *good, *n;
8763         int nr_del = 0;
8764         int ret = 0, err;
8765         struct btrfs_key key;
8766
8767         btrfs_init_path(&path);
8768
8769         good = rec;
8770         /* Find the record that covers all of the duplicates. */
8771         list_for_each_entry(tmp, &rec->dups, list) {
8772                 if (good->start < tmp->start)
8773                         continue;
8774                 if (good->nr > tmp->nr)
8775                         continue;
8776
8777                 if (tmp->start + tmp->nr < good->start + good->nr) {
8778                         fprintf(stderr, "Ok we have overlapping extents that "
8779                                 "aren't completely covered by each other, this "
8780                                 "is going to require more careful thought.  "
8781                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8782                                 tmp->start, tmp->nr, good->start, good->nr);
8783                         abort();
8784                 }
8785                 good = tmp;
8786         }
8787
8788         if (good != rec)
8789                 list_add_tail(&rec->list, &delete_list);
8790
8791         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8792                 if (tmp == good)
8793                         continue;
8794                 list_move_tail(&tmp->list, &delete_list);
8795         }
8796
8797         root = root->fs_info->extent_root;
8798         trans = btrfs_start_transaction(root, 1);
8799         if (IS_ERR(trans)) {
8800                 ret = PTR_ERR(trans);
8801                 goto out;
8802         }
8803
8804         list_for_each_entry(tmp, &delete_list, list) {
8805                 if (tmp->found_rec == 0)
8806                         continue;
8807                 key.objectid = tmp->start;
8808                 key.type = BTRFS_EXTENT_ITEM_KEY;
8809                 key.offset = tmp->nr;
8810
8811                 /* Shouldn't happen but just in case */
8812                 if (tmp->metadata) {
8813                         fprintf(stderr, "Well this shouldn't happen, extent "
8814                                 "record overlaps but is metadata? "
8815                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8816                         abort();
8817                 }
8818
8819                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8820                 if (ret) {
8821                         if (ret > 0)
8822                                 ret = -EINVAL;
8823                         break;
8824                 }
8825                 ret = btrfs_del_item(trans, root, &path);
8826                 if (ret)
8827                         break;
8828                 btrfs_release_path(&path);
8829                 nr_del++;
8830         }
8831         err = btrfs_commit_transaction(trans, root);
8832         if (err && !ret)
8833                 ret = err;
8834 out:
8835         while (!list_empty(&delete_list)) {
8836                 tmp = to_extent_record(delete_list.next);
8837                 list_del_init(&tmp->list);
8838                 if (tmp == rec)
8839                         continue;
8840                 free(tmp);
8841         }
8842
8843         while (!list_empty(&rec->dups)) {
8844                 tmp = to_extent_record(rec->dups.next);
8845                 list_del_init(&tmp->list);
8846                 free(tmp);
8847         }
8848
8849         btrfs_release_path(&path);
8850
8851         if (!ret && !nr_del)
8852                 rec->num_duplicates = 0;
8853
8854         return ret ? ret : nr_del;
8855 }
8856
8857 static int find_possible_backrefs(struct btrfs_fs_info *info,
8858                                   struct btrfs_path *path,
8859                                   struct cache_tree *extent_cache,
8860                                   struct extent_record *rec)
8861 {
8862         struct btrfs_root *root;
8863         struct extent_backref *back;
8864         struct data_backref *dback;
8865         struct cache_extent *cache;
8866         struct btrfs_file_extent_item *fi;
8867         struct btrfs_key key;
8868         u64 bytenr, bytes;
8869         int ret;
8870
8871         list_for_each_entry(back, &rec->backrefs, list) {
8872                 /* Don't care about full backrefs (poor unloved backrefs) */
8873                 if (back->full_backref || !back->is_data)
8874                         continue;
8875
8876                 dback = to_data_backref(back);
8877
8878                 /* We found this one, we don't need to do a lookup */
8879                 if (dback->found_ref)
8880                         continue;
8881
8882                 key.objectid = dback->root;
8883                 key.type = BTRFS_ROOT_ITEM_KEY;
8884                 key.offset = (u64)-1;
8885
8886                 root = btrfs_read_fs_root(info, &key);
8887
8888                 /* No root, definitely a bad ref, skip */
8889                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8890                         continue;
8891                 /* Other err, exit */
8892                 if (IS_ERR(root))
8893                         return PTR_ERR(root);
8894
8895                 key.objectid = dback->owner;
8896                 key.type = BTRFS_EXTENT_DATA_KEY;
8897                 key.offset = dback->offset;
8898                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8899                 if (ret) {
8900                         btrfs_release_path(path);
8901                         if (ret < 0)
8902                                 return ret;
8903                         /* Didn't find it, we can carry on */
8904                         ret = 0;
8905                         continue;
8906                 }
8907
8908                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8909                                     struct btrfs_file_extent_item);
8910                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8911                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8912                 btrfs_release_path(path);
8913                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8914                 if (cache) {
8915                         struct extent_record *tmp;
8916                         tmp = container_of(cache, struct extent_record, cache);
8917
8918                         /*
8919                          * If we found an extent record for the bytenr for this
8920                          * particular backref then we can't add it to our
8921                          * current extent record.  We only want to add backrefs
8922                          * that don't have a corresponding extent item in the
8923                          * extent tree since they likely belong to this record
8924                          * and we need to fix it if it doesn't match bytenrs.
8925                          */
8926                         if  (tmp->found_rec)
8927                                 continue;
8928                 }
8929
8930                 dback->found_ref += 1;
8931                 dback->disk_bytenr = bytenr;
8932                 dback->bytes = bytes;
8933
8934                 /*
8935                  * Set this so the verify backref code knows not to trust the
8936                  * values in this backref.
8937                  */
8938                 back->broken = 1;
8939         }
8940
8941         return 0;
8942 }
8943
8944 /*
8945  * Record orphan data ref into corresponding root.
8946  *
8947  * Return 0 if the extent item contains data ref and recorded.
8948  * Return 1 if the extent item contains no useful data ref
8949  *   On that case, it may contains only shared_dataref or metadata backref
8950  *   or the file extent exists(this should be handled by the extent bytenr
8951  *   recovery routine)
8952  * Return <0 if something goes wrong.
8953  */
8954 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8955                                       struct extent_record *rec)
8956 {
8957         struct btrfs_key key;
8958         struct btrfs_root *dest_root;
8959         struct extent_backref *back;
8960         struct data_backref *dback;
8961         struct orphan_data_extent *orphan;
8962         struct btrfs_path path;
8963         int recorded_data_ref = 0;
8964         int ret = 0;
8965
8966         if (rec->metadata)
8967                 return 1;
8968         btrfs_init_path(&path);
8969         list_for_each_entry(back, &rec->backrefs, list) {
8970                 if (back->full_backref || !back->is_data ||
8971                     !back->found_extent_tree)
8972                         continue;
8973                 dback = to_data_backref(back);
8974                 if (dback->found_ref)
8975                         continue;
8976                 key.objectid = dback->root;
8977                 key.type = BTRFS_ROOT_ITEM_KEY;
8978                 key.offset = (u64)-1;
8979
8980                 dest_root = btrfs_read_fs_root(fs_info, &key);
8981
8982                 /* For non-exist root we just skip it */
8983                 if (IS_ERR(dest_root) || !dest_root)
8984                         continue;
8985
8986                 key.objectid = dback->owner;
8987                 key.type = BTRFS_EXTENT_DATA_KEY;
8988                 key.offset = dback->offset;
8989
8990                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8991                 btrfs_release_path(&path);
8992                 /*
8993                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8994                  * we need to record it for inode/file extent rebuild.
8995                  * For ret > 0, we record it only for file extent rebuild.
8996                  * For ret == 0, the file extent exists but only bytenr
8997                  * mismatch, let the original bytenr fix routine to handle,
8998                  * don't record it.
8999                  */
9000                 if (ret == 0)
9001                         continue;
9002                 ret = 0;
9003                 orphan = malloc(sizeof(*orphan));
9004                 if (!orphan) {
9005                         ret = -ENOMEM;
9006                         goto out;
9007                 }
9008                 INIT_LIST_HEAD(&orphan->list);
9009                 orphan->root = dback->root;
9010                 orphan->objectid = dback->owner;
9011                 orphan->offset = dback->offset;
9012                 orphan->disk_bytenr = rec->cache.start;
9013                 orphan->disk_len = rec->cache.size;
9014                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9015                 recorded_data_ref = 1;
9016         }
9017 out:
9018         btrfs_release_path(&path);
9019         if (!ret)
9020                 return !recorded_data_ref;
9021         else
9022                 return ret;
9023 }
9024
9025 /*
9026  * when an incorrect extent item is found, this will delete
9027  * all of the existing entries for it and recreate them
9028  * based on what the tree scan found.
9029  */
9030 static int fixup_extent_refs(struct btrfs_fs_info *info,
9031                              struct cache_tree *extent_cache,
9032                              struct extent_record *rec)
9033 {
9034         struct btrfs_trans_handle *trans = NULL;
9035         int ret;
9036         struct btrfs_path path;
9037         struct list_head *cur = rec->backrefs.next;
9038         struct cache_extent *cache;
9039         struct extent_backref *back;
9040         int allocated = 0;
9041         u64 flags = 0;
9042
9043         if (rec->flag_block_full_backref)
9044                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9045
9046         btrfs_init_path(&path);
9047         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9048                 /*
9049                  * Sometimes the backrefs themselves are so broken they don't
9050                  * get attached to any meaningful rec, so first go back and
9051                  * check any of our backrefs that we couldn't find and throw
9052                  * them into the list if we find the backref so that
9053                  * verify_backrefs can figure out what to do.
9054                  */
9055                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9056                 if (ret < 0)
9057                         goto out;
9058         }
9059
9060         /* step one, make sure all of the backrefs agree */
9061         ret = verify_backrefs(info, &path, rec);
9062         if (ret < 0)
9063                 goto out;
9064
9065         trans = btrfs_start_transaction(info->extent_root, 1);
9066         if (IS_ERR(trans)) {
9067                 ret = PTR_ERR(trans);
9068                 goto out;
9069         }
9070
9071         /* step two, delete all the existing records */
9072         ret = delete_extent_records(trans, info->extent_root, &path,
9073                                     rec->start);
9074
9075         if (ret < 0)
9076                 goto out;
9077
9078         /* was this block corrupt?  If so, don't add references to it */
9079         cache = lookup_cache_extent(info->corrupt_blocks,
9080                                     rec->start, rec->max_size);
9081         if (cache) {
9082                 ret = 0;
9083                 goto out;
9084         }
9085
9086         /* step three, recreate all the refs we did find */
9087         while(cur != &rec->backrefs) {
9088                 back = to_extent_backref(cur);
9089                 cur = cur->next;
9090
9091                 /*
9092                  * if we didn't find any references, don't create a
9093                  * new extent record
9094                  */
9095                 if (!back->found_ref)
9096                         continue;
9097
9098                 rec->bad_full_backref = 0;
9099                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9100                 allocated = 1;
9101
9102                 if (ret)
9103                         goto out;
9104         }
9105 out:
9106         if (trans) {
9107                 int err = btrfs_commit_transaction(trans, info->extent_root);
9108                 if (!ret)
9109                         ret = err;
9110         }
9111
9112         if (!ret)
9113                 fprintf(stderr, "Repaired extent references for %llu\n",
9114                                 (unsigned long long)rec->start);
9115
9116         btrfs_release_path(&path);
9117         return ret;
9118 }
9119
9120 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9121                               struct extent_record *rec)
9122 {
9123         struct btrfs_trans_handle *trans;
9124         struct btrfs_root *root = fs_info->extent_root;
9125         struct btrfs_path path;
9126         struct btrfs_extent_item *ei;
9127         struct btrfs_key key;
9128         u64 flags;
9129         int ret = 0;
9130
9131         key.objectid = rec->start;
9132         if (rec->metadata) {
9133                 key.type = BTRFS_METADATA_ITEM_KEY;
9134                 key.offset = rec->info_level;
9135         } else {
9136                 key.type = BTRFS_EXTENT_ITEM_KEY;
9137                 key.offset = rec->max_size;
9138         }
9139
9140         trans = btrfs_start_transaction(root, 0);
9141         if (IS_ERR(trans))
9142                 return PTR_ERR(trans);
9143
9144         btrfs_init_path(&path);
9145         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9146         if (ret < 0) {
9147                 btrfs_release_path(&path);
9148                 btrfs_commit_transaction(trans, root);
9149                 return ret;
9150         } else if (ret) {
9151                 fprintf(stderr, "Didn't find extent for %llu\n",
9152                         (unsigned long long)rec->start);
9153                 btrfs_release_path(&path);
9154                 btrfs_commit_transaction(trans, root);
9155                 return -ENOENT;
9156         }
9157
9158         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9159                             struct btrfs_extent_item);
9160         flags = btrfs_extent_flags(path.nodes[0], ei);
9161         if (rec->flag_block_full_backref) {
9162                 fprintf(stderr, "setting full backref on %llu\n",
9163                         (unsigned long long)key.objectid);
9164                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9165         } else {
9166                 fprintf(stderr, "clearing full backref on %llu\n",
9167                         (unsigned long long)key.objectid);
9168                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9169         }
9170         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9171         btrfs_mark_buffer_dirty(path.nodes[0]);
9172         btrfs_release_path(&path);
9173         ret = btrfs_commit_transaction(trans, root);
9174         if (!ret)
9175                 fprintf(stderr, "Repaired extent flags for %llu\n",
9176                                 (unsigned long long)rec->start);
9177
9178         return ret;
9179 }
9180
9181 /* right now we only prune from the extent allocation tree */
9182 static int prune_one_block(struct btrfs_trans_handle *trans,
9183                            struct btrfs_fs_info *info,
9184                            struct btrfs_corrupt_block *corrupt)
9185 {
9186         int ret;
9187         struct btrfs_path path;
9188         struct extent_buffer *eb;
9189         u64 found;
9190         int slot;
9191         int nritems;
9192         int level = corrupt->level + 1;
9193
9194         btrfs_init_path(&path);
9195 again:
9196         /* we want to stop at the parent to our busted block */
9197         path.lowest_level = level;
9198
9199         ret = btrfs_search_slot(trans, info->extent_root,
9200                                 &corrupt->key, &path, -1, 1);
9201
9202         if (ret < 0)
9203                 goto out;
9204
9205         eb = path.nodes[level];
9206         if (!eb) {
9207                 ret = -ENOENT;
9208                 goto out;
9209         }
9210
9211         /*
9212          * hopefully the search gave us the block we want to prune,
9213          * lets try that first
9214          */
9215         slot = path.slots[level];
9216         found =  btrfs_node_blockptr(eb, slot);
9217         if (found == corrupt->cache.start)
9218                 goto del_ptr;
9219
9220         nritems = btrfs_header_nritems(eb);
9221
9222         /* the search failed, lets scan this node and hope we find it */
9223         for (slot = 0; slot < nritems; slot++) {
9224                 found =  btrfs_node_blockptr(eb, slot);
9225                 if (found == corrupt->cache.start)
9226                         goto del_ptr;
9227         }
9228         /*
9229          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9230          * to this block
9231          */
9232         if (eb == info->extent_root->node) {
9233                 ret = -ENOENT;
9234                 goto out;
9235         } else {
9236                 level++;
9237                 btrfs_release_path(&path);
9238                 goto again;
9239         }
9240
9241 del_ptr:
9242         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9243         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9244
9245 out:
9246         btrfs_release_path(&path);
9247         return ret;
9248 }
9249
9250 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9251 {
9252         struct btrfs_trans_handle *trans = NULL;
9253         struct cache_extent *cache;
9254         struct btrfs_corrupt_block *corrupt;
9255
9256         while (1) {
9257                 cache = search_cache_extent(info->corrupt_blocks, 0);
9258                 if (!cache)
9259                         break;
9260                 if (!trans) {
9261                         trans = btrfs_start_transaction(info->extent_root, 1);
9262                         if (IS_ERR(trans))
9263                                 return PTR_ERR(trans);
9264                 }
9265                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9266                 prune_one_block(trans, info, corrupt);
9267                 remove_cache_extent(info->corrupt_blocks, cache);
9268         }
9269         if (trans)
9270                 return btrfs_commit_transaction(trans, info->extent_root);
9271         return 0;
9272 }
9273
9274 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9275 {
9276         struct btrfs_block_group_cache *cache;
9277         u64 start, end;
9278         int ret;
9279
9280         while (1) {
9281                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9282                                             &start, &end, EXTENT_DIRTY);
9283                 if (ret)
9284                         break;
9285                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9286         }
9287
9288         start = 0;
9289         while (1) {
9290                 cache = btrfs_lookup_first_block_group(fs_info, start);
9291                 if (!cache)
9292                         break;
9293                 if (cache->cached)
9294                         cache->cached = 0;
9295                 start = cache->key.objectid + cache->key.offset;
9296         }
9297 }
9298
9299 static int check_extent_refs(struct btrfs_root *root,
9300                              struct cache_tree *extent_cache)
9301 {
9302         struct extent_record *rec;
9303         struct cache_extent *cache;
9304         int ret = 0;
9305         int had_dups = 0;
9306
9307         if (repair) {
9308                 /*
9309                  * if we're doing a repair, we have to make sure
9310                  * we don't allocate from the problem extents.
9311                  * In the worst case, this will be all the
9312                  * extents in the FS
9313                  */
9314                 cache = search_cache_extent(extent_cache, 0);
9315                 while(cache) {
9316                         rec = container_of(cache, struct extent_record, cache);
9317                         set_extent_dirty(root->fs_info->excluded_extents,
9318                                          rec->start,
9319                                          rec->start + rec->max_size - 1);
9320                         cache = next_cache_extent(cache);
9321                 }
9322
9323                 /* pin down all the corrupted blocks too */
9324                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9325                 while(cache) {
9326                         set_extent_dirty(root->fs_info->excluded_extents,
9327                                          cache->start,
9328                                          cache->start + cache->size - 1);
9329                         cache = next_cache_extent(cache);
9330                 }
9331                 prune_corrupt_blocks(root->fs_info);
9332                 reset_cached_block_groups(root->fs_info);
9333         }
9334
9335         reset_cached_block_groups(root->fs_info);
9336
9337         /*
9338          * We need to delete any duplicate entries we find first otherwise we
9339          * could mess up the extent tree when we have backrefs that actually
9340          * belong to a different extent item and not the weird duplicate one.
9341          */
9342         while (repair && !list_empty(&duplicate_extents)) {
9343                 rec = to_extent_record(duplicate_extents.next);
9344                 list_del_init(&rec->list);
9345
9346                 /* Sometimes we can find a backref before we find an actual
9347                  * extent, so we need to process it a little bit to see if there
9348                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9349                  * if this is a backref screwup.  If we need to delete stuff
9350                  * process_duplicates() will return 0, otherwise it will return
9351                  * 1 and we
9352                  */
9353                 if (process_duplicates(extent_cache, rec))
9354                         continue;
9355                 ret = delete_duplicate_records(root, rec);
9356                 if (ret < 0)
9357                         return ret;
9358                 /*
9359                  * delete_duplicate_records will return the number of entries
9360                  * deleted, so if it's greater than 0 then we know we actually
9361                  * did something and we need to remove.
9362                  */
9363                 if (ret)
9364                         had_dups = 1;
9365         }
9366
9367         if (had_dups)
9368                 return -EAGAIN;
9369
9370         while(1) {
9371                 int cur_err = 0;
9372                 int fix = 0;
9373
9374                 cache = search_cache_extent(extent_cache, 0);
9375                 if (!cache)
9376                         break;
9377                 rec = container_of(cache, struct extent_record, cache);
9378                 if (rec->num_duplicates) {
9379                         fprintf(stderr, "extent item %llu has multiple extent "
9380                                 "items\n", (unsigned long long)rec->start);
9381                         cur_err = 1;
9382                 }
9383
9384                 if (rec->refs != rec->extent_item_refs) {
9385                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9386                                 (unsigned long long)rec->start,
9387                                 (unsigned long long)rec->nr);
9388                         fprintf(stderr, "extent item %llu, found %llu\n",
9389                                 (unsigned long long)rec->extent_item_refs,
9390                                 (unsigned long long)rec->refs);
9391                         ret = record_orphan_data_extents(root->fs_info, rec);
9392                         if (ret < 0)
9393                                 goto repair_abort;
9394                         fix = ret;
9395                         cur_err = 1;
9396                 }
9397                 if (all_backpointers_checked(rec, 1)) {
9398                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9399                                 (unsigned long long)rec->start,
9400                                 (unsigned long long)rec->nr);
9401                         fix = 1;
9402                         cur_err = 1;
9403                 }
9404                 if (!rec->owner_ref_checked) {
9405                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9406                                 (unsigned long long)rec->start,
9407                                 (unsigned long long)rec->nr);
9408                         fix = 1;
9409                         cur_err = 1;
9410                 }
9411
9412                 if (repair && fix) {
9413                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9414                         if (ret)
9415                                 goto repair_abort;
9416                 }
9417
9418
9419                 if (rec->bad_full_backref) {
9420                         fprintf(stderr, "bad full backref, on [%llu]\n",
9421                                 (unsigned long long)rec->start);
9422                         if (repair) {
9423                                 ret = fixup_extent_flags(root->fs_info, rec);
9424                                 if (ret)
9425                                         goto repair_abort;
9426                                 fix = 1;
9427                         }
9428                         cur_err = 1;
9429                 }
9430                 /*
9431                  * Although it's not a extent ref's problem, we reuse this
9432                  * routine for error reporting.
9433                  * No repair function yet.
9434                  */
9435                 if (rec->crossing_stripes) {
9436                         fprintf(stderr,
9437                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9438                                 rec->start, rec->start + rec->max_size);
9439                         cur_err = 1;
9440                 }
9441
9442                 if (rec->wrong_chunk_type) {
9443                         fprintf(stderr,
9444                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9445                                 rec->start, rec->start + rec->max_size);
9446                         cur_err = 1;
9447                 }
9448
9449                 remove_cache_extent(extent_cache, cache);
9450                 free_all_extent_backrefs(rec);
9451                 if (!init_extent_tree && repair && (!cur_err || fix))
9452                         clear_extent_dirty(root->fs_info->excluded_extents,
9453                                            rec->start,
9454                                            rec->start + rec->max_size - 1);
9455                 free(rec);
9456         }
9457 repair_abort:
9458         if (repair) {
9459                 if (ret && ret != -EAGAIN) {
9460                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9461                         exit(1);
9462                 } else if (!ret) {
9463                         struct btrfs_trans_handle *trans;
9464
9465                         root = root->fs_info->extent_root;
9466                         trans = btrfs_start_transaction(root, 1);
9467                         if (IS_ERR(trans)) {
9468                                 ret = PTR_ERR(trans);
9469                                 goto repair_abort;
9470                         }
9471
9472                         btrfs_fix_block_accounting(trans, root);
9473                         ret = btrfs_commit_transaction(trans, root);
9474                         if (ret)
9475                                 goto repair_abort;
9476                 }
9477                 return ret;
9478         }
9479         return 0;
9480 }
9481
9482 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9483 {
9484         u64 stripe_size;
9485
9486         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9487                 stripe_size = length;
9488                 stripe_size /= num_stripes;
9489         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9490                 stripe_size = length * 2;
9491                 stripe_size /= num_stripes;
9492         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9493                 stripe_size = length;
9494                 stripe_size /= (num_stripes - 1);
9495         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9496                 stripe_size = length;
9497                 stripe_size /= (num_stripes - 2);
9498         } else {
9499                 stripe_size = length;
9500         }
9501         return stripe_size;
9502 }
9503
9504 /*
9505  * Check the chunk with its block group/dev list ref:
9506  * Return 0 if all refs seems valid.
9507  * Return 1 if part of refs seems valid, need later check for rebuild ref
9508  * like missing block group and needs to search extent tree to rebuild them.
9509  * Return -1 if essential refs are missing and unable to rebuild.
9510  */
9511 static int check_chunk_refs(struct chunk_record *chunk_rec,
9512                             struct block_group_tree *block_group_cache,
9513                             struct device_extent_tree *dev_extent_cache,
9514                             int silent)
9515 {
9516         struct cache_extent *block_group_item;
9517         struct block_group_record *block_group_rec;
9518         struct cache_extent *dev_extent_item;
9519         struct device_extent_record *dev_extent_rec;
9520         u64 devid;
9521         u64 offset;
9522         u64 length;
9523         int metadump_v2 = 0;
9524         int i;
9525         int ret = 0;
9526
9527         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9528                                                chunk_rec->offset,
9529                                                chunk_rec->length);
9530         if (block_group_item) {
9531                 block_group_rec = container_of(block_group_item,
9532                                                struct block_group_record,
9533                                                cache);
9534                 if (chunk_rec->length != block_group_rec->offset ||
9535                     chunk_rec->offset != block_group_rec->objectid ||
9536                     (!metadump_v2 &&
9537                      chunk_rec->type_flags != block_group_rec->flags)) {
9538                         if (!silent)
9539                                 fprintf(stderr,
9540                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9541                                         chunk_rec->objectid,
9542                                         chunk_rec->type,
9543                                         chunk_rec->offset,
9544                                         chunk_rec->length,
9545                                         chunk_rec->offset,
9546                                         chunk_rec->type_flags,
9547                                         block_group_rec->objectid,
9548                                         block_group_rec->type,
9549                                         block_group_rec->offset,
9550                                         block_group_rec->offset,
9551                                         block_group_rec->objectid,
9552                                         block_group_rec->flags);
9553                         ret = -1;
9554                 } else {
9555                         list_del_init(&block_group_rec->list);
9556                         chunk_rec->bg_rec = block_group_rec;
9557                 }
9558         } else {
9559                 if (!silent)
9560                         fprintf(stderr,
9561                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9562                                 chunk_rec->objectid,
9563                                 chunk_rec->type,
9564                                 chunk_rec->offset,
9565                                 chunk_rec->length,
9566                                 chunk_rec->offset,
9567                                 chunk_rec->type_flags);
9568                 ret = 1;
9569         }
9570
9571         if (metadump_v2)
9572                 return ret;
9573
9574         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9575                                     chunk_rec->num_stripes);
9576         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9577                 devid = chunk_rec->stripes[i].devid;
9578                 offset = chunk_rec->stripes[i].offset;
9579                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9580                                                        devid, offset, length);
9581                 if (dev_extent_item) {
9582                         dev_extent_rec = container_of(dev_extent_item,
9583                                                 struct device_extent_record,
9584                                                 cache);
9585                         if (dev_extent_rec->objectid != devid ||
9586                             dev_extent_rec->offset != offset ||
9587                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9588                             dev_extent_rec->length != length) {
9589                                 if (!silent)
9590                                         fprintf(stderr,
9591                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9592                                                 chunk_rec->objectid,
9593                                                 chunk_rec->type,
9594                                                 chunk_rec->offset,
9595                                                 chunk_rec->stripes[i].devid,
9596                                                 chunk_rec->stripes[i].offset,
9597                                                 dev_extent_rec->objectid,
9598                                                 dev_extent_rec->offset,
9599                                                 dev_extent_rec->length);
9600                                 ret = -1;
9601                         } else {
9602                                 list_move(&dev_extent_rec->chunk_list,
9603                                           &chunk_rec->dextents);
9604                         }
9605                 } else {
9606                         if (!silent)
9607                                 fprintf(stderr,
9608                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9609                                         chunk_rec->objectid,
9610                                         chunk_rec->type,
9611                                         chunk_rec->offset,
9612                                         chunk_rec->stripes[i].devid,
9613                                         chunk_rec->stripes[i].offset);
9614                         ret = -1;
9615                 }
9616         }
9617         return ret;
9618 }
9619
9620 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9621 int check_chunks(struct cache_tree *chunk_cache,
9622                  struct block_group_tree *block_group_cache,
9623                  struct device_extent_tree *dev_extent_cache,
9624                  struct list_head *good, struct list_head *bad,
9625                  struct list_head *rebuild, int silent)
9626 {
9627         struct cache_extent *chunk_item;
9628         struct chunk_record *chunk_rec;
9629         struct block_group_record *bg_rec;
9630         struct device_extent_record *dext_rec;
9631         int err;
9632         int ret = 0;
9633
9634         chunk_item = first_cache_extent(chunk_cache);
9635         while (chunk_item) {
9636                 chunk_rec = container_of(chunk_item, struct chunk_record,
9637                                          cache);
9638                 err = check_chunk_refs(chunk_rec, block_group_cache,
9639                                        dev_extent_cache, silent);
9640                 if (err < 0)
9641                         ret = err;
9642                 if (err == 0 && good)
9643                         list_add_tail(&chunk_rec->list, good);
9644                 if (err > 0 && rebuild)
9645                         list_add_tail(&chunk_rec->list, rebuild);
9646                 if (err < 0 && bad)
9647                         list_add_tail(&chunk_rec->list, bad);
9648                 chunk_item = next_cache_extent(chunk_item);
9649         }
9650
9651         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9652                 if (!silent)
9653                         fprintf(stderr,
9654                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9655                                 bg_rec->objectid,
9656                                 bg_rec->offset,
9657                                 bg_rec->flags);
9658                 if (!ret)
9659                         ret = 1;
9660         }
9661
9662         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9663                             chunk_list) {
9664                 if (!silent)
9665                         fprintf(stderr,
9666                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9667                                 dext_rec->objectid,
9668                                 dext_rec->offset,
9669                                 dext_rec->length);
9670                 if (!ret)
9671                         ret = 1;
9672         }
9673         return ret;
9674 }
9675
9676
9677 static int check_device_used(struct device_record *dev_rec,
9678                              struct device_extent_tree *dext_cache)
9679 {
9680         struct cache_extent *cache;
9681         struct device_extent_record *dev_extent_rec;
9682         u64 total_byte = 0;
9683
9684         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9685         while (cache) {
9686                 dev_extent_rec = container_of(cache,
9687                                               struct device_extent_record,
9688                                               cache);
9689                 if (dev_extent_rec->objectid != dev_rec->devid)
9690                         break;
9691
9692                 list_del_init(&dev_extent_rec->device_list);
9693                 total_byte += dev_extent_rec->length;
9694                 cache = next_cache_extent(cache);
9695         }
9696
9697         if (total_byte != dev_rec->byte_used) {
9698                 fprintf(stderr,
9699                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9700                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9701                         dev_rec->type, dev_rec->offset);
9702                 return -1;
9703         } else {
9704                 return 0;
9705         }
9706 }
9707
9708 /* check btrfs_dev_item -> btrfs_dev_extent */
9709 static int check_devices(struct rb_root *dev_cache,
9710                          struct device_extent_tree *dev_extent_cache)
9711 {
9712         struct rb_node *dev_node;
9713         struct device_record *dev_rec;
9714         struct device_extent_record *dext_rec;
9715         int err;
9716         int ret = 0;
9717
9718         dev_node = rb_first(dev_cache);
9719         while (dev_node) {
9720                 dev_rec = container_of(dev_node, struct device_record, node);
9721                 err = check_device_used(dev_rec, dev_extent_cache);
9722                 if (err)
9723                         ret = err;
9724
9725                 dev_node = rb_next(dev_node);
9726         }
9727         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9728                             device_list) {
9729                 fprintf(stderr,
9730                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9731                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9732                 if (!ret)
9733                         ret = 1;
9734         }
9735         return ret;
9736 }
9737
9738 static int add_root_item_to_list(struct list_head *head,
9739                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9740                                   u8 level, u8 drop_level,
9741                                   struct btrfs_key *drop_key)
9742 {
9743
9744         struct root_item_record *ri_rec;
9745         ri_rec = malloc(sizeof(*ri_rec));
9746         if (!ri_rec)
9747                 return -ENOMEM;
9748         ri_rec->bytenr = bytenr;
9749         ri_rec->objectid = objectid;
9750         ri_rec->level = level;
9751         ri_rec->drop_level = drop_level;
9752         ri_rec->last_snapshot = last_snapshot;
9753         if (drop_key)
9754                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9755         list_add_tail(&ri_rec->list, head);
9756
9757         return 0;
9758 }
9759
9760 static void free_root_item_list(struct list_head *list)
9761 {
9762         struct root_item_record *ri_rec;
9763
9764         while (!list_empty(list)) {
9765                 ri_rec = list_first_entry(list, struct root_item_record,
9766                                           list);
9767                 list_del_init(&ri_rec->list);
9768                 free(ri_rec);
9769         }
9770 }
9771
9772 static int deal_root_from_list(struct list_head *list,
9773                                struct btrfs_root *root,
9774                                struct block_info *bits,
9775                                int bits_nr,
9776                                struct cache_tree *pending,
9777                                struct cache_tree *seen,
9778                                struct cache_tree *reada,
9779                                struct cache_tree *nodes,
9780                                struct cache_tree *extent_cache,
9781                                struct cache_tree *chunk_cache,
9782                                struct rb_root *dev_cache,
9783                                struct block_group_tree *block_group_cache,
9784                                struct device_extent_tree *dev_extent_cache)
9785 {
9786         int ret = 0;
9787         u64 last;
9788
9789         while (!list_empty(list)) {
9790                 struct root_item_record *rec;
9791                 struct extent_buffer *buf;
9792                 rec = list_entry(list->next,
9793                                  struct root_item_record, list);
9794                 last = 0;
9795                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9796                 if (!extent_buffer_uptodate(buf)) {
9797                         free_extent_buffer(buf);
9798                         ret = -EIO;
9799                         break;
9800                 }
9801                 ret = add_root_to_pending(buf, extent_cache, pending,
9802                                     seen, nodes, rec->objectid);
9803                 if (ret < 0)
9804                         break;
9805                 /*
9806                  * To rebuild extent tree, we need deal with snapshot
9807                  * one by one, otherwise we deal with node firstly which
9808                  * can maximize readahead.
9809                  */
9810                 while (1) {
9811                         ret = run_next_block(root, bits, bits_nr, &last,
9812                                              pending, seen, reada, nodes,
9813                                              extent_cache, chunk_cache,
9814                                              dev_cache, block_group_cache,
9815                                              dev_extent_cache, rec);
9816                         if (ret != 0)
9817                                 break;
9818                 }
9819                 free_extent_buffer(buf);
9820                 list_del(&rec->list);
9821                 free(rec);
9822                 if (ret < 0)
9823                         break;
9824         }
9825         while (ret >= 0) {
9826                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9827                                      reada, nodes, extent_cache, chunk_cache,
9828                                      dev_cache, block_group_cache,
9829                                      dev_extent_cache, NULL);
9830                 if (ret != 0) {
9831                         if (ret > 0)
9832                                 ret = 0;
9833                         break;
9834                 }
9835         }
9836         return ret;
9837 }
9838
9839 static int check_chunks_and_extents(struct btrfs_root *root)
9840 {
9841         struct rb_root dev_cache;
9842         struct cache_tree chunk_cache;
9843         struct block_group_tree block_group_cache;
9844         struct device_extent_tree dev_extent_cache;
9845         struct cache_tree extent_cache;
9846         struct cache_tree seen;
9847         struct cache_tree pending;
9848         struct cache_tree reada;
9849         struct cache_tree nodes;
9850         struct extent_io_tree excluded_extents;
9851         struct cache_tree corrupt_blocks;
9852         struct btrfs_path path;
9853         struct btrfs_key key;
9854         struct btrfs_key found_key;
9855         int ret, err = 0;
9856         struct block_info *bits;
9857         int bits_nr;
9858         struct extent_buffer *leaf;
9859         int slot;
9860         struct btrfs_root_item ri;
9861         struct list_head dropping_trees;
9862         struct list_head normal_trees;
9863         struct btrfs_root *root1;
9864         u64 objectid;
9865         u8 level;
9866
9867         dev_cache = RB_ROOT;
9868         cache_tree_init(&chunk_cache);
9869         block_group_tree_init(&block_group_cache);
9870         device_extent_tree_init(&dev_extent_cache);
9871
9872         cache_tree_init(&extent_cache);
9873         cache_tree_init(&seen);
9874         cache_tree_init(&pending);
9875         cache_tree_init(&nodes);
9876         cache_tree_init(&reada);
9877         cache_tree_init(&corrupt_blocks);
9878         extent_io_tree_init(&excluded_extents);
9879         INIT_LIST_HEAD(&dropping_trees);
9880         INIT_LIST_HEAD(&normal_trees);
9881
9882         if (repair) {
9883                 root->fs_info->excluded_extents = &excluded_extents;
9884                 root->fs_info->fsck_extent_cache = &extent_cache;
9885                 root->fs_info->free_extent_hook = free_extent_hook;
9886                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9887         }
9888
9889         bits_nr = 1024;
9890         bits = malloc(bits_nr * sizeof(struct block_info));
9891         if (!bits) {
9892                 perror("malloc");
9893                 exit(1);
9894         }
9895
9896         if (ctx.progress_enabled) {
9897                 ctx.tp = TASK_EXTENTS;
9898                 task_start(ctx.info);
9899         }
9900
9901 again:
9902         root1 = root->fs_info->tree_root;
9903         level = btrfs_header_level(root1->node);
9904         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9905                                     root1->node->start, 0, level, 0, NULL);
9906         if (ret < 0)
9907                 goto out;
9908         root1 = root->fs_info->chunk_root;
9909         level = btrfs_header_level(root1->node);
9910         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9911                                     root1->node->start, 0, level, 0, NULL);
9912         if (ret < 0)
9913                 goto out;
9914         btrfs_init_path(&path);
9915         key.offset = 0;
9916         key.objectid = 0;
9917         key.type = BTRFS_ROOT_ITEM_KEY;
9918         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9919                                         &key, &path, 0, 0);
9920         if (ret < 0)
9921                 goto out;
9922         while(1) {
9923                 leaf = path.nodes[0];
9924                 slot = path.slots[0];
9925                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9926                         ret = btrfs_next_leaf(root, &path);
9927                         if (ret != 0)
9928                                 break;
9929                         leaf = path.nodes[0];
9930                         slot = path.slots[0];
9931                 }
9932                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9933                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9934                         unsigned long offset;
9935                         u64 last_snapshot;
9936
9937                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9938                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9939                         last_snapshot = btrfs_root_last_snapshot(&ri);
9940                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9941                                 level = btrfs_root_level(&ri);
9942                                 ret = add_root_item_to_list(&normal_trees,
9943                                                 found_key.objectid,
9944                                                 btrfs_root_bytenr(&ri),
9945                                                 last_snapshot, level,
9946                                                 0, NULL);
9947                                 if (ret < 0)
9948                                         goto out;
9949                         } else {
9950                                 level = btrfs_root_level(&ri);
9951                                 objectid = found_key.objectid;
9952                                 btrfs_disk_key_to_cpu(&found_key,
9953                                                       &ri.drop_progress);
9954                                 ret = add_root_item_to_list(&dropping_trees,
9955                                                 objectid,
9956                                                 btrfs_root_bytenr(&ri),
9957                                                 last_snapshot, level,
9958                                                 ri.drop_level, &found_key);
9959                                 if (ret < 0)
9960                                         goto out;
9961                         }
9962                 }
9963                 path.slots[0]++;
9964         }
9965         btrfs_release_path(&path);
9966
9967         /*
9968          * check_block can return -EAGAIN if it fixes something, please keep
9969          * this in mind when dealing with return values from these functions, if
9970          * we get -EAGAIN we want to fall through and restart the loop.
9971          */
9972         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9973                                   &seen, &reada, &nodes, &extent_cache,
9974                                   &chunk_cache, &dev_cache, &block_group_cache,
9975                                   &dev_extent_cache);
9976         if (ret < 0) {
9977                 if (ret == -EAGAIN)
9978                         goto loop;
9979                 goto out;
9980         }
9981         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9982                                   &pending, &seen, &reada, &nodes,
9983                                   &extent_cache, &chunk_cache, &dev_cache,
9984                                   &block_group_cache, &dev_extent_cache);
9985         if (ret < 0) {
9986                 if (ret == -EAGAIN)
9987                         goto loop;
9988                 goto out;
9989         }
9990
9991         ret = check_chunks(&chunk_cache, &block_group_cache,
9992                            &dev_extent_cache, NULL, NULL, NULL, 0);
9993         if (ret) {
9994                 if (ret == -EAGAIN)
9995                         goto loop;
9996                 err = ret;
9997         }
9998
9999         ret = check_extent_refs(root, &extent_cache);
10000         if (ret < 0) {
10001                 if (ret == -EAGAIN)
10002                         goto loop;
10003                 goto out;
10004         }
10005
10006         ret = check_devices(&dev_cache, &dev_extent_cache);
10007         if (ret && err)
10008                 ret = err;
10009
10010 out:
10011         task_stop(ctx.info);
10012         if (repair) {
10013                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10014                 extent_io_tree_cleanup(&excluded_extents);
10015                 root->fs_info->fsck_extent_cache = NULL;
10016                 root->fs_info->free_extent_hook = NULL;
10017                 root->fs_info->corrupt_blocks = NULL;
10018                 root->fs_info->excluded_extents = NULL;
10019         }
10020         free(bits);
10021         free_chunk_cache_tree(&chunk_cache);
10022         free_device_cache_tree(&dev_cache);
10023         free_block_group_tree(&block_group_cache);
10024         free_device_extent_tree(&dev_extent_cache);
10025         free_extent_cache_tree(&seen);
10026         free_extent_cache_tree(&pending);
10027         free_extent_cache_tree(&reada);
10028         free_extent_cache_tree(&nodes);
10029         free_root_item_list(&normal_trees);
10030         free_root_item_list(&dropping_trees);
10031         return ret;
10032 loop:
10033         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10034         free_extent_cache_tree(&seen);
10035         free_extent_cache_tree(&pending);
10036         free_extent_cache_tree(&reada);
10037         free_extent_cache_tree(&nodes);
10038         free_chunk_cache_tree(&chunk_cache);
10039         free_block_group_tree(&block_group_cache);
10040         free_device_cache_tree(&dev_cache);
10041         free_device_extent_tree(&dev_extent_cache);
10042         free_extent_record_cache(&extent_cache);
10043         free_root_item_list(&normal_trees);
10044         free_root_item_list(&dropping_trees);
10045         extent_io_tree_cleanup(&excluded_extents);
10046         goto again;
10047 }
10048
10049 /*
10050  * Check backrefs of a tree block given by @bytenr or @eb.
10051  *
10052  * @root:       the root containing the @bytenr or @eb
10053  * @eb:         tree block extent buffer, can be NULL
10054  * @bytenr:     bytenr of the tree block to search
10055  * @level:      tree level of the tree block
10056  * @owner:      owner of the tree block
10057  *
10058  * Return >0 for any error found and output error message
10059  * Return 0 for no error found
10060  */
10061 static int check_tree_block_ref(struct btrfs_root *root,
10062                                 struct extent_buffer *eb, u64 bytenr,
10063                                 int level, u64 owner)
10064 {
10065         struct btrfs_key key;
10066         struct btrfs_root *extent_root = root->fs_info->extent_root;
10067         struct btrfs_path path;
10068         struct btrfs_extent_item *ei;
10069         struct btrfs_extent_inline_ref *iref;
10070         struct extent_buffer *leaf;
10071         unsigned long end;
10072         unsigned long ptr;
10073         int slot;
10074         int skinny_level;
10075         int type;
10076         u32 nodesize = root->fs_info->nodesize;
10077         u32 item_size;
10078         u64 offset;
10079         int tree_reloc_root = 0;
10080         int found_ref = 0;
10081         int err = 0;
10082         int ret;
10083
10084         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10085             btrfs_header_bytenr(root->node) == bytenr)
10086                 tree_reloc_root = 1;
10087
10088         btrfs_init_path(&path);
10089         key.objectid = bytenr;
10090         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10091                 key.type = BTRFS_METADATA_ITEM_KEY;
10092         else
10093                 key.type = BTRFS_EXTENT_ITEM_KEY;
10094         key.offset = (u64)-1;
10095
10096         /* Search for the backref in extent tree */
10097         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10098         if (ret < 0) {
10099                 err |= BACKREF_MISSING;
10100                 goto out;
10101         }
10102         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10103         if (ret) {
10104                 err |= BACKREF_MISSING;
10105                 goto out;
10106         }
10107
10108         leaf = path.nodes[0];
10109         slot = path.slots[0];
10110         btrfs_item_key_to_cpu(leaf, &key, slot);
10111
10112         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10113
10114         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10115                 skinny_level = (int)key.offset;
10116                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10117         } else {
10118                 struct btrfs_tree_block_info *info;
10119
10120                 info = (struct btrfs_tree_block_info *)(ei + 1);
10121                 skinny_level = btrfs_tree_block_level(leaf, info);
10122                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10123         }
10124
10125         if (eb) {
10126                 u64 header_gen;
10127                 u64 extent_gen;
10128
10129                 if (!(btrfs_extent_flags(leaf, ei) &
10130                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10131                         error(
10132                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10133                                 key.objectid, nodesize,
10134                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10135                         err = BACKREF_MISMATCH;
10136                 }
10137                 header_gen = btrfs_header_generation(eb);
10138                 extent_gen = btrfs_extent_generation(leaf, ei);
10139                 if (header_gen != extent_gen) {
10140                         error(
10141         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10142                                 key.objectid, nodesize, header_gen,
10143                                 extent_gen);
10144                         err = BACKREF_MISMATCH;
10145                 }
10146                 if (level != skinny_level) {
10147                         error(
10148                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10149                                 key.objectid, nodesize, level, skinny_level);
10150                         err = BACKREF_MISMATCH;
10151                 }
10152                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10153                         error(
10154                         "extent[%llu %u] is referred by other roots than %llu",
10155                                 key.objectid, nodesize, root->objectid);
10156                         err = BACKREF_MISMATCH;
10157                 }
10158         }
10159
10160         /*
10161          * Iterate the extent/metadata item to find the exact backref
10162          */
10163         item_size = btrfs_item_size_nr(leaf, slot);
10164         ptr = (unsigned long)iref;
10165         end = (unsigned long)ei + item_size;
10166         while (ptr < end) {
10167                 iref = (struct btrfs_extent_inline_ref *)ptr;
10168                 type = btrfs_extent_inline_ref_type(leaf, iref);
10169                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10170
10171                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10172                         (offset == root->objectid || offset == owner)) {
10173                         found_ref = 1;
10174                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10175                         /*
10176                          * Backref of tree reloc root points to itself, no need
10177                          * to check backref any more.
10178                          */
10179                         if (tree_reloc_root)
10180                                 found_ref = 1;
10181                         else
10182                         /* Check if the backref points to valid referencer */
10183                                 found_ref = !check_tree_block_ref(root, NULL,
10184                                                 offset, level + 1, owner);
10185                 }
10186
10187                 if (found_ref)
10188                         break;
10189                 ptr += btrfs_extent_inline_ref_size(type);
10190         }
10191
10192         /*
10193          * Inlined extent item doesn't have what we need, check
10194          * TREE_BLOCK_REF_KEY
10195          */
10196         if (!found_ref) {
10197                 btrfs_release_path(&path);
10198                 key.objectid = bytenr;
10199                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10200                 key.offset = root->objectid;
10201
10202                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10203                 if (!ret)
10204                         found_ref = 1;
10205         }
10206         if (!found_ref)
10207                 err |= BACKREF_MISSING;
10208 out:
10209         btrfs_release_path(&path);
10210         if (eb && (err & BACKREF_MISSING))
10211                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10212                         bytenr, nodesize, owner, level);
10213         return err;
10214 }
10215
10216 /*
10217  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10218  *
10219  * Return >0 any error found and output error message
10220  * Return 0 for no error found
10221  */
10222 static int check_extent_data_item(struct btrfs_root *root,
10223                                   struct extent_buffer *eb, int slot)
10224 {
10225         struct btrfs_file_extent_item *fi;
10226         struct btrfs_path path;
10227         struct btrfs_root *extent_root = root->fs_info->extent_root;
10228         struct btrfs_key fi_key;
10229         struct btrfs_key dbref_key;
10230         struct extent_buffer *leaf;
10231         struct btrfs_extent_item *ei;
10232         struct btrfs_extent_inline_ref *iref;
10233         struct btrfs_extent_data_ref *dref;
10234         u64 owner;
10235         u64 disk_bytenr;
10236         u64 disk_num_bytes;
10237         u64 extent_num_bytes;
10238         u64 extent_flags;
10239         u32 item_size;
10240         unsigned long end;
10241         unsigned long ptr;
10242         int type;
10243         u64 ref_root;
10244         int found_dbackref = 0;
10245         int err = 0;
10246         int ret;
10247
10248         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10249         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10250
10251         /* Nothing to check for hole and inline data extents */
10252         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10253             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10254                 return 0;
10255
10256         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10257         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10258         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10259
10260         /* Check unaligned disk_num_bytes and num_bytes */
10261         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10262                 error(
10263 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10264                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10265                         root->fs_info->sectorsize);
10266                 err |= BYTES_UNALIGNED;
10267         } else {
10268                 data_bytes_allocated += disk_num_bytes;
10269         }
10270         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10271                 error(
10272 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10273                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10274                         root->fs_info->sectorsize);
10275                 err |= BYTES_UNALIGNED;
10276         } else {
10277                 data_bytes_referenced += extent_num_bytes;
10278         }
10279         owner = btrfs_header_owner(eb);
10280
10281         /* Check the extent item of the file extent in extent tree */
10282         btrfs_init_path(&path);
10283         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10284         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10285         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10286
10287         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10288         if (ret)
10289                 goto out;
10290
10291         leaf = path.nodes[0];
10292         slot = path.slots[0];
10293         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10294
10295         extent_flags = btrfs_extent_flags(leaf, ei);
10296
10297         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10298                 error(
10299                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10300                     disk_bytenr, disk_num_bytes,
10301                     BTRFS_EXTENT_FLAG_DATA);
10302                 err |= BACKREF_MISMATCH;
10303         }
10304
10305         /* Check data backref inside that extent item */
10306         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10307         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10308         ptr = (unsigned long)iref;
10309         end = (unsigned long)ei + item_size;
10310         while (ptr < end) {
10311                 iref = (struct btrfs_extent_inline_ref *)ptr;
10312                 type = btrfs_extent_inline_ref_type(leaf, iref);
10313                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10314
10315                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10316                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10317                         if (ref_root == owner || ref_root == root->objectid)
10318                                 found_dbackref = 1;
10319                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10320                         found_dbackref = !check_tree_block_ref(root, NULL,
10321                                 btrfs_extent_inline_ref_offset(leaf, iref),
10322                                 0, owner);
10323                 }
10324
10325                 if (found_dbackref)
10326                         break;
10327                 ptr += btrfs_extent_inline_ref_size(type);
10328         }
10329
10330         if (!found_dbackref) {
10331                 btrfs_release_path(&path);
10332
10333                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10334                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10335                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10336                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10337                                 fi_key.objectid, fi_key.offset);
10338
10339                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10340                                         &dbref_key, &path, 0, 0);
10341                 if (!ret) {
10342                         found_dbackref = 1;
10343                         goto out;
10344                 }
10345
10346                 btrfs_release_path(&path);
10347
10348                 /*
10349                  * Neither inlined nor EXTENT_DATA_REF found, try
10350                  * SHARED_DATA_REF as last chance.
10351                  */
10352                 dbref_key.objectid = disk_bytenr;
10353                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10354                 dbref_key.offset = eb->start;
10355
10356                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10357                                         &dbref_key, &path, 0, 0);
10358                 if (!ret) {
10359                         found_dbackref = 1;
10360                         goto out;
10361                 }
10362         }
10363
10364 out:
10365         if (!found_dbackref)
10366                 err |= BACKREF_MISSING;
10367         btrfs_release_path(&path);
10368         if (err & BACKREF_MISSING) {
10369                 error("data extent[%llu %llu] backref lost",
10370                       disk_bytenr, disk_num_bytes);
10371         }
10372         return err;
10373 }
10374
10375 /*
10376  * Get real tree block level for the case like shared block
10377  * Return >= 0 as tree level
10378  * Return <0 for error
10379  */
10380 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10381 {
10382         struct extent_buffer *eb;
10383         struct btrfs_path path;
10384         struct btrfs_key key;
10385         struct btrfs_extent_item *ei;
10386         u64 flags;
10387         u64 transid;
10388         u8 backref_level;
10389         u8 header_level;
10390         int ret;
10391
10392         /* Search extent tree for extent generation and level */
10393         key.objectid = bytenr;
10394         key.type = BTRFS_METADATA_ITEM_KEY;
10395         key.offset = (u64)-1;
10396
10397         btrfs_init_path(&path);
10398         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10399         if (ret < 0)
10400                 goto release_out;
10401         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10402         if (ret < 0)
10403                 goto release_out;
10404         if (ret > 0) {
10405                 ret = -ENOENT;
10406                 goto release_out;
10407         }
10408
10409         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10410         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10411                             struct btrfs_extent_item);
10412         flags = btrfs_extent_flags(path.nodes[0], ei);
10413         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10414                 ret = -ENOENT;
10415                 goto release_out;
10416         }
10417
10418         /* Get transid for later read_tree_block() check */
10419         transid = btrfs_extent_generation(path.nodes[0], ei);
10420
10421         /* Get backref level as one source */
10422         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10423                 backref_level = key.offset;
10424         } else {
10425                 struct btrfs_tree_block_info *info;
10426
10427                 info = (struct btrfs_tree_block_info *)(ei + 1);
10428                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10429         }
10430         btrfs_release_path(&path);
10431
10432         /* Get level from tree block as an alternative source */
10433         eb = read_tree_block(fs_info, bytenr, transid);
10434         if (!extent_buffer_uptodate(eb)) {
10435                 free_extent_buffer(eb);
10436                 return -EIO;
10437         }
10438         header_level = btrfs_header_level(eb);
10439         free_extent_buffer(eb);
10440
10441         if (header_level != backref_level)
10442                 return -EIO;
10443         return header_level;
10444
10445 release_out:
10446         btrfs_release_path(&path);
10447         return ret;
10448 }
10449
10450 /*
10451  * Check if a tree block backref is valid (points to a valid tree block)
10452  * if level == -1, level will be resolved
10453  * Return >0 for any error found and print error message
10454  */
10455 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10456                                     u64 bytenr, int level)
10457 {
10458         struct btrfs_root *root;
10459         struct btrfs_key key;
10460         struct btrfs_path path;
10461         struct extent_buffer *eb;
10462         struct extent_buffer *node;
10463         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10464         int err = 0;
10465         int ret;
10466
10467         /* Query level for level == -1 special case */
10468         if (level == -1)
10469                 level = query_tree_block_level(fs_info, bytenr);
10470         if (level < 0) {
10471                 err |= REFERENCER_MISSING;
10472                 goto out;
10473         }
10474
10475         key.objectid = root_id;
10476         key.type = BTRFS_ROOT_ITEM_KEY;
10477         key.offset = (u64)-1;
10478
10479         root = btrfs_read_fs_root(fs_info, &key);
10480         if (IS_ERR(root)) {
10481                 err |= REFERENCER_MISSING;
10482                 goto out;
10483         }
10484
10485         /* Read out the tree block to get item/node key */
10486         eb = read_tree_block(fs_info, bytenr, 0);
10487         if (!extent_buffer_uptodate(eb)) {
10488                 err |= REFERENCER_MISSING;
10489                 free_extent_buffer(eb);
10490                 goto out;
10491         }
10492
10493         /* Empty tree, no need to check key */
10494         if (!btrfs_header_nritems(eb) && !level) {
10495                 free_extent_buffer(eb);
10496                 goto out;
10497         }
10498
10499         if (level)
10500                 btrfs_node_key_to_cpu(eb, &key, 0);
10501         else
10502                 btrfs_item_key_to_cpu(eb, &key, 0);
10503
10504         free_extent_buffer(eb);
10505
10506         btrfs_init_path(&path);
10507         path.lowest_level = level;
10508         /* Search with the first key, to ensure we can reach it */
10509         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10510         if (ret < 0) {
10511                 err |= REFERENCER_MISSING;
10512                 goto release_out;
10513         }
10514
10515         node = path.nodes[level];
10516         if (btrfs_header_bytenr(node) != bytenr) {
10517                 error(
10518         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10519                         bytenr, nodesize, bytenr,
10520                         btrfs_header_bytenr(node));
10521                 err |= REFERENCER_MISMATCH;
10522         }
10523         if (btrfs_header_level(node) != level) {
10524                 error(
10525         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10526                         bytenr, nodesize, level,
10527                         btrfs_header_level(node));
10528                 err |= REFERENCER_MISMATCH;
10529         }
10530
10531 release_out:
10532         btrfs_release_path(&path);
10533 out:
10534         if (err & REFERENCER_MISSING) {
10535                 if (level < 0)
10536                         error("extent [%llu %d] lost referencer (owner: %llu)",
10537                                 bytenr, nodesize, root_id);
10538                 else
10539                         error(
10540                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10541                                 bytenr, nodesize, root_id, level);
10542         }
10543
10544         return err;
10545 }
10546
10547 /*
10548  * Check if tree block @eb is tree reloc root.
10549  * Return 0 if it's not or any problem happens
10550  * Return 1 if it's a tree reloc root
10551  */
10552 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10553                                  struct extent_buffer *eb)
10554 {
10555         struct btrfs_root *tree_reloc_root;
10556         struct btrfs_key key;
10557         u64 bytenr = btrfs_header_bytenr(eb);
10558         u64 owner = btrfs_header_owner(eb);
10559         int ret = 0;
10560
10561         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10562         key.offset = owner;
10563         key.type = BTRFS_ROOT_ITEM_KEY;
10564
10565         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10566         if (IS_ERR(tree_reloc_root))
10567                 return 0;
10568
10569         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10570                 ret = 1;
10571         btrfs_free_fs_root(tree_reloc_root);
10572         return ret;
10573 }
10574
10575 /*
10576  * Check referencer for shared block backref
10577  * If level == -1, this function will resolve the level.
10578  */
10579 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10580                                      u64 parent, u64 bytenr, int level)
10581 {
10582         struct extent_buffer *eb;
10583         u32 nr;
10584         int found_parent = 0;
10585         int i;
10586
10587         eb = read_tree_block(fs_info, parent, 0);
10588         if (!extent_buffer_uptodate(eb))
10589                 goto out;
10590
10591         if (level == -1)
10592                 level = query_tree_block_level(fs_info, bytenr);
10593         if (level < 0)
10594                 goto out;
10595
10596         /* It's possible it's a tree reloc root */
10597         if (parent == bytenr) {
10598                 if (is_tree_reloc_root(fs_info, eb))
10599                         found_parent = 1;
10600                 goto out;
10601         }
10602
10603         if (level + 1 != btrfs_header_level(eb))
10604                 goto out;
10605
10606         nr = btrfs_header_nritems(eb);
10607         for (i = 0; i < nr; i++) {
10608                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10609                         found_parent = 1;
10610                         break;
10611                 }
10612         }
10613 out:
10614         free_extent_buffer(eb);
10615         if (!found_parent) {
10616                 error(
10617         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10618                         bytenr, fs_info->nodesize, parent, level);
10619                 return REFERENCER_MISSING;
10620         }
10621         return 0;
10622 }
10623
10624 /*
10625  * Check referencer for normal (inlined) data ref
10626  * If len == 0, it will be resolved by searching in extent tree
10627  */
10628 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10629                                      u64 root_id, u64 objectid, u64 offset,
10630                                      u64 bytenr, u64 len, u32 count)
10631 {
10632         struct btrfs_root *root;
10633         struct btrfs_root *extent_root = fs_info->extent_root;
10634         struct btrfs_key key;
10635         struct btrfs_path path;
10636         struct extent_buffer *leaf;
10637         struct btrfs_file_extent_item *fi;
10638         u32 found_count = 0;
10639         int slot;
10640         int ret = 0;
10641
10642         if (!len) {
10643                 key.objectid = bytenr;
10644                 key.type = BTRFS_EXTENT_ITEM_KEY;
10645                 key.offset = (u64)-1;
10646
10647                 btrfs_init_path(&path);
10648                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10649                 if (ret < 0)
10650                         goto out;
10651                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10652                 if (ret)
10653                         goto out;
10654                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10655                 if (key.objectid != bytenr ||
10656                     key.type != BTRFS_EXTENT_ITEM_KEY)
10657                         goto out;
10658                 len = key.offset;
10659                 btrfs_release_path(&path);
10660         }
10661         key.objectid = root_id;
10662         key.type = BTRFS_ROOT_ITEM_KEY;
10663         key.offset = (u64)-1;
10664         btrfs_init_path(&path);
10665
10666         root = btrfs_read_fs_root(fs_info, &key);
10667         if (IS_ERR(root))
10668                 goto out;
10669
10670         key.objectid = objectid;
10671         key.type = BTRFS_EXTENT_DATA_KEY;
10672         /*
10673          * It can be nasty as data backref offset is
10674          * file offset - file extent offset, which is smaller or
10675          * equal to original backref offset.  The only special case is
10676          * overflow.  So we need to special check and do further search.
10677          */
10678         key.offset = offset & (1ULL << 63) ? 0 : offset;
10679
10680         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10681         if (ret < 0)
10682                 goto out;
10683
10684         /*
10685          * Search afterwards to get correct one
10686          * NOTE: As we must do a comprehensive check on the data backref to
10687          * make sure the dref count also matches, we must iterate all file
10688          * extents for that inode.
10689          */
10690         while (1) {
10691                 leaf = path.nodes[0];
10692                 slot = path.slots[0];
10693
10694                 if (slot >= btrfs_header_nritems(leaf))
10695                         goto next;
10696                 btrfs_item_key_to_cpu(leaf, &key, slot);
10697                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10698                         break;
10699                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10700                 /*
10701                  * Except normal disk bytenr and disk num bytes, we still
10702                  * need to do extra check on dbackref offset as
10703                  * dbackref offset = file_offset - file_extent_offset
10704                  */
10705                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10706                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10707                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10708                     offset)
10709                         found_count++;
10710
10711 next:
10712                 ret = btrfs_next_item(root, &path);
10713                 if (ret)
10714                         break;
10715         }
10716 out:
10717         btrfs_release_path(&path);
10718         if (found_count != count) {
10719                 error(
10720 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10721                         bytenr, len, root_id, objectid, offset, count, found_count);
10722                 return REFERENCER_MISSING;
10723         }
10724         return 0;
10725 }
10726
10727 /*
10728  * Check if the referencer of a shared data backref exists
10729  */
10730 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10731                                      u64 parent, u64 bytenr)
10732 {
10733         struct extent_buffer *eb;
10734         struct btrfs_key key;
10735         struct btrfs_file_extent_item *fi;
10736         u32 nr;
10737         int found_parent = 0;
10738         int i;
10739
10740         eb = read_tree_block(fs_info, parent, 0);
10741         if (!extent_buffer_uptodate(eb))
10742                 goto out;
10743
10744         nr = btrfs_header_nritems(eb);
10745         for (i = 0; i < nr; i++) {
10746                 btrfs_item_key_to_cpu(eb, &key, i);
10747                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10748                         continue;
10749
10750                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10751                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10752                         continue;
10753
10754                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10755                         found_parent = 1;
10756                         break;
10757                 }
10758         }
10759
10760 out:
10761         free_extent_buffer(eb);
10762         if (!found_parent) {
10763                 error("shared extent %llu referencer lost (parent: %llu)",
10764                         bytenr, parent);
10765                 return REFERENCER_MISSING;
10766         }
10767         return 0;
10768 }
10769
10770 /*
10771  * This function will check a given extent item, including its backref and
10772  * itself (like crossing stripe boundary and type)
10773  *
10774  * Since we don't use extent_record anymore, introduce new error bit
10775  */
10776 static int check_extent_item(struct btrfs_fs_info *fs_info,
10777                              struct extent_buffer *eb, int slot)
10778 {
10779         struct btrfs_extent_item *ei;
10780         struct btrfs_extent_inline_ref *iref;
10781         struct btrfs_extent_data_ref *dref;
10782         unsigned long end;
10783         unsigned long ptr;
10784         int type;
10785         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10786         u32 item_size = btrfs_item_size_nr(eb, slot);
10787         u64 flags;
10788         u64 offset;
10789         int metadata = 0;
10790         int level;
10791         struct btrfs_key key;
10792         int ret;
10793         int err = 0;
10794
10795         btrfs_item_key_to_cpu(eb, &key, slot);
10796         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10797                 bytes_used += key.offset;
10798         else
10799                 bytes_used += nodesize;
10800
10801         if (item_size < sizeof(*ei)) {
10802                 /*
10803                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10804                  * old thing when on disk format is still un-determined.
10805                  * No need to care about it anymore
10806                  */
10807                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10808                 return -ENOTTY;
10809         }
10810
10811         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10812         flags = btrfs_extent_flags(eb, ei);
10813
10814         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10815                 metadata = 1;
10816         if (metadata && check_crossing_stripes(global_info, key.objectid,
10817                                                eb->len)) {
10818                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10819                       key.objectid, key.objectid + nodesize);
10820                 err |= CROSSING_STRIPE_BOUNDARY;
10821         }
10822
10823         ptr = (unsigned long)(ei + 1);
10824
10825         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10826                 /* Old EXTENT_ITEM metadata */
10827                 struct btrfs_tree_block_info *info;
10828
10829                 info = (struct btrfs_tree_block_info *)ptr;
10830                 level = btrfs_tree_block_level(eb, info);
10831                 ptr += sizeof(struct btrfs_tree_block_info);
10832         } else {
10833                 /* New METADATA_ITEM */
10834                 level = key.offset;
10835         }
10836         end = (unsigned long)ei + item_size;
10837
10838 next:
10839         /* Reached extent item end normally */
10840         if (ptr == end)
10841                 goto out;
10842
10843         /* Beyond extent item end, wrong item size */
10844         if (ptr > end) {
10845                 err |= ITEM_SIZE_MISMATCH;
10846                 error("extent item at bytenr %llu slot %d has wrong size",
10847                         eb->start, slot);
10848                 goto out;
10849         }
10850
10851         /* Now check every backref in this extent item */
10852         iref = (struct btrfs_extent_inline_ref *)ptr;
10853         type = btrfs_extent_inline_ref_type(eb, iref);
10854         offset = btrfs_extent_inline_ref_offset(eb, iref);
10855         switch (type) {
10856         case BTRFS_TREE_BLOCK_REF_KEY:
10857                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10858                                                level);
10859                 err |= ret;
10860                 break;
10861         case BTRFS_SHARED_BLOCK_REF_KEY:
10862                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10863                                                  level);
10864                 err |= ret;
10865                 break;
10866         case BTRFS_EXTENT_DATA_REF_KEY:
10867                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10868                 ret = check_extent_data_backref(fs_info,
10869                                 btrfs_extent_data_ref_root(eb, dref),
10870                                 btrfs_extent_data_ref_objectid(eb, dref),
10871                                 btrfs_extent_data_ref_offset(eb, dref),
10872                                 key.objectid, key.offset,
10873                                 btrfs_extent_data_ref_count(eb, dref));
10874                 err |= ret;
10875                 break;
10876         case BTRFS_SHARED_DATA_REF_KEY:
10877                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10878                 err |= ret;
10879                 break;
10880         default:
10881                 error("extent[%llu %d %llu] has unknown ref type: %d",
10882                         key.objectid, key.type, key.offset, type);
10883                 err |= UNKNOWN_TYPE;
10884                 goto out;
10885         }
10886
10887         ptr += btrfs_extent_inline_ref_size(type);
10888         goto next;
10889
10890 out:
10891         return err;
10892 }
10893
10894 /*
10895  * Check if a dev extent item is referred correctly by its chunk
10896  */
10897 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10898                                  struct extent_buffer *eb, int slot)
10899 {
10900         struct btrfs_root *chunk_root = fs_info->chunk_root;
10901         struct btrfs_dev_extent *ptr;
10902         struct btrfs_path path;
10903         struct btrfs_key chunk_key;
10904         struct btrfs_key devext_key;
10905         struct btrfs_chunk *chunk;
10906         struct extent_buffer *l;
10907         int num_stripes;
10908         u64 length;
10909         int i;
10910         int found_chunk = 0;
10911         int ret;
10912
10913         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10914         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10915         length = btrfs_dev_extent_length(eb, ptr);
10916
10917         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10918         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10919         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10920
10921         btrfs_init_path(&path);
10922         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10923         if (ret)
10924                 goto out;
10925
10926         l = path.nodes[0];
10927         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10928         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10929                                       chunk_key.offset);
10930         if (ret < 0)
10931                 goto out;
10932
10933         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10934                 goto out;
10935
10936         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10937         for (i = 0; i < num_stripes; i++) {
10938                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10939                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10940
10941                 if (devid == devext_key.objectid &&
10942                     offset == devext_key.offset) {
10943                         found_chunk = 1;
10944                         break;
10945                 }
10946         }
10947 out:
10948         btrfs_release_path(&path);
10949         if (!found_chunk) {
10950                 error(
10951                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10952                         devext_key.objectid, devext_key.offset, length);
10953                 return REFERENCER_MISSING;
10954         }
10955         return 0;
10956 }
10957
10958 /*
10959  * Check if the used space is correct with the dev item
10960  */
10961 static int check_dev_item(struct btrfs_fs_info *fs_info,
10962                           struct extent_buffer *eb, int slot)
10963 {
10964         struct btrfs_root *dev_root = fs_info->dev_root;
10965         struct btrfs_dev_item *dev_item;
10966         struct btrfs_path path;
10967         struct btrfs_key key;
10968         struct btrfs_dev_extent *ptr;
10969         u64 dev_id;
10970         u64 used;
10971         u64 total = 0;
10972         int ret;
10973
10974         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10975         dev_id = btrfs_device_id(eb, dev_item);
10976         used = btrfs_device_bytes_used(eb, dev_item);
10977
10978         key.objectid = dev_id;
10979         key.type = BTRFS_DEV_EXTENT_KEY;
10980         key.offset = 0;
10981
10982         btrfs_init_path(&path);
10983         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10984         if (ret < 0) {
10985                 btrfs_item_key_to_cpu(eb, &key, slot);
10986                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10987                         key.objectid, key.type, key.offset);
10988                 btrfs_release_path(&path);
10989                 return REFERENCER_MISSING;
10990         }
10991
10992         /* Iterate dev_extents to calculate the used space of a device */
10993         while (1) {
10994                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10995                         goto next;
10996
10997                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10998                 if (key.objectid > dev_id)
10999                         break;
11000                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11001                         goto next;
11002
11003                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11004                                      struct btrfs_dev_extent);
11005                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11006 next:
11007                 ret = btrfs_next_item(dev_root, &path);
11008                 if (ret)
11009                         break;
11010         }
11011         btrfs_release_path(&path);
11012
11013         if (used != total) {
11014                 btrfs_item_key_to_cpu(eb, &key, slot);
11015                 error(
11016 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11017                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11018                         BTRFS_DEV_EXTENT_KEY, dev_id);
11019                 return ACCOUNTING_MISMATCH;
11020         }
11021         return 0;
11022 }
11023
11024 /*
11025  * Check a block group item with its referener (chunk) and its used space
11026  * with extent/metadata item
11027  */
11028 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11029                                   struct extent_buffer *eb, int slot)
11030 {
11031         struct btrfs_root *extent_root = fs_info->extent_root;
11032         struct btrfs_root *chunk_root = fs_info->chunk_root;
11033         struct btrfs_block_group_item *bi;
11034         struct btrfs_block_group_item bg_item;
11035         struct btrfs_path path;
11036         struct btrfs_key bg_key;
11037         struct btrfs_key chunk_key;
11038         struct btrfs_key extent_key;
11039         struct btrfs_chunk *chunk;
11040         struct extent_buffer *leaf;
11041         struct btrfs_extent_item *ei;
11042         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11043         u64 flags;
11044         u64 bg_flags;
11045         u64 used;
11046         u64 total = 0;
11047         int ret;
11048         int err = 0;
11049
11050         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11051         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11052         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11053         used = btrfs_block_group_used(&bg_item);
11054         bg_flags = btrfs_block_group_flags(&bg_item);
11055
11056         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11057         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11058         chunk_key.offset = bg_key.objectid;
11059
11060         btrfs_init_path(&path);
11061         /* Search for the referencer chunk */
11062         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11063         if (ret) {
11064                 error(
11065                 "block group[%llu %llu] did not find the related chunk item",
11066                         bg_key.objectid, bg_key.offset);
11067                 err |= REFERENCER_MISSING;
11068         } else {
11069                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11070                                         struct btrfs_chunk);
11071                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11072                                                 bg_key.offset) {
11073                         error(
11074         "block group[%llu %llu] related chunk item length does not match",
11075                                 bg_key.objectid, bg_key.offset);
11076                         err |= REFERENCER_MISMATCH;
11077                 }
11078         }
11079         btrfs_release_path(&path);
11080
11081         /* Search from the block group bytenr */
11082         extent_key.objectid = bg_key.objectid;
11083         extent_key.type = 0;
11084         extent_key.offset = 0;
11085
11086         btrfs_init_path(&path);
11087         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11088         if (ret < 0)
11089                 goto out;
11090
11091         /* Iterate extent tree to account used space */
11092         while (1) {
11093                 leaf = path.nodes[0];
11094
11095                 /* Search slot can point to the last item beyond leaf nritems */
11096                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11097                         goto next;
11098
11099                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11100                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11101                         break;
11102
11103                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11104                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11105                         goto next;
11106                 if (extent_key.objectid < bg_key.objectid)
11107                         goto next;
11108
11109                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11110                         total += nodesize;
11111                 else
11112                         total += extent_key.offset;
11113
11114                 ei = btrfs_item_ptr(leaf, path.slots[0],
11115                                     struct btrfs_extent_item);
11116                 flags = btrfs_extent_flags(leaf, ei);
11117                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11118                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11119                                 error(
11120                         "bad extent[%llu, %llu) type mismatch with chunk",
11121                                         extent_key.objectid,
11122                                         extent_key.objectid + extent_key.offset);
11123                                 err |= CHUNK_TYPE_MISMATCH;
11124                         }
11125                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11126                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11127                                     BTRFS_BLOCK_GROUP_METADATA))) {
11128                                 error(
11129                         "bad extent[%llu, %llu) type mismatch with chunk",
11130                                         extent_key.objectid,
11131                                         extent_key.objectid + nodesize);
11132                                 err |= CHUNK_TYPE_MISMATCH;
11133                         }
11134                 }
11135 next:
11136                 ret = btrfs_next_item(extent_root, &path);
11137                 if (ret)
11138                         break;
11139         }
11140
11141 out:
11142         btrfs_release_path(&path);
11143
11144         if (total != used) {
11145                 error(
11146                 "block group[%llu %llu] used %llu but extent items used %llu",
11147                         bg_key.objectid, bg_key.offset, used, total);
11148                 err |= ACCOUNTING_MISMATCH;
11149         }
11150         return err;
11151 }
11152
11153 /*
11154  * Check a chunk item.
11155  * Including checking all referred dev_extents and block group
11156  */
11157 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11158                             struct extent_buffer *eb, int slot)
11159 {
11160         struct btrfs_root *extent_root = fs_info->extent_root;
11161         struct btrfs_root *dev_root = fs_info->dev_root;
11162         struct btrfs_path path;
11163         struct btrfs_key chunk_key;
11164         struct btrfs_key bg_key;
11165         struct btrfs_key devext_key;
11166         struct btrfs_chunk *chunk;
11167         struct extent_buffer *leaf;
11168         struct btrfs_block_group_item *bi;
11169         struct btrfs_block_group_item bg_item;
11170         struct btrfs_dev_extent *ptr;
11171         u64 length;
11172         u64 chunk_end;
11173         u64 stripe_len;
11174         u64 type;
11175         int num_stripes;
11176         u64 offset;
11177         u64 objectid;
11178         int i;
11179         int ret;
11180         int err = 0;
11181
11182         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11183         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11184         length = btrfs_chunk_length(eb, chunk);
11185         chunk_end = chunk_key.offset + length;
11186         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11187                                       chunk_key.offset);
11188         if (ret < 0) {
11189                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11190                         chunk_end);
11191                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11192                 goto out;
11193         }
11194         type = btrfs_chunk_type(eb, chunk);
11195
11196         bg_key.objectid = chunk_key.offset;
11197         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11198         bg_key.offset = length;
11199
11200         btrfs_init_path(&path);
11201         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11202         if (ret) {
11203                 error(
11204                 "chunk[%llu %llu) did not find the related block group item",
11205                         chunk_key.offset, chunk_end);
11206                 err |= REFERENCER_MISSING;
11207         } else{
11208                 leaf = path.nodes[0];
11209                 bi = btrfs_item_ptr(leaf, path.slots[0],
11210                                     struct btrfs_block_group_item);
11211                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11212                                    sizeof(bg_item));
11213                 if (btrfs_block_group_flags(&bg_item) != type) {
11214                         error(
11215 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11216                                 chunk_key.offset, chunk_end, type,
11217                                 btrfs_block_group_flags(&bg_item));
11218                         err |= REFERENCER_MISSING;
11219                 }
11220         }
11221
11222         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11223         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11224         for (i = 0; i < num_stripes; i++) {
11225                 btrfs_release_path(&path);
11226                 btrfs_init_path(&path);
11227                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11228                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11229                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11230
11231                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11232                                         0, 0);
11233                 if (ret)
11234                         goto not_match_dev;
11235
11236                 leaf = path.nodes[0];
11237                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11238                                      struct btrfs_dev_extent);
11239                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11240                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11241                 if (objectid != chunk_key.objectid ||
11242                     offset != chunk_key.offset ||
11243                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11244                         goto not_match_dev;
11245                 continue;
11246 not_match_dev:
11247                 err |= BACKREF_MISSING;
11248                 error(
11249                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11250                         chunk_key.objectid, chunk_end, i);
11251                 continue;
11252         }
11253         btrfs_release_path(&path);
11254 out:
11255         return err;
11256 }
11257
11258 /*
11259  * Main entry function to check known items and update related accounting info
11260  */
11261 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11262 {
11263         struct btrfs_fs_info *fs_info = root->fs_info;
11264         struct btrfs_key key;
11265         int slot = 0;
11266         int type;
11267         struct btrfs_extent_data_ref *dref;
11268         int ret;
11269         int err = 0;
11270
11271 next:
11272         btrfs_item_key_to_cpu(eb, &key, slot);
11273         type = key.type;
11274
11275         switch (type) {
11276         case BTRFS_EXTENT_DATA_KEY:
11277                 ret = check_extent_data_item(root, eb, slot);
11278                 err |= ret;
11279                 break;
11280         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11281                 ret = check_block_group_item(fs_info, eb, slot);
11282                 err |= ret;
11283                 break;
11284         case BTRFS_DEV_ITEM_KEY:
11285                 ret = check_dev_item(fs_info, eb, slot);
11286                 err |= ret;
11287                 break;
11288         case BTRFS_CHUNK_ITEM_KEY:
11289                 ret = check_chunk_item(fs_info, eb, slot);
11290                 err |= ret;
11291                 break;
11292         case BTRFS_DEV_EXTENT_KEY:
11293                 ret = check_dev_extent_item(fs_info, eb, slot);
11294                 err |= ret;
11295                 break;
11296         case BTRFS_EXTENT_ITEM_KEY:
11297         case BTRFS_METADATA_ITEM_KEY:
11298                 ret = check_extent_item(fs_info, eb, slot);
11299                 err |= ret;
11300                 break;
11301         case BTRFS_EXTENT_CSUM_KEY:
11302                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11303                 break;
11304         case BTRFS_TREE_BLOCK_REF_KEY:
11305                 ret = check_tree_block_backref(fs_info, key.offset,
11306                                                key.objectid, -1);
11307                 err |= ret;
11308                 break;
11309         case BTRFS_EXTENT_DATA_REF_KEY:
11310                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11311                 ret = check_extent_data_backref(fs_info,
11312                                 btrfs_extent_data_ref_root(eb, dref),
11313                                 btrfs_extent_data_ref_objectid(eb, dref),
11314                                 btrfs_extent_data_ref_offset(eb, dref),
11315                                 key.objectid, 0,
11316                                 btrfs_extent_data_ref_count(eb, dref));
11317                 err |= ret;
11318                 break;
11319         case BTRFS_SHARED_BLOCK_REF_KEY:
11320                 ret = check_shared_block_backref(fs_info, key.offset,
11321                                                  key.objectid, -1);
11322                 err |= ret;
11323                 break;
11324         case BTRFS_SHARED_DATA_REF_KEY:
11325                 ret = check_shared_data_backref(fs_info, key.offset,
11326                                                 key.objectid);
11327                 err |= ret;
11328                 break;
11329         default:
11330                 break;
11331         }
11332
11333         if (++slot < btrfs_header_nritems(eb))
11334                 goto next;
11335
11336         return err;
11337 }
11338
11339 /*
11340  * Helper function for later fs/subvol tree check.  To determine if a tree
11341  * block should be checked.
11342  * This function will ensure only the direct referencer with lowest rootid to
11343  * check a fs/subvolume tree block.
11344  *
11345  * Backref check at extent tree would detect errors like missing subvolume
11346  * tree, so we can do aggressive check to reduce duplicated checks.
11347  */
11348 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11349 {
11350         struct btrfs_root *extent_root = root->fs_info->extent_root;
11351         struct btrfs_key key;
11352         struct btrfs_path path;
11353         struct extent_buffer *leaf;
11354         int slot;
11355         struct btrfs_extent_item *ei;
11356         unsigned long ptr;
11357         unsigned long end;
11358         int type;
11359         u32 item_size;
11360         u64 offset;
11361         struct btrfs_extent_inline_ref *iref;
11362         int ret;
11363
11364         btrfs_init_path(&path);
11365         key.objectid = btrfs_header_bytenr(eb);
11366         key.type = BTRFS_METADATA_ITEM_KEY;
11367         key.offset = (u64)-1;
11368
11369         /*
11370          * Any failure in backref resolving means we can't determine
11371          * whom the tree block belongs to.
11372          * So in that case, we need to check that tree block
11373          */
11374         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11375         if (ret < 0)
11376                 goto need_check;
11377
11378         ret = btrfs_previous_extent_item(extent_root, &path,
11379                                          btrfs_header_bytenr(eb));
11380         if (ret)
11381                 goto need_check;
11382
11383         leaf = path.nodes[0];
11384         slot = path.slots[0];
11385         btrfs_item_key_to_cpu(leaf, &key, slot);
11386         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11387
11388         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11389                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11390         } else {
11391                 struct btrfs_tree_block_info *info;
11392
11393                 info = (struct btrfs_tree_block_info *)(ei + 1);
11394                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11395         }
11396
11397         item_size = btrfs_item_size_nr(leaf, slot);
11398         ptr = (unsigned long)iref;
11399         end = (unsigned long)ei + item_size;
11400         while (ptr < end) {
11401                 iref = (struct btrfs_extent_inline_ref *)ptr;
11402                 type = btrfs_extent_inline_ref_type(leaf, iref);
11403                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11404
11405                 /*
11406                  * We only check the tree block if current root is
11407                  * the lowest referencer of it.
11408                  */
11409                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11410                     offset < root->objectid) {
11411                         btrfs_release_path(&path);
11412                         return 0;
11413                 }
11414
11415                 ptr += btrfs_extent_inline_ref_size(type);
11416         }
11417         /*
11418          * Normally we should also check keyed tree block ref, but that may be
11419          * very time consuming.  Inlined ref should already make us skip a lot
11420          * of refs now.  So skip search keyed tree block ref.
11421          */
11422
11423 need_check:
11424         btrfs_release_path(&path);
11425         return 1;
11426 }
11427
11428 /*
11429  * Traversal function for tree block. We will do:
11430  * 1) Skip shared fs/subvolume tree blocks
11431  * 2) Update related bytes accounting
11432  * 3) Pre-order traversal
11433  */
11434 static int traverse_tree_block(struct btrfs_root *root,
11435                                 struct extent_buffer *node)
11436 {
11437         struct extent_buffer *eb;
11438         struct btrfs_key key;
11439         struct btrfs_key drop_key;
11440         int level;
11441         u64 nr;
11442         int i;
11443         int err = 0;
11444         int ret;
11445
11446         /*
11447          * Skip shared fs/subvolume tree block, in that case they will
11448          * be checked by referencer with lowest rootid
11449          */
11450         if (is_fstree(root->objectid) && !should_check(root, node))
11451                 return 0;
11452
11453         /* Update bytes accounting */
11454         total_btree_bytes += node->len;
11455         if (fs_root_objectid(btrfs_header_owner(node)))
11456                 total_fs_tree_bytes += node->len;
11457         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11458                 total_extent_tree_bytes += node->len;
11459         if (!found_old_backref &&
11460             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11461             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11462             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11463                 found_old_backref = 1;
11464
11465         /* pre-order tranversal, check itself first */
11466         level = btrfs_header_level(node);
11467         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11468                                    btrfs_header_level(node),
11469                                    btrfs_header_owner(node));
11470         err |= ret;
11471         if (err)
11472                 error(
11473         "check %s failed root %llu bytenr %llu level %d, force continue check",
11474                         level ? "node":"leaf", root->objectid,
11475                         btrfs_header_bytenr(node), btrfs_header_level(node));
11476
11477         if (!level) {
11478                 btree_space_waste += btrfs_leaf_free_space(root, node);
11479                 ret = check_leaf_items(root, node);
11480                 err |= ret;
11481                 return err;
11482         }
11483
11484         nr = btrfs_header_nritems(node);
11485         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11486         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11487                 sizeof(struct btrfs_key_ptr);
11488
11489         /* Then check all its children */
11490         for (i = 0; i < nr; i++) {
11491                 u64 blocknr = btrfs_node_blockptr(node, i);
11492
11493                 btrfs_node_key_to_cpu(node, &key, i);
11494                 if (level == root->root_item.drop_level &&
11495                     is_dropped_key(&key, &drop_key))
11496                         continue;
11497
11498                 /*
11499                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11500                  * to call the function itself.
11501                  */
11502                 eb = read_tree_block(root->fs_info, blocknr, 0);
11503                 if (extent_buffer_uptodate(eb)) {
11504                         ret = traverse_tree_block(root, eb);
11505                         err |= ret;
11506                 }
11507                 free_extent_buffer(eb);
11508         }
11509
11510         return err;
11511 }
11512
11513 /*
11514  * Low memory usage version check_chunks_and_extents.
11515  */
11516 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11517 {
11518         struct btrfs_path path;
11519         struct btrfs_key key;
11520         struct btrfs_root *root1;
11521         struct btrfs_root *cur_root;
11522         int err = 0;
11523         int ret;
11524
11525         root1 = root->fs_info->chunk_root;
11526         ret = traverse_tree_block(root1, root1->node);
11527         err |= ret;
11528
11529         root1 = root->fs_info->tree_root;
11530         ret = traverse_tree_block(root1, root1->node);
11531         err |= ret;
11532
11533         btrfs_init_path(&path);
11534         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11535         key.offset = 0;
11536         key.type = BTRFS_ROOT_ITEM_KEY;
11537
11538         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11539         if (ret) {
11540                 error("cannot find extent treet in tree_root");
11541                 goto out;
11542         }
11543
11544         while (1) {
11545                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11546                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11547                         goto next;
11548                 key.offset = (u64)-1;
11549
11550                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11551                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11552                                         &key);
11553                 else
11554                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11555                 if (IS_ERR(cur_root) || !cur_root) {
11556                         error("failed to read tree: %lld", key.objectid);
11557                         goto next;
11558                 }
11559
11560                 ret = traverse_tree_block(cur_root, cur_root->node);
11561                 err |= ret;
11562
11563                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11564                         btrfs_free_fs_root(cur_root);
11565 next:
11566                 ret = btrfs_next_item(root1, &path);
11567                 if (ret)
11568                         goto out;
11569         }
11570
11571 out:
11572         btrfs_release_path(&path);
11573         return err;
11574 }
11575
11576 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11577                            struct btrfs_root *root, int overwrite)
11578 {
11579         struct extent_buffer *c;
11580         struct extent_buffer *old = root->node;
11581         int level;
11582         int ret;
11583         struct btrfs_disk_key disk_key = {0,0,0};
11584
11585         level = 0;
11586
11587         if (overwrite) {
11588                 c = old;
11589                 extent_buffer_get(c);
11590                 goto init;
11591         }
11592         c = btrfs_alloc_free_block(trans, root,
11593                                    root->fs_info->nodesize,
11594                                    root->root_key.objectid,
11595                                    &disk_key, level, 0, 0);
11596         if (IS_ERR(c)) {
11597                 c = old;
11598                 extent_buffer_get(c);
11599                 overwrite = 1;
11600         }
11601 init:
11602         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11603         btrfs_set_header_level(c, level);
11604         btrfs_set_header_bytenr(c, c->start);
11605         btrfs_set_header_generation(c, trans->transid);
11606         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11607         btrfs_set_header_owner(c, root->root_key.objectid);
11608
11609         write_extent_buffer(c, root->fs_info->fsid,
11610                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11611
11612         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11613                             btrfs_header_chunk_tree_uuid(c),
11614                             BTRFS_UUID_SIZE);
11615
11616         btrfs_mark_buffer_dirty(c);
11617         /*
11618          * this case can happen in the following case:
11619          *
11620          * 1.overwrite previous root.
11621          *
11622          * 2.reinit reloc data root, this is because we skip pin
11623          * down reloc data tree before which means we can allocate
11624          * same block bytenr here.
11625          */
11626         if (old->start == c->start) {
11627                 btrfs_set_root_generation(&root->root_item,
11628                                           trans->transid);
11629                 root->root_item.level = btrfs_header_level(root->node);
11630                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11631                                         &root->root_key, &root->root_item);
11632                 if (ret) {
11633                         free_extent_buffer(c);
11634                         return ret;
11635                 }
11636         }
11637         free_extent_buffer(old);
11638         root->node = c;
11639         add_root_to_dirty_list(root);
11640         return 0;
11641 }
11642
11643 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11644                                 struct extent_buffer *eb, int tree_root)
11645 {
11646         struct extent_buffer *tmp;
11647         struct btrfs_root_item *ri;
11648         struct btrfs_key key;
11649         u64 bytenr;
11650         int level = btrfs_header_level(eb);
11651         int nritems;
11652         int ret;
11653         int i;
11654
11655         /*
11656          * If we have pinned this block before, don't pin it again.
11657          * This can not only avoid forever loop with broken filesystem
11658          * but also give us some speedups.
11659          */
11660         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11661                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11662                 return 0;
11663
11664         btrfs_pin_extent(fs_info, eb->start, eb->len);
11665
11666         nritems = btrfs_header_nritems(eb);
11667         for (i = 0; i < nritems; i++) {
11668                 if (level == 0) {
11669                         btrfs_item_key_to_cpu(eb, &key, i);
11670                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11671                                 continue;
11672                         /* Skip the extent root and reloc roots */
11673                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11674                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11675                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11676                                 continue;
11677                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11678                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11679
11680                         /*
11681                          * If at any point we start needing the real root we
11682                          * will have to build a stump root for the root we are
11683                          * in, but for now this doesn't actually use the root so
11684                          * just pass in extent_root.
11685                          */
11686                         tmp = read_tree_block(fs_info, bytenr, 0);
11687                         if (!extent_buffer_uptodate(tmp)) {
11688                                 fprintf(stderr, "Error reading root block\n");
11689                                 return -EIO;
11690                         }
11691                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11692                         free_extent_buffer(tmp);
11693                         if (ret)
11694                                 return ret;
11695                 } else {
11696                         bytenr = btrfs_node_blockptr(eb, i);
11697
11698                         /* If we aren't the tree root don't read the block */
11699                         if (level == 1 && !tree_root) {
11700                                 btrfs_pin_extent(fs_info, bytenr,
11701                                                 fs_info->nodesize);
11702                                 continue;
11703                         }
11704
11705                         tmp = read_tree_block(fs_info, bytenr, 0);
11706                         if (!extent_buffer_uptodate(tmp)) {
11707                                 fprintf(stderr, "Error reading tree block\n");
11708                                 return -EIO;
11709                         }
11710                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11711                         free_extent_buffer(tmp);
11712                         if (ret)
11713                                 return ret;
11714                 }
11715         }
11716
11717         return 0;
11718 }
11719
11720 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11721 {
11722         int ret;
11723
11724         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11725         if (ret)
11726                 return ret;
11727
11728         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11729 }
11730
11731 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11732 {
11733         struct btrfs_block_group_cache *cache;
11734         struct btrfs_path path;
11735         struct extent_buffer *leaf;
11736         struct btrfs_chunk *chunk;
11737         struct btrfs_key key;
11738         int ret;
11739         u64 start;
11740
11741         btrfs_init_path(&path);
11742         key.objectid = 0;
11743         key.type = BTRFS_CHUNK_ITEM_KEY;
11744         key.offset = 0;
11745         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11746         if (ret < 0) {
11747                 btrfs_release_path(&path);
11748                 return ret;
11749         }
11750
11751         /*
11752          * We do this in case the block groups were screwed up and had alloc
11753          * bits that aren't actually set on the chunks.  This happens with
11754          * restored images every time and could happen in real life I guess.
11755          */
11756         fs_info->avail_data_alloc_bits = 0;
11757         fs_info->avail_metadata_alloc_bits = 0;
11758         fs_info->avail_system_alloc_bits = 0;
11759
11760         /* First we need to create the in-memory block groups */
11761         while (1) {
11762                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11763                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11764                         if (ret < 0) {
11765                                 btrfs_release_path(&path);
11766                                 return ret;
11767                         }
11768                         if (ret) {
11769                                 ret = 0;
11770                                 break;
11771                         }
11772                 }
11773                 leaf = path.nodes[0];
11774                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11775                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11776                         path.slots[0]++;
11777                         continue;
11778                 }
11779
11780                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11781                 btrfs_add_block_group(fs_info, 0,
11782                                       btrfs_chunk_type(leaf, chunk),
11783                                       key.objectid, key.offset,
11784                                       btrfs_chunk_length(leaf, chunk));
11785                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11786                                  key.offset + btrfs_chunk_length(leaf, chunk));
11787                 path.slots[0]++;
11788         }
11789         start = 0;
11790         while (1) {
11791                 cache = btrfs_lookup_first_block_group(fs_info, start);
11792                 if (!cache)
11793                         break;
11794                 cache->cached = 1;
11795                 start = cache->key.objectid + cache->key.offset;
11796         }
11797
11798         btrfs_release_path(&path);
11799         return 0;
11800 }
11801
/*
 * Drop a pending balance from the tree root and reinit the data reloc tree.
 *
 * If a balance item exists in the tree root, it is deleted together with all
 * items whose objectid is BTRFS_TREE_RELOC_OBJECTID.  If there is no balance
 * item, those deletions are skipped.  In both cases the root of the data reloc
 * tree is reinitialized afterwards and gets a new root directory.
 *
 * Return 0 on success, non-zero if any of the steps failed.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        /* del_slot is only valid while del_nr is non-zero */
        int del_slot, del_nr = 0;
        int ret;
        /* Set once an item of the current leaf has been looked at */
        int found = 0;

        btrfs_init_path(&path);
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret) {
                /* ret > 0: no balance item, only the data reloc tree is left */
                if (ret > 0)
                        ret = 0;
                if (!ret)
                        goto reinit_data_reloc;
                else
                        goto out;
        }

        ret = btrfs_del_item(trans, root, &path);
        if (ret)
                goto out;
        btrfs_release_path(&path);

        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret < 0)
                goto out;
        /*
         * Collect runs of consecutive reloc items in the current leaf and
         * delete each run with a single btrfs_del_items() call.
         */
        while (1) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        /* The search pointed past the last item: nothing left */
                        if (!found)
                                break;

                        /* Flush the pending run before leaving this leaf */
                        if (del_nr) {
                                ret = btrfs_del_items(trans, root, &path,
                                                      del_slot, del_nr);
                                del_nr = 0;
                                if (ret)
                                        goto out;
                        }
                        /*
                         * Search again right behind the last key that was
                         * read, which continues the scan after this leaf.
                         */
                        key.offset++;
                        btrfs_release_path(&path);

                        found = 0;
                        ret = btrfs_search_slot(trans, root, &key, &path,
                                                -1, 1);
                        if (ret < 0)
                                goto out;
                        continue;
                }
                found = 1;
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                /* Past the reloc objectid: no further item can match */
                if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
                        break;
                if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        path.slots[0]++;
                        continue;
                }
                /* Start a new run at this slot or extend the current one */
                if (!del_nr) {
                        del_slot = path.slots[0];
                        del_nr = 1;
                } else {
                        del_nr++;
                }
                path.slots[0]++;
        }

        /* Delete the run that was still pending when the loop ended */
        if (del_nr) {
                ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
                if (ret)
                        goto out;
        }
        btrfs_release_path(&path);

reinit_data_reloc:
        key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;
        /* From here on root is the data reloc root, not the tree root */
        root = btrfs_read_fs_root(fs_info, &key);
        if (IS_ERR(root)) {
                fprintf(stderr, "Error reading data reloc tree\n");
                ret = PTR_ERR(root);
                goto out;
        }
        record_root_in_trans(trans, root);
        ret = btrfs_fsck_reinit_root(trans, root, 0);
        if (ret)
                goto out;
        ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
        btrfs_release_path(&path);
        return ret;
}
11904
11905 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11906                               struct btrfs_fs_info *fs_info)
11907 {
11908         u64 start = 0;
11909         int ret;
11910
11911         /*
11912          * The only reason we don't do this is because right now we're just
11913          * walking the trees we find and pinning down their bytes, we don't look
11914          * at any of the leaves.  In order to do mixed groups we'd have to check
11915          * the leaves of any fs roots and pin down the bytes for any file
11916          * extents we find.  Not hard but why do it if we don't have to?
11917          */
11918         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11919                 fprintf(stderr, "We don't support re-initing the extent tree "
11920                         "for mixed block groups yet, please notify a btrfs "
11921                         "developer you want to do this so they can add this "
11922                         "functionality.\n");
11923                 return -EINVAL;
11924         }
11925
11926         /*
11927          * first we need to walk all of the trees except the extent tree and pin
11928          * down the bytes that are in use so we don't overwrite any existing
11929          * metadata.
11930          */
11931         ret = pin_metadata_blocks(fs_info);
11932         if (ret) {
11933                 fprintf(stderr, "error pinning down used bytes\n");
11934                 return ret;
11935         }
11936
11937         /*
11938          * Need to drop all the block groups since we're going to recreate all
11939          * of them again.
11940          */
11941         btrfs_free_block_groups(fs_info);
11942         ret = reset_block_groups(fs_info);
11943         if (ret) {
11944                 fprintf(stderr, "error resetting the block groups\n");
11945                 return ret;
11946         }
11947
11948         /* Ok we can allocate now, reinit the extent root */
11949         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11950         if (ret) {
11951                 fprintf(stderr, "extent root initialization failed\n");
11952                 /*
11953                  * When the transaction code is updated we should end the
11954                  * transaction, but for now progs only knows about commit so
11955                  * just return an error.
11956                  */
11957                 return ret;
11958         }
11959
11960         /*
11961          * Now we have all the in-memory block groups setup so we can make
11962          * allocations properly, and the metadata we care about is safe since we
11963          * pinned all of it above.
11964          */
11965         while (1) {
11966                 struct btrfs_block_group_cache *cache;
11967
11968                 cache = btrfs_lookup_first_block_group(fs_info, start);
11969                 if (!cache)
11970                         break;
11971                 start = cache->key.objectid + cache->key.offset;
11972                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11973                                         &cache->key, &cache->item,
11974                                         sizeof(cache->item));
11975                 if (ret) {
11976                         fprintf(stderr, "Error adding block group\n");
11977                         return ret;
11978                 }
11979                 btrfs_extent_post_op(trans, fs_info->extent_root);
11980         }
11981
11982         ret = reset_balance(trans, fs_info);
11983         if (ret)
11984                 fprintf(stderr, "error resetting the pending balance\n");
11985
11986         return ret;
11987 }
11988
11989 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11990 {
11991         struct btrfs_path path;
11992         struct btrfs_trans_handle *trans;
11993         struct btrfs_key key;
11994         int ret;
11995
11996         printf("Recowing metadata block %llu\n", eb->start);
11997         key.objectid = btrfs_header_owner(eb);
11998         key.type = BTRFS_ROOT_ITEM_KEY;
11999         key.offset = (u64)-1;
12000
12001         root = btrfs_read_fs_root(root->fs_info, &key);
12002         if (IS_ERR(root)) {
12003                 fprintf(stderr, "Couldn't find owner root %llu\n",
12004                         key.objectid);
12005                 return PTR_ERR(root);
12006         }
12007
12008         trans = btrfs_start_transaction(root, 1);
12009         if (IS_ERR(trans))
12010                 return PTR_ERR(trans);
12011
12012         btrfs_init_path(&path);
12013         path.lowest_level = btrfs_header_level(eb);
12014         if (path.lowest_level)
12015                 btrfs_node_key_to_cpu(eb, &key, 0);
12016         else
12017                 btrfs_item_key_to_cpu(eb, &key, 0);
12018
12019         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12020         btrfs_commit_transaction(trans, root);
12021         btrfs_release_path(&path);
12022         return ret;
12023 }
12024
12025 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12026 {
12027         struct btrfs_path path;
12028         struct btrfs_trans_handle *trans;
12029         struct btrfs_key key;
12030         int ret;
12031
12032         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12033                bad->key.type, bad->key.offset);
12034         key.objectid = bad->root_id;
12035         key.type = BTRFS_ROOT_ITEM_KEY;
12036         key.offset = (u64)-1;
12037
12038         root = btrfs_read_fs_root(root->fs_info, &key);
12039         if (IS_ERR(root)) {
12040                 fprintf(stderr, "Couldn't find owner root %llu\n",
12041                         key.objectid);
12042                 return PTR_ERR(root);
12043         }
12044
12045         trans = btrfs_start_transaction(root, 1);
12046         if (IS_ERR(trans))
12047                 return PTR_ERR(trans);
12048
12049         btrfs_init_path(&path);
12050         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12051         if (ret) {
12052                 if (ret > 0)
12053                         ret = 0;
12054                 goto out;
12055         }
12056         ret = btrfs_del_item(trans, root, &path);
12057 out:
12058         btrfs_commit_transaction(trans, root);
12059         btrfs_release_path(&path);
12060         return ret;
12061 }
12062
12063 static int zero_log_tree(struct btrfs_root *root)
12064 {
12065         struct btrfs_trans_handle *trans;
12066         int ret;
12067
12068         trans = btrfs_start_transaction(root, 1);
12069         if (IS_ERR(trans)) {
12070                 ret = PTR_ERR(trans);
12071                 return ret;
12072         }
12073         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12074         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12075         ret = btrfs_commit_transaction(trans, root);
12076         return ret;
12077 }
12078
12079 static int populate_csum(struct btrfs_trans_handle *trans,
12080                          struct btrfs_root *csum_root, char *buf, u64 start,
12081                          u64 len)
12082 {
12083         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12084         u64 offset = 0;
12085         u64 sectorsize;
12086         int ret = 0;
12087
12088         while (offset < len) {
12089                 sectorsize = fs_info->sectorsize;
12090                 ret = read_extent_data(fs_info, buf, start + offset,
12091                                        &sectorsize, 0);
12092                 if (ret)
12093                         break;
12094                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12095                                             start + offset, buf, sectorsize);
12096                 if (ret)
12097                         break;
12098                 offset += sectorsize;
12099         }
12100         return ret;
12101 }
12102
12103 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12104                                       struct btrfs_root *csum_root,
12105                                       struct btrfs_root *cur_root)
12106 {
12107         struct btrfs_path path;
12108         struct btrfs_key key;
12109         struct extent_buffer *node;
12110         struct btrfs_file_extent_item *fi;
12111         char *buf = NULL;
12112         u64 start = 0;
12113         u64 len = 0;
12114         int slot = 0;
12115         int ret = 0;
12116
12117         buf = malloc(cur_root->fs_info->sectorsize);
12118         if (!buf)
12119                 return -ENOMEM;
12120
12121         btrfs_init_path(&path);
12122         key.objectid = 0;
12123         key.offset = 0;
12124         key.type = 0;
12125         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12126         if (ret < 0)
12127                 goto out;
12128         /* Iterate all regular file extents and fill its csum */
12129         while (1) {
12130                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12131
12132                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12133                         goto next;
12134                 node = path.nodes[0];
12135                 slot = path.slots[0];
12136                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12137                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12138                         goto next;
12139                 start = btrfs_file_extent_disk_bytenr(node, fi);
12140                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12141
12142                 ret = populate_csum(trans, csum_root, buf, start, len);
12143                 if (ret == -EEXIST)
12144                         ret = 0;
12145                 if (ret < 0)
12146                         goto out;
12147 next:
12148                 /*
12149                  * TODO: if next leaf is corrupted, jump to nearest next valid
12150                  * leaf.
12151                  */
12152                 ret = btrfs_next_item(cur_root, &path);
12153                 if (ret < 0)
12154                         goto out;
12155                 if (ret > 0) {
12156                         ret = 0;
12157                         goto out;
12158                 }
12159         }
12160
12161 out:
12162         btrfs_release_path(&path);
12163         free(buf);
12164         return ret;
12165 }
12166
12167 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12168                                   struct btrfs_root *csum_root)
12169 {
12170         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12171         struct btrfs_path path;
12172         struct btrfs_root *tree_root = fs_info->tree_root;
12173         struct btrfs_root *cur_root;
12174         struct extent_buffer *node;
12175         struct btrfs_key key;
12176         int slot = 0;
12177         int ret = 0;
12178
12179         btrfs_init_path(&path);
12180         key.objectid = BTRFS_FS_TREE_OBJECTID;
12181         key.offset = 0;
12182         key.type = BTRFS_ROOT_ITEM_KEY;
12183         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12184         if (ret < 0)
12185                 goto out;
12186         if (ret > 0) {
12187                 ret = -ENOENT;
12188                 goto out;
12189         }
12190
12191         while (1) {
12192                 node = path.nodes[0];
12193                 slot = path.slots[0];
12194                 btrfs_item_key_to_cpu(node, &key, slot);
12195                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12196                         goto out;
12197                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12198                         goto next;
12199                 if (!is_fstree(key.objectid))
12200                         goto next;
12201                 key.offset = (u64)-1;
12202
12203                 cur_root = btrfs_read_fs_root(fs_info, &key);
12204                 if (IS_ERR(cur_root) || !cur_root) {
12205                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12206                                 key.objectid);
12207                         goto out;
12208                 }
12209                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12210                                 cur_root);
12211                 if (ret < 0)
12212                         goto out;
12213 next:
12214                 ret = btrfs_next_item(tree_root, &path);
12215                 if (ret > 0) {
12216                         ret = 0;
12217                         goto out;
12218                 }
12219                 if (ret < 0)
12220                         goto out;
12221         }
12222
12223 out:
12224         btrfs_release_path(&path);
12225         return ret;
12226 }
12227
12228 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12229                                       struct btrfs_root *csum_root)
12230 {
12231         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12232         struct btrfs_path path;
12233         struct btrfs_extent_item *ei;
12234         struct extent_buffer *leaf;
12235         char *buf;
12236         struct btrfs_key key;
12237         int ret;
12238
12239         btrfs_init_path(&path);
12240         key.objectid = 0;
12241         key.type = BTRFS_EXTENT_ITEM_KEY;
12242         key.offset = 0;
12243         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12244         if (ret < 0) {
12245                 btrfs_release_path(&path);
12246                 return ret;
12247         }
12248
12249         buf = malloc(csum_root->fs_info->sectorsize);
12250         if (!buf) {
12251                 btrfs_release_path(&path);
12252                 return -ENOMEM;
12253         }
12254
12255         while (1) {
12256                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12257                         ret = btrfs_next_leaf(extent_root, &path);
12258                         if (ret < 0)
12259                                 break;
12260                         if (ret) {
12261                                 ret = 0;
12262                                 break;
12263                         }
12264                 }
12265                 leaf = path.nodes[0];
12266
12267                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12268                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12269                         path.slots[0]++;
12270                         continue;
12271                 }
12272
12273                 ei = btrfs_item_ptr(leaf, path.slots[0],
12274                                     struct btrfs_extent_item);
12275                 if (!(btrfs_extent_flags(leaf, ei) &
12276                       BTRFS_EXTENT_FLAG_DATA)) {
12277                         path.slots[0]++;
12278                         continue;
12279                 }
12280
12281                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12282                                     key.offset);
12283                 if (ret)
12284                         break;
12285                 path.slots[0]++;
12286         }
12287
12288         btrfs_release_path(&path);
12289         free(buf);
12290         return ret;
12291 }
12292
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * The extent tree is the default source of data extents.  After an extent
 * tree init all extent info has been wiped, so the extent tree cannot be
 * relied on; in that case the caller sets @search_fs_tree and the
 * fs/subvolume trees are walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12309
12310 static void free_roots_info_cache(void)
12311 {
12312         if (!roots_info_cache)
12313                 return;
12314
12315         while (!cache_tree_empty(roots_info_cache)) {
12316                 struct cache_extent *entry;
12317                 struct root_item_info *rii;
12318
12319                 entry = first_cache_extent(roots_info_cache);
12320                 if (!entry)
12321                         break;
12322                 remove_cache_extent(roots_info_cache, entry);
12323                 rii = container_of(entry, struct root_item_info, cache_extent);
12324                 free(rii);
12325         }
12326
12327         free(roots_info_cache);
12328         roots_info_cache = NULL;
12329 }
12330
12331 static int build_roots_info_cache(struct btrfs_fs_info *info)
12332 {
12333         int ret = 0;
12334         struct btrfs_key key;
12335         struct extent_buffer *leaf;
12336         struct btrfs_path path;
12337
12338         if (!roots_info_cache) {
12339                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12340                 if (!roots_info_cache)
12341                         return -ENOMEM;
12342                 cache_tree_init(roots_info_cache);
12343         }
12344
12345         btrfs_init_path(&path);
12346         key.objectid = 0;
12347         key.type = BTRFS_EXTENT_ITEM_KEY;
12348         key.offset = 0;
12349         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12350         if (ret < 0)
12351                 goto out;
12352         leaf = path.nodes[0];
12353
12354         while (1) {
12355                 struct btrfs_key found_key;
12356                 struct btrfs_extent_item *ei;
12357                 struct btrfs_extent_inline_ref *iref;
12358                 int slot = path.slots[0];
12359                 int type;
12360                 u64 flags;
12361                 u64 root_id;
12362                 u8 level;
12363                 struct cache_extent *entry;
12364                 struct root_item_info *rii;
12365
12366                 if (slot >= btrfs_header_nritems(leaf)) {
12367                         ret = btrfs_next_leaf(info->extent_root, &path);
12368                         if (ret < 0) {
12369                                 break;
12370                         } else if (ret) {
12371                                 ret = 0;
12372                                 break;
12373                         }
12374                         leaf = path.nodes[0];
12375                         slot = path.slots[0];
12376                 }
12377
12378                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12379
12380                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12381                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12382                         goto next;
12383
12384                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12385                 flags = btrfs_extent_flags(leaf, ei);
12386
12387                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12388                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12389                         goto next;
12390
12391                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12392                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12393                         level = found_key.offset;
12394                 } else {
12395                         struct btrfs_tree_block_info *binfo;
12396
12397                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12398                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12399                         level = btrfs_tree_block_level(leaf, binfo);
12400                 }
12401
12402                 /*
12403                  * For a root extent, it must be of the following type and the
12404                  * first (and only one) iref in the item.
12405                  */
12406                 type = btrfs_extent_inline_ref_type(leaf, iref);
12407                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12408                         goto next;
12409
12410                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12411                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12412                 if (!entry) {
12413                         rii = malloc(sizeof(struct root_item_info));
12414                         if (!rii) {
12415                                 ret = -ENOMEM;
12416                                 goto out;
12417                         }
12418                         rii->cache_extent.start = root_id;
12419                         rii->cache_extent.size = 1;
12420                         rii->level = (u8)-1;
12421                         entry = &rii->cache_extent;
12422                         ret = insert_cache_extent(roots_info_cache, entry);
12423                         ASSERT(ret == 0);
12424                 } else {
12425                         rii = container_of(entry, struct root_item_info,
12426                                            cache_extent);
12427                 }
12428
12429                 ASSERT(rii->cache_extent.start == root_id);
12430                 ASSERT(rii->cache_extent.size == 1);
12431
12432                 if (level > rii->level || rii->level == (u8)-1) {
12433                         rii->level = level;
12434                         rii->bytenr = found_key.objectid;
12435                         rii->gen = btrfs_extent_generation(leaf, ei);
12436                         rii->node_count = 1;
12437                 } else if (level == rii->level) {
12438                         rii->node_count++;
12439                 }
12440 next:
12441                 path.slots[0]++;
12442         }
12443
12444 out:
12445         btrfs_release_path(&path);
12446
12447         return ret;
12448 }
12449
12450 static int maybe_repair_root_item(struct btrfs_path *path,
12451                                   const struct btrfs_key *root_key,
12452                                   const int read_only_mode)
12453 {
12454         const u64 root_id = root_key->objectid;
12455         struct cache_extent *entry;
12456         struct root_item_info *rii;
12457         struct btrfs_root_item ri;
12458         unsigned long offset;
12459
12460         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12461         if (!entry) {
12462                 fprintf(stderr,
12463                         "Error: could not find extent items for root %llu\n",
12464                         root_key->objectid);
12465                 return -ENOENT;
12466         }
12467
12468         rii = container_of(entry, struct root_item_info, cache_extent);
12469         ASSERT(rii->cache_extent.start == root_id);
12470         ASSERT(rii->cache_extent.size == 1);
12471
12472         if (rii->node_count != 1) {
12473                 fprintf(stderr,
12474                         "Error: could not find btree root extent for root %llu\n",
12475                         root_id);
12476                 return -ENOENT;
12477         }
12478
12479         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12480         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12481
12482         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12483             btrfs_root_level(&ri) != rii->level ||
12484             btrfs_root_generation(&ri) != rii->gen) {
12485
12486                 /*
12487                  * If we're in repair mode but our caller told us to not update
12488                  * the root item, i.e. just check if it needs to be updated, don't
12489                  * print this message, since the caller will call us again shortly
12490                  * for the same root item without read only mode (the caller will
12491                  * open a transaction first).
12492                  */
12493                 if (!(read_only_mode && repair))
12494                         fprintf(stderr,
12495                                 "%sroot item for root %llu,"
12496                                 " current bytenr %llu, current gen %llu, current level %u,"
12497                                 " new bytenr %llu, new gen %llu, new level %u\n",
12498                                 (read_only_mode ? "" : "fixing "),
12499                                 root_id,
12500                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12501                                 btrfs_root_level(&ri),
12502                                 rii->bytenr, rii->gen, rii->level);
12503
12504                 if (btrfs_root_generation(&ri) > rii->gen) {
12505                         fprintf(stderr,
12506                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12507                                 root_id, btrfs_root_generation(&ri), rii->gen);
12508                         return -EINVAL;
12509                 }
12510
12511                 if (!read_only_mode) {
12512                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12513                         btrfs_set_root_level(&ri, rii->level);
12514                         btrfs_set_root_generation(&ri, rii->gen);
12515                         write_extent_buffer(path->nodes[0], &ri,
12516                                             offset, sizeof(ri));
12517                 }
12518
12519                 return 1;
12520         }
12521
12522         return 0;
12523 }
12524
12525 /*
12526  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12527  * caused read-only snapshots to be corrupted if they were created at a moment
12528  * when the source subvolume/snapshot had orphan items. The issue was that the
12529  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12530  * node instead of the post orphan cleanup root node.
12531  * So this function, and its callees, just detects and fixes those cases. Even
12532  * though the regression was for read-only snapshots, this function applies to
12533  * any snapshot/subvolume root.
12534  * This must be run before any other repair code - not doing it so, makes other
12535  * repair code delete or modify backrefs in the extent tree for example, which
12536  * will result in an inconsistent fs after repairing the root items.
12537  */
12538 static int repair_root_items(struct btrfs_fs_info *info)
12539 {
12540         struct btrfs_path path;
12541         struct btrfs_key key;
12542         struct extent_buffer *leaf;
12543         struct btrfs_trans_handle *trans = NULL;
12544         int ret = 0;
12545         int bad_roots = 0;
12546         int need_trans = 0;
12547
12548         btrfs_init_path(&path);
12549
12550         ret = build_roots_info_cache(info);
12551         if (ret)
12552                 goto out;
12553
12554         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12555         key.type = BTRFS_ROOT_ITEM_KEY;
12556         key.offset = 0;
12557
12558 again:
12559         /*
12560          * Avoid opening and committing transactions if a leaf doesn't have
12561          * any root items that need to be fixed, so that we avoid rotating
12562          * backup roots unnecessarily.
12563          */
12564         if (need_trans) {
12565                 trans = btrfs_start_transaction(info->tree_root, 1);
12566                 if (IS_ERR(trans)) {
12567                         ret = PTR_ERR(trans);
12568                         goto out;
12569                 }
12570         }
12571
12572         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12573                                 0, trans ? 1 : 0);
12574         if (ret < 0)
12575                 goto out;
12576         leaf = path.nodes[0];
12577
12578         while (1) {
12579                 struct btrfs_key found_key;
12580
12581                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12582                         int no_more_keys = find_next_key(&path, &key);
12583
12584                         btrfs_release_path(&path);
12585                         if (trans) {
12586                                 ret = btrfs_commit_transaction(trans,
12587                                                                info->tree_root);
12588                                 trans = NULL;
12589                                 if (ret < 0)
12590                                         goto out;
12591                         }
12592                         need_trans = 0;
12593                         if (no_more_keys)
12594                                 break;
12595                         goto again;
12596                 }
12597
12598                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12599
12600                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12601                         goto next;
12602                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12603                         goto next;
12604
12605                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12606                 if (ret < 0)
12607                         goto out;
12608                 if (ret) {
12609                         if (!trans && repair) {
12610                                 need_trans = 1;
12611                                 key = found_key;
12612                                 btrfs_release_path(&path);
12613                                 goto again;
12614                         }
12615                         bad_roots++;
12616                 }
12617 next:
12618                 path.slots[0]++;
12619         }
12620         ret = 0;
12621 out:
12622         free_roots_info_cache();
12623         btrfs_release_path(&path);
12624         if (trans)
12625                 btrfs_commit_transaction(trans, info->tree_root);
12626         if (ret < 0)
12627                 return ret;
12628
12629         return bad_roots;
12630 }
12631
12632 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12633 {
12634         struct btrfs_trans_handle *trans;
12635         struct btrfs_block_group_cache *bg_cache;
12636         u64 current = 0;
12637         int ret = 0;
12638
12639         /* Clear all free space cache inodes and its extent data */
12640         while (1) {
12641                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12642                 if (!bg_cache)
12643                         break;
12644                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12645                 if (ret < 0)
12646                         return ret;
12647                 current = bg_cache->key.objectid + bg_cache->key.offset;
12648         }
12649
12650         /* Don't forget to set cache_generation to -1 */
12651         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12652         if (IS_ERR(trans)) {
12653                 error("failed to update super block cache generation");
12654                 return PTR_ERR(trans);
12655         }
12656         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12657         btrfs_commit_transaction(trans, fs_info->tree_root);
12658
12659         return ret;
12660 }
12661
/*
 * Help text for "btrfs check": a NULL-terminated list of lines made of the
 * synopsis, a short description, the long description, an empty separator
 * and one or more lines per option.
 */
const char * const cmd_check_usage[] = {
	/* synopsis */
	"btrfs check [options] <device>",
	/* short description */
	"Check structural integrity of a filesystem (unmounted).",
	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	/* separator between description and options */
	"",
	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	/* end of list */
	NULL
};
12692
12693 int cmd_check(int argc, char **argv)
12694 {
12695         struct cache_tree root_cache;
12696         struct btrfs_root *root;
12697         struct btrfs_fs_info *info;
12698         u64 bytenr = 0;
12699         u64 subvolid = 0;
12700         u64 tree_root_bytenr = 0;
12701         u64 chunk_root_bytenr = 0;
12702         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12703         int ret;
12704         int err = 0;
12705         u64 num;
12706         int init_csum_tree = 0;
12707         int readonly = 0;
12708         int clear_space_cache = 0;
12709         int qgroup_report = 0;
12710         int qgroups_repaired = 0;
12711         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12712
12713         while(1) {
12714                 int c;
12715                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12716                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12717                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12718                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12719                 static const struct option long_options[] = {
12720                         { "super", required_argument, NULL, 's' },
12721                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12722                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12723                         { "init-csum-tree", no_argument, NULL,
12724                                 GETOPT_VAL_INIT_CSUM },
12725                         { "init-extent-tree", no_argument, NULL,
12726                                 GETOPT_VAL_INIT_EXTENT },
12727                         { "check-data-csum", no_argument, NULL,
12728                                 GETOPT_VAL_CHECK_CSUM },
12729                         { "backup", no_argument, NULL, 'b' },
12730                         { "subvol-extents", required_argument, NULL, 'E' },
12731                         { "qgroup-report", no_argument, NULL, 'Q' },
12732                         { "tree-root", required_argument, NULL, 'r' },
12733                         { "chunk-root", required_argument, NULL,
12734                                 GETOPT_VAL_CHUNK_TREE },
12735                         { "progress", no_argument, NULL, 'p' },
12736                         { "mode", required_argument, NULL,
12737                                 GETOPT_VAL_MODE },
12738                         { "clear-space-cache", required_argument, NULL,
12739                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12740                         { NULL, 0, NULL, 0}
12741                 };
12742
12743                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12744                 if (c < 0)
12745                         break;
12746                 switch(c) {
12747                         case 'a': /* ignored */ break;
12748                         case 'b':
12749                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12750                                 break;
12751                         case 's':
12752                                 num = arg_strtou64(optarg);
12753                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12754                                         error(
12755                                         "super mirror should be less than %d",
12756                                                 BTRFS_SUPER_MIRROR_MAX);
12757                                         exit(1);
12758                                 }
12759                                 bytenr = btrfs_sb_offset(((int)num));
12760                                 printf("using SB copy %llu, bytenr %llu\n", num,
12761                                        (unsigned long long)bytenr);
12762                                 break;
12763                         case 'Q':
12764                                 qgroup_report = 1;
12765                                 break;
12766                         case 'E':
12767                                 subvolid = arg_strtou64(optarg);
12768                                 break;
12769                         case 'r':
12770                                 tree_root_bytenr = arg_strtou64(optarg);
12771                                 break;
12772                         case GETOPT_VAL_CHUNK_TREE:
12773                                 chunk_root_bytenr = arg_strtou64(optarg);
12774                                 break;
12775                         case 'p':
12776                                 ctx.progress_enabled = true;
12777                                 break;
12778                         case '?':
12779                         case 'h':
12780                                 usage(cmd_check_usage);
12781                         case GETOPT_VAL_REPAIR:
12782                                 printf("enabling repair mode\n");
12783                                 repair = 1;
12784                                 ctree_flags |= OPEN_CTREE_WRITES;
12785                                 break;
12786                         case GETOPT_VAL_READONLY:
12787                                 readonly = 1;
12788                                 break;
12789                         case GETOPT_VAL_INIT_CSUM:
12790                                 printf("Creating a new CRC tree\n");
12791                                 init_csum_tree = 1;
12792                                 repair = 1;
12793                                 ctree_flags |= OPEN_CTREE_WRITES;
12794                                 break;
12795                         case GETOPT_VAL_INIT_EXTENT:
12796                                 init_extent_tree = 1;
12797                                 ctree_flags |= (OPEN_CTREE_WRITES |
12798                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12799                                 repair = 1;
12800                                 break;
12801                         case GETOPT_VAL_CHECK_CSUM:
12802                                 check_data_csum = 1;
12803                                 break;
12804                         case GETOPT_VAL_MODE:
12805                                 check_mode = parse_check_mode(optarg);
12806                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12807                                         error("unknown mode: %s", optarg);
12808                                         exit(1);
12809                                 }
12810                                 break;
12811                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12812                                 if (strcmp(optarg, "v1") == 0) {
12813                                         clear_space_cache = 1;
12814                                 } else if (strcmp(optarg, "v2") == 0) {
12815                                         clear_space_cache = 2;
12816                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12817                                 } else {
12818                                         error(
12819                 "invalid argument to --clear-space-cache, must be v1 or v2");
12820                                         exit(1);
12821                                 }
12822                                 ctree_flags |= OPEN_CTREE_WRITES;
12823                                 break;
12824                 }
12825         }
12826
12827         if (check_argc_exact(argc - optind, 1))
12828                 usage(cmd_check_usage);
12829
12830         if (ctx.progress_enabled) {
12831                 ctx.tp = TASK_NOTHING;
12832                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12833         }
12834
12835         /* This check is the only reason for --readonly to exist */
12836         if (readonly && repair) {
12837                 error("repair options are not compatible with --readonly");
12838                 exit(1);
12839         }
12840
12841         /*
12842          * Not supported yet
12843          */
12844         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12845                 error("low memory mode doesn't support repair yet");
12846                 exit(1);
12847         }
12848
12849         radix_tree_init();
12850         cache_tree_init(&root_cache);
12851
12852         if((ret = check_mounted(argv[optind])) < 0) {
12853                 error("could not check mount status: %s", strerror(-ret));
12854                 err |= !!ret;
12855                 goto err_out;
12856         } else if(ret) {
12857                 error("%s is currently mounted, aborting", argv[optind]);
12858                 ret = -EBUSY;
12859                 err |= !!ret;
12860                 goto err_out;
12861         }
12862
12863         /* only allow partial opening under repair mode */
12864         if (repair)
12865                 ctree_flags |= OPEN_CTREE_PARTIAL;
12866
12867         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12868                                   chunk_root_bytenr, ctree_flags);
12869         if (!info) {
12870                 error("cannot open file system");
12871                 ret = -EIO;
12872                 err |= !!ret;
12873                 goto err_out;
12874         }
12875
12876         global_info = info;
12877         root = info->fs_root;
12878         if (clear_space_cache == 1) {
12879                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12880                         error(
12881                 "free space cache v2 detected, use --clear-space-cache v2");
12882                         ret = 1;
12883                         goto close_out;
12884                 }
12885                 printf("Clearing free space cache\n");
12886                 ret = clear_free_space_cache(info);
12887                 if (ret) {
12888                         error("failed to clear free space cache");
12889                         ret = 1;
12890                 } else {
12891                         printf("Free space cache cleared\n");
12892                 }
12893                 goto close_out;
12894         } else if (clear_space_cache == 2) {
12895                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12896                         printf("no free space cache v2 to clear\n");
12897                         ret = 0;
12898                         goto close_out;
12899                 }
12900                 printf("Clear free space cache v2\n");
12901                 ret = btrfs_clear_free_space_tree(info);
12902                 if (ret) {
12903                         error("failed to clear free space cache v2: %d", ret);
12904                         ret = 1;
12905                 } else {
12906                         printf("free space cache v2 cleared\n");
12907                 }
12908                 goto close_out;
12909         }
12910
12911         /*
12912          * repair mode will force us to commit transaction which
12913          * will make us fail to load log tree when mounting.
12914          */
12915         if (repair && btrfs_super_log_root(info->super_copy)) {
12916                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12917                 if (!ret) {
12918                         ret = 1;
12919                         err |= !!ret;
12920                         goto close_out;
12921                 }
12922                 ret = zero_log_tree(root);
12923                 err |= !!ret;
12924                 if (ret) {
12925                         error("failed to zero log tree: %d", ret);
12926                         goto close_out;
12927                 }
12928         }
12929
12930         uuid_unparse(info->super_copy->fsid, uuidbuf);
12931         if (qgroup_report) {
12932                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12933                        uuidbuf);
12934                 ret = qgroup_verify_all(info);
12935                 err |= !!ret;
12936                 if (ret == 0)
12937                         report_qgroups(1);
12938                 goto close_out;
12939         }
12940         if (subvolid) {
12941                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12942                        subvolid, argv[optind], uuidbuf);
12943                 ret = print_extent_state(info, subvolid);
12944                 err |= !!ret;
12945                 goto close_out;
12946         }
12947         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12948
12949         if (!extent_buffer_uptodate(info->tree_root->node) ||
12950             !extent_buffer_uptodate(info->dev_root->node) ||
12951             !extent_buffer_uptodate(info->chunk_root->node)) {
12952                 error("critical roots corrupted, unable to check the filesystem");
12953                 err |= !!ret;
12954                 ret = -EIO;
12955                 goto close_out;
12956         }
12957
12958         if (init_extent_tree || init_csum_tree) {
12959                 struct btrfs_trans_handle *trans;
12960
12961                 trans = btrfs_start_transaction(info->extent_root, 0);
12962                 if (IS_ERR(trans)) {
12963                         error("error starting transaction");
12964                         ret = PTR_ERR(trans);
12965                         err |= !!ret;
12966                         goto close_out;
12967                 }
12968
12969                 if (init_extent_tree) {
12970                         printf("Creating a new extent tree\n");
12971                         ret = reinit_extent_tree(trans, info);
12972                         err |= !!ret;
12973                         if (ret)
12974                                 goto close_out;
12975                 }
12976
12977                 if (init_csum_tree) {
12978                         printf("Reinitialize checksum tree\n");
12979                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12980                         if (ret) {
12981                                 error("checksum tree initialization failed: %d",
12982                                                 ret);
12983                                 ret = -EIO;
12984                                 err |= !!ret;
12985                                 goto close_out;
12986                         }
12987
12988                         ret = fill_csum_tree(trans, info->csum_root,
12989                                              init_extent_tree);
12990                         err |= !!ret;
12991                         if (ret) {
12992                                 error("checksum tree refilling failed: %d", ret);
12993                                 return -EIO;
12994                         }
12995                 }
12996                 /*
12997                  * Ok now we commit and run the normal fsck, which will add
12998                  * extent entries for all of the items it finds.
12999                  */
13000                 ret = btrfs_commit_transaction(trans, info->extent_root);
13001                 err |= !!ret;
13002                 if (ret)
13003                         goto close_out;
13004         }
13005         if (!extent_buffer_uptodate(info->extent_root->node)) {
13006                 error("critical: extent_root, unable to check the filesystem");
13007                 ret = -EIO;
13008                 err |= !!ret;
13009                 goto close_out;
13010         }
13011         if (!extent_buffer_uptodate(info->csum_root->node)) {
13012                 error("critical: csum_root, unable to check the filesystem");
13013                 ret = -EIO;
13014                 err |= !!ret;
13015                 goto close_out;
13016         }
13017
13018         if (!ctx.progress_enabled)
13019                 fprintf(stderr, "checking extents\n");
13020         if (check_mode == CHECK_MODE_LOWMEM)
13021                 ret = check_chunks_and_extents_v2(root);
13022         else
13023                 ret = check_chunks_and_extents(root);
13024         err |= !!ret;
13025         if (ret)
13026                 error(
13027                 "errors found in extent allocation tree or chunk allocation");
13028
13029         ret = repair_root_items(info);
13030         err |= !!ret;
13031         if (ret < 0) {
13032                 error("failed to repair root items: %s", strerror(-ret));
13033                 goto close_out;
13034         }
13035         if (repair) {
13036                 fprintf(stderr, "Fixed %d roots.\n", ret);
13037                 ret = 0;
13038         } else if (ret > 0) {
13039                 fprintf(stderr,
13040                        "Found %d roots with an outdated root item.\n",
13041                        ret);
13042                 fprintf(stderr,
13043                         "Please run a filesystem check with the option --repair to fix them.\n");
13044                 ret = 1;
13045                 err |= !!ret;
13046                 goto close_out;
13047         }
13048
13049         if (!ctx.progress_enabled) {
13050                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13051                         fprintf(stderr, "checking free space tree\n");
13052                 else
13053                         fprintf(stderr, "checking free space cache\n");
13054         }
13055         ret = check_space_cache(root);
13056         err |= !!ret;
13057         if (ret) {
13058                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13059                         error("errors found in free space tree");
13060                 else
13061                         error("errors found in free space cache");
13062                 goto out;
13063         }
13064
13065         /*
13066          * We used to have to have these hole extents in between our real
13067          * extents so if we don't have this flag set we need to make sure there
13068          * are no gaps in the file extents for inodes, otherwise we can just
13069          * ignore it when this happens.
13070          */
13071         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13072         if (!ctx.progress_enabled)
13073                 fprintf(stderr, "checking fs roots\n");
13074         if (check_mode == CHECK_MODE_LOWMEM)
13075                 ret = check_fs_roots_v2(root->fs_info);
13076         else
13077                 ret = check_fs_roots(root, &root_cache);
13078         err |= !!ret;
13079         if (ret) {
13080                 error("errors found in fs roots");
13081                 goto out;
13082         }
13083
13084         fprintf(stderr, "checking csums\n");
13085         ret = check_csums(root);
13086         err |= !!ret;
13087         if (ret) {
13088                 error("errors found in csum tree");
13089                 goto out;
13090         }
13091
13092         fprintf(stderr, "checking root refs\n");
13093         /* For low memory mode, check_fs_roots_v2 handles root refs */
13094         if (check_mode != CHECK_MODE_LOWMEM) {
13095                 ret = check_root_refs(root, &root_cache);
13096                 err |= !!ret;
13097                 if (ret) {
13098                         error("errors found in root refs");
13099                         goto out;
13100                 }
13101         }
13102
13103         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13104                 struct extent_buffer *eb;
13105
13106                 eb = list_first_entry(&root->fs_info->recow_ebs,
13107                                       struct extent_buffer, recow);
13108                 list_del_init(&eb->recow);
13109                 ret = recow_extent_buffer(root, eb);
13110                 err |= !!ret;
13111                 if (ret) {
13112                         error("fails to fix transid errors");
13113                         break;
13114                 }
13115         }
13116
13117         while (!list_empty(&delete_items)) {
13118                 struct bad_item *bad;
13119
13120                 bad = list_first_entry(&delete_items, struct bad_item, list);
13121                 list_del_init(&bad->list);
13122                 if (repair) {
13123                         ret = delete_bad_item(root, bad);
13124                         err |= !!ret;
13125                 }
13126                 free(bad);
13127         }
13128
13129         if (info->quota_enabled) {
13130                 fprintf(stderr, "checking quota groups\n");
13131                 ret = qgroup_verify_all(info);
13132                 err |= !!ret;
13133                 if (ret) {
13134                         error("failed to check quota groups");
13135                         goto out;
13136                 }
13137                 report_qgroups(0);
13138                 ret = repair_qgroups(info, &qgroups_repaired);
13139                 err |= !!ret;
13140                 if (err) {
13141                         error("failed to repair quota groups");
13142                         goto out;
13143                 }
13144                 ret = 0;
13145         }
13146
13147         if (!list_empty(&root->fs_info->recow_ebs)) {
13148                 error("transid errors in file system");
13149                 ret = 1;
13150                 err |= !!ret;
13151         }
13152 out:
13153         if (found_old_backref) { /*
13154                  * there was a disk format change when mixed
13155                  * backref was in testing tree. The old format
13156                  * existed about one week.
13157                  */
13158                 printf("\n * Found old mixed backref format. "
13159                        "The old format is not supported! *"
13160                        "\n * Please mount the FS in readonly mode, "
13161                        "backup data and re-format the FS. *\n\n");
13162                 err |= 1;
13163         }
13164         printf("found %llu bytes used, ",
13165                (unsigned long long)bytes_used);
13166         if (err)
13167                 printf("error(s) found\n");
13168         else
13169                 printf("no error found\n");
13170         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13171         printf("total tree bytes: %llu\n",
13172                (unsigned long long)total_btree_bytes);
13173         printf("total fs tree bytes: %llu\n",
13174                (unsigned long long)total_fs_tree_bytes);
13175         printf("total extent tree bytes: %llu\n",
13176                (unsigned long long)total_extent_tree_bytes);
13177         printf("btree space waste bytes: %llu\n",
13178                (unsigned long long)btree_space_waste);
13179         printf("file data blocks allocated: %llu\n referenced %llu\n",
13180                 (unsigned long long)data_bytes_allocated,
13181                 (unsigned long long)data_bytes_referenced);
13182
13183         free_qgroup_counts();
13184         free_root_recs_tree(&root_cache);
13185 close_out:
13186         close_ctree(root);
13187 err_out:
13188         if (ctx.progress_enabled)
13189                 task_deinit(ctx.info);
13190
13191         return err;
13192 }