btrfs-progs: check: make max_size consistent with nr
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Check phases that the progress indicator can report.  The enumerators
 * double as indices into the phase-name table in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel: must remain the last enumerator */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation to run.  CHECK_MODE_UNKNOWN is what
 * parse_check_mode() returns for an unrecognized name.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	/* Alias, not a distinct mode */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts at the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; note that the numbering starts at bit 1, bit 0 is unused. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not yet set" value for extent_record::flag_block_full_backref,
 * which is a 2-bit field and can therefore hold it.
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Bits stored in the "errors" member of inode_backref and root_backref. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Bits stored in inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors impossible
 * to tell apart; it now uses the first free bit, all other values are
 * unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Progress teardown callback: ends the status line with a newline and
 * flushes stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
/*
 * Transfer the inode records collected in @src_node into @dst_node.
 *
 * One reference is dropped on @src_node.  If that was the last one, the
 * ptr_node entries are moved out of the source caches; otherwise each entry
 * is duplicated and the underlying record gains a reference.  The root
 * cache is processed first, then the inode cache.
 *
 * Always returns 0; allocation and insertion failures hit BUG_ON().
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	/* Last reference gone: entries can be moved instead of copied */
	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		/* Advance first: the current entry may be removed below */
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			/* Share the record through a new ptr_node */
			ins = malloc(sizeof(*ins));
			BUG_ON(!ins);
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/*
			 * The destination already tracks this inode: merge
			 * our record into it, then drop our reference and
			 * the ptr_node that could not be inserted.
			 */
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	/* Second pass: repeat the whole loop for the inode caches */
	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	/*
	 * If the source's current inode is beyond the destination's, mark
	 * the destination's current record as checked and switch over to
	 * the source's inode.  dst refers to the destination inode cache
	 * at this point.
	 */
	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
	}
	return 0;
}
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
/*
 * NOTE(review): presumably expands to the definition of
 * free_inode_recs_tree(), which enter_shared_node() uses to release a whole
 * cache tree through free_inode_ptr() -- confirm against the macro
 * definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 /*
1872  * Returns >0  Found error, not fatal, should continue
1873  * Returns <0  Fatal error, must exit the whole check
1874  * Returns 0   No errors found
1875  */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877                                struct node_refs *nrefs, int *level, int ext_ref)
1878 {
1879         struct extent_buffer *cur = path->nodes[0];
1880         struct btrfs_key key;
1881         u64 cur_bytenr;
1882         u32 nritems;
1883         u64 first_ino = 0;
1884         int root_level = btrfs_header_level(root->node);
1885         int i;
1886         int ret = 0; /* Final return value */
1887         int err = 0; /* Positive error bitmap */
1888
1889         cur_bytenr = cur->start;
1890
1891         /* skip to first inode item or the first inode number change */
1892         nritems = btrfs_header_nritems(cur);
1893         for (i = 0; i < nritems; i++) {
1894                 btrfs_item_key_to_cpu(cur, &key, i);
1895                 if (i == 0)
1896                         first_ino = key.objectid;
1897                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898                     (first_ino && first_ino != key.objectid))
1899                         break;
1900         }
1901         if (i == nritems) {
1902                 path->slots[0] = nritems;
1903                 return 0;
1904         }
1905         path->slots[0] = i;
1906
1907 again:
1908         err |= check_inode_item(root, path, ext_ref);
1909
1910         if (err & LAST_ITEM)
1911                 goto out;
1912
1913         /* still have inode items in thie leaf */
1914         if (cur->start == cur_bytenr)
1915                 goto again;
1916
1917         /*
1918          * we have switched to another leaf, above nodes may
1919          * have changed, here walk down the path, if a node
1920          * or leaf is shared, check whether we can skip this
1921          * node or leaf.
1922          */
1923         for (i = root_level; i >= 0; i--) {
1924                 if (path->nodes[i]->start == nrefs->bytenr[i])
1925                         continue;
1926
1927                 ret = update_nodes_refs(root,
1928                                 path->nodes[i]->start,
1929                                 nrefs, i);
1930                 if (ret)
1931                         goto out;
1932
1933                 if (!nrefs->need_check[i]) {
1934                         *level += 1;
1935                         break;
1936                 }
1937         }
1938
1939         for (i = 0; i < *level; i++) {
1940                 free_extent_buffer(path->nodes[i]);
1941                 path->nodes[i] = NULL;
1942         }
1943 out:
1944         err &= ~LAST_ITEM;
1945         if (err && !ret)
1946                 ret = err;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         free_extent_buffer(next);
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 /*
2217  * Returns >0  Found error, should continue
2218  * Returns <0  Fatal error, must exit the whole check
2219  * Returns 0   No errors found
2220  */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222                              int *level, struct node_refs *nrefs, int ext_ref)
2223 {
2224         enum btrfs_tree_block_status status;
2225         u64 bytenr;
2226         u64 ptr_gen;
2227         struct extent_buffer *next;
2228         struct extent_buffer *cur;
2229         u32 blocksize;
2230         int ret;
2231
2232         WARN_ON(*level < 0);
2233         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2234
2235         ret = update_nodes_refs(root, path->nodes[*level]->start,
2236                                 nrefs, *level);
2237         if (ret < 0)
2238                 return ret;
2239
2240         while (*level >= 0) {
2241                 WARN_ON(*level < 0);
2242                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243                 cur = path->nodes[*level];
2244
2245                 if (btrfs_header_level(cur) != *level)
2246                         WARN_ON(1);
2247
2248                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249                         break;
2250                 /* Don't forgot to check leaf/node validation */
2251                 if (*level == 0) {
2252                         ret = btrfs_check_leaf(root, NULL, cur);
2253                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2254                                 ret = -EIO;
2255                                 break;
2256                         }
2257                         ret = process_one_leaf_v2(root, path, nrefs,
2258                                                   level, ext_ref);
2259                         break;
2260                 } else {
2261                         ret = btrfs_check_node(root, NULL, cur);
2262                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2263                                 ret = -EIO;
2264                                 break;
2265                         }
2266                 }
2267                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269                 blocksize = root->nodesize;
2270
2271                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2272                 if (ret)
2273                         break;
2274                 if (!nrefs->need_check[*level - 1]) {
2275                         path->slots[*level]++;
2276                         continue;
2277                 }
2278
2279                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root, bytenr, blocksize,
2284                                                ptr_gen);
2285                         if (!extent_buffer_uptodate(next)) {
2286                                 struct btrfs_key node_key;
2287
2288                                 btrfs_node_key_to_cpu(path->nodes[*level],
2289                                                       &node_key,
2290                                                       path->slots[*level]);
2291                                 btrfs_add_corrupt_extent_record(root->fs_info,
2292                                                 &node_key,
2293                                                 path->nodes[*level]->start,
2294                                                 root->nodesize, *level);
2295                                 ret = -EIO;
2296                                 break;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret < 0) 
2302                         break;
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         ret = -EIO;
2311                         break;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319         return ret;
2320 }
2321
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323                         struct walk_control *wc, int *level)
2324 {
2325         int i;
2326         struct extent_buffer *leaf;
2327
2328         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329                 leaf = path->nodes[i];
2330                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2331                         path->slots[i]++;
2332                         *level = i;
2333                         return 0;
2334                 } else {
2335                         free_extent_buffer(path->nodes[*level]);
2336                         path->nodes[*level] = NULL;
2337                         BUG_ON(*level > wc->active_node);
2338                         if (*level == wc->active_node)
2339                                 leave_shared_node(root, wc, *level);
2340                         *level = i + 1;
2341                 }
2342         }
2343         return 1;
2344 }
2345
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2347                            int *level)
2348 {
2349         int i;
2350         struct extent_buffer *leaf;
2351
2352         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353                 leaf = path->nodes[i];
2354                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355                         path->slots[i]++;
2356                         *level = i;
2357                         return 0;
2358                 } else {
2359                         free_extent_buffer(path->nodes[*level]);
2360                         path->nodes[*level] = NULL;
2361                         *level = i + 1;
2362                 }
2363         }
2364         return 1;
2365 }
2366
2367 static int check_root_dir(struct inode_record *rec)
2368 {
2369         struct inode_backref *backref;
2370         int ret = -1;
2371
2372         if (!rec->found_inode_item || rec->errors)
2373                 goto out;
2374         if (rec->nlink != 1 || rec->found_link != 0)
2375                 goto out;
2376         if (list_empty(&rec->backrefs))
2377                 goto out;
2378         backref = to_inode_backref(rec->backrefs.next);
2379         if (!backref->found_inode_ref)
2380                 goto out;
2381         if (backref->index != 0 || backref->namelen != 2 ||
2382             memcmp(backref->name, "..", 2))
2383                 goto out;
2384         if (backref->found_dir_index || backref->found_dir_item)
2385                 goto out;
2386         ret = 0;
2387 out:
2388         return ret;
2389 }
2390
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392                               struct btrfs_root *root, struct btrfs_path *path,
2393                               struct inode_record *rec)
2394 {
2395         struct btrfs_inode_item *ei;
2396         struct btrfs_key key;
2397         int ret;
2398
2399         key.objectid = rec->ino;
2400         key.type = BTRFS_INODE_ITEM_KEY;
2401         key.offset = (u64)-1;
2402
2403         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2404         if (ret < 0)
2405                 goto out;
2406         if (ret) {
2407                 if (!path->slots[0]) {
2408                         ret = -ENOENT;
2409                         goto out;
2410                 }
2411                 path->slots[0]--;
2412                 ret = 0;
2413         }
2414         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415         if (key.objectid != rec->ino) {
2416                 ret = -ENOENT;
2417                 goto out;
2418         }
2419
2420         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421                             struct btrfs_inode_item);
2422         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423         btrfs_mark_buffer_dirty(path->nodes[0]);
2424         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2425         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2426                root->root_key.objectid);
2427 out:
2428         btrfs_release_path(path);
2429         return ret;
2430 }
2431
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433                                     struct btrfs_root *root,
2434                                     struct btrfs_path *path,
2435                                     struct inode_record *rec)
2436 {
2437         int ret;
2438
2439         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440         btrfs_release_path(path);
2441         if (!ret)
2442                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2443         return ret;
2444 }
2445
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447                                struct btrfs_root *root,
2448                                struct btrfs_path *path,
2449                                struct inode_record *rec)
2450 {
2451         struct btrfs_inode_item *ei;
2452         struct btrfs_key key;
2453         int ret = 0;
2454
2455         key.objectid = rec->ino;
2456         key.type = BTRFS_INODE_ITEM_KEY;
2457         key.offset = 0;
2458
2459         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2460         if (ret) {
2461                 if (ret > 0)
2462                         ret = -ENOENT;
2463                 goto out;
2464         }
2465
2466         /* Since ret == 0, no need to check anything */
2467         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468                             struct btrfs_inode_item);
2469         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470         btrfs_mark_buffer_dirty(path->nodes[0]);
2471         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472         printf("reset nbytes for ino %llu root %llu\n",
2473                rec->ino, root->root_key.objectid);
2474 out:
2475         btrfs_release_path(path);
2476         return ret;
2477 }
2478
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480                                  struct cache_tree *inode_cache,
2481                                  struct inode_record *rec,
2482                                  struct inode_backref *backref)
2483 {
2484         struct btrfs_path path;
2485         struct btrfs_trans_handle *trans;
2486         struct btrfs_dir_item *dir_item;
2487         struct extent_buffer *leaf;
2488         struct btrfs_key key;
2489         struct btrfs_disk_key disk_key;
2490         struct inode_record *dir_rec;
2491         unsigned long name_ptr;
2492         u32 data_size = sizeof(*dir_item) + backref->namelen;
2493         int ret;
2494
2495         trans = btrfs_start_transaction(root, 1);
2496         if (IS_ERR(trans))
2497                 return PTR_ERR(trans);
2498
2499         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500                 (unsigned long long)rec->ino);
2501
2502         btrfs_init_path(&path);
2503         key.objectid = backref->dir;
2504         key.type = BTRFS_DIR_INDEX_KEY;
2505         key.offset = backref->index;
2506         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2507         BUG_ON(ret);
2508
2509         leaf = path.nodes[0];
2510         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2511
2512         disk_key.objectid = cpu_to_le64(rec->ino);
2513         disk_key.type = BTRFS_INODE_ITEM_KEY;
2514         disk_key.offset = 0;
2515
2516         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518         btrfs_set_dir_data_len(leaf, dir_item, 0);
2519         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2520         name_ptr = (unsigned long)(dir_item + 1);
2521         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522         btrfs_mark_buffer_dirty(leaf);
2523         btrfs_release_path(&path);
2524         btrfs_commit_transaction(trans, root);
2525
2526         backref->found_dir_index = 1;
2527         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528         BUG_ON(IS_ERR(dir_rec));
2529         if (!dir_rec)
2530                 return 0;
2531         dir_rec->found_size += backref->namelen;
2532         if (dir_rec->found_size == dir_rec->isize &&
2533             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535         if (dir_rec->found_size != dir_rec->isize)
2536                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2537
2538         return 0;
2539 }
2540
2541 static int delete_dir_index(struct btrfs_root *root,
2542                             struct inode_backref *backref)
2543 {
2544         struct btrfs_trans_handle *trans;
2545         struct btrfs_dir_item *di;
2546         struct btrfs_path path;
2547         int ret = 0;
2548
2549         trans = btrfs_start_transaction(root, 1);
2550         if (IS_ERR(trans))
2551                 return PTR_ERR(trans);
2552
2553         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554                 (unsigned long long)backref->dir,
2555                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556                 (unsigned long long)root->objectid);
2557
2558         btrfs_init_path(&path);
2559         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560                                     backref->name, backref->namelen,
2561                                     backref->index, -1);
2562         if (IS_ERR(di)) {
2563                 ret = PTR_ERR(di);
2564                 btrfs_release_path(&path);
2565                 btrfs_commit_transaction(trans, root);
2566                 if (ret == -ENOENT)
2567                         return 0;
2568                 return ret;
2569         }
2570
2571         if (!di)
2572                 ret = btrfs_del_item(trans, root, &path);
2573         else
2574                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2575         BUG_ON(ret);
2576         btrfs_release_path(&path);
2577         btrfs_commit_transaction(trans, root);
2578         return ret;
2579 }
2580
2581 static int create_inode_item(struct btrfs_root *root,
2582                              struct inode_record *rec,
2583                              int root_dir)
2584 {
2585         struct btrfs_trans_handle *trans;
2586         struct btrfs_inode_item inode_item;
2587         time_t now = time(NULL);
2588         int ret;
2589
2590         trans = btrfs_start_transaction(root, 1);
2591         if (IS_ERR(trans)) {
2592                 ret = PTR_ERR(trans);
2593                 return ret;
2594         }
2595
2596         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597                 "be incomplete, please check permissions and content after "
2598                 "the fsck completes.\n", (unsigned long long)root->objectid,
2599                 (unsigned long long)rec->ino);
2600
2601         memset(&inode_item, 0, sizeof(inode_item));
2602         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2603         if (root_dir)
2604                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2605         else
2606                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608         if (rec->found_dir_item) {
2609                 if (rec->found_file_extent)
2610                         fprintf(stderr, "root %llu inode %llu has both a dir "
2611                                 "item and extents, unsure if it is a dir or a "
2612                                 "regular file so setting it as a directory\n",
2613                                 (unsigned long long)root->objectid,
2614                                 (unsigned long long)rec->ino);
2615                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2617         } else if (!rec->found_dir_item) {
2618                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2620         }
2621         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2629
2630         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2631         BUG_ON(ret);
2632         btrfs_commit_transaction(trans, root);
2633         return 0;
2634 }
2635
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637                                  struct inode_record *rec,
2638                                  struct cache_tree *inode_cache,
2639                                  int delete)
2640 {
2641         struct inode_backref *tmp, *backref;
2642         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2643         int ret = 0;
2644         int repaired = 0;
2645
2646         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647                 if (!delete && rec->ino == root_dirid) {
2648                         if (!rec->found_inode_item) {
2649                                 ret = create_inode_item(root, rec, 1);
2650                                 if (ret)
2651                                         break;
2652                                 repaired++;
2653                         }
2654                 }
2655
2656                 /* Index 0 for root dir's are special, don't mess with it */
2657                 if (rec->ino == root_dirid && backref->index == 0)
2658                         continue;
2659
2660                 if (delete &&
2661                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2662                      (backref->found_dir_index && backref->found_inode_ref &&
2663                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664                         ret = delete_dir_index(root, backref);
2665                         if (ret)
2666                                 break;
2667                         repaired++;
2668                         list_del(&backref->list);
2669                         free(backref);
2670                 }
2671
2672                 if (!delete && !backref->found_dir_index &&
2673                     backref->found_dir_item && backref->found_inode_ref) {
2674                         ret = add_missing_dir_index(root, inode_cache, rec,
2675                                                     backref);
2676                         if (ret)
2677                                 break;
2678                         repaired++;
2679                         if (backref->found_dir_item &&
2680                             backref->found_dir_index &&
2681                             backref->found_dir_index) {
2682                                 if (!backref->errors &&
2683                                     backref->found_inode_ref) {
2684                                         list_del(&backref->list);
2685                                         free(backref);
2686                                 }
2687                         }
2688                 }
2689
2690                 if (!delete && (!backref->found_dir_index &&
2691                                 !backref->found_dir_item &&
2692                                 backref->found_inode_ref)) {
2693                         struct btrfs_trans_handle *trans;
2694                         struct btrfs_key location;
2695
2696                         ret = check_dir_conflict(root, backref->name,
2697                                                  backref->namelen,
2698                                                  backref->dir,
2699                                                  backref->index);
2700                         if (ret) {
2701                                 /*
2702                                  * let nlink fixing routine to handle it,
2703                                  * which can do it better.
2704                                  */
2705                                 ret = 0;
2706                                 break;
2707                         }
2708                         location.objectid = rec->ino;
2709                         location.type = BTRFS_INODE_ITEM_KEY;
2710                         location.offset = 0;
2711
2712                         trans = btrfs_start_transaction(root, 1);
2713                         if (IS_ERR(trans)) {
2714                                 ret = PTR_ERR(trans);
2715                                 break;
2716                         }
2717                         fprintf(stderr, "adding missing dir index/item pair "
2718                                 "for inode %llu\n",
2719                                 (unsigned long long)rec->ino);
2720                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2721                                                     backref->namelen,
2722                                                     backref->dir, &location,
2723                                                     imode_to_type(rec->imode),
2724                                                     backref->index);
2725                         BUG_ON(ret);
2726                         btrfs_commit_transaction(trans, root);
2727                         repaired++;
2728                 }
2729
2730                 if (!delete && (backref->found_inode_ref &&
2731                                 backref->found_dir_index &&
2732                                 backref->found_dir_item &&
2733                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734                                 !rec->found_inode_item)) {
2735                         ret = create_inode_item(root, rec, 0);
2736                         if (ret)
2737                                 break;
2738                         repaired++;
2739                 }
2740
2741         }
2742         return ret ? ret : repaired;
2743 }
2744
2745 /*
2746  * To determine the file type for nlink/inode_item repair
2747  *
2748  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749  * Return -ENOENT if file type is not found.
2750  */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2752 {
2753         struct inode_backref *backref;
2754
2755         /* For inode item recovered case */
2756         if (rec->found_inode_item) {
2757                 *type = imode_to_type(rec->imode);
2758                 return 0;
2759         }
2760
2761         list_for_each_entry(backref, &rec->backrefs, list) {
2762                 if (backref->found_dir_index || backref->found_dir_item) {
2763                         *type = backref->filetype;
2764                         return 0;
2765                 }
2766         }
2767         return -ENOENT;
2768 }
2769
2770 /*
2771  * To determine the file name for nlink repair
2772  *
2773  * Return 0 if file name is found, set name and namelen.
2774  * Return -ENOENT if file name is not found.
2775  */
2776 static int find_file_name(struct inode_record *rec,
2777                           char *name, int *namelen)
2778 {
2779         struct inode_backref *backref;
2780
2781         list_for_each_entry(backref, &rec->backrefs, list) {
2782                 if (backref->found_dir_index || backref->found_dir_item ||
2783                     backref->found_inode_ref) {
2784                         memcpy(name, backref->name, backref->namelen);
2785                         *namelen = backref->namelen;
2786                         return 0;
2787                 }
2788         }
2789         return -ENOENT;
2790 }
2791
2792 /* Reset the nlink of the inode to the correct one */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794                        struct btrfs_root *root,
2795                        struct btrfs_path *path,
2796                        struct inode_record *rec)
2797 {
2798         struct inode_backref *backref;
2799         struct inode_backref *tmp;
2800         struct btrfs_key key;
2801         struct btrfs_inode_item *inode_item;
2802         int ret = 0;
2803
2804         /* We don't believe this either, reset it and iterate backref */
2805         rec->found_link = 0;
2806
2807         /* Remove all backref including the valid ones */
2808         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810                                    backref->index, backref->name,
2811                                    backref->namelen, 0);
2812                 if (ret < 0)
2813                         goto out;
2814
2815                 /* remove invalid backref, so it won't be added back */
2816                 if (!(backref->found_dir_index &&
2817                       backref->found_dir_item &&
2818                       backref->found_inode_ref)) {
2819                         list_del(&backref->list);
2820                         free(backref);
2821                 } else {
2822                         rec->found_link++;
2823                 }
2824         }
2825
2826         /* Set nlink to 0 */
2827         key.objectid = rec->ino;
2828         key.type = BTRFS_INODE_ITEM_KEY;
2829         key.offset = 0;
2830         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2831         if (ret < 0)
2832                 goto out;
2833         if (ret > 0) {
2834                 ret = -ENOENT;
2835                 goto out;
2836         }
2837         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838                                     struct btrfs_inode_item);
2839         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840         btrfs_mark_buffer_dirty(path->nodes[0]);
2841         btrfs_release_path(path);
2842
2843         /*
2844          * Add back valid inode_ref/dir_item/dir_index,
2845          * add_link() will handle the nlink inc, so new nlink must be correct
2846          */
2847         list_for_each_entry(backref, &rec->backrefs, list) {
2848                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849                                      backref->name, backref->namelen,
2850                                      backref->filetype, &backref->index, 1);
2851                 if (ret < 0)
2852                         goto out;
2853         }
2854 out:
2855         btrfs_release_path(path);
2856         return ret;
2857 }
2858
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860                                 struct btrfs_root *root,
2861                                 struct btrfs_path *path,
2862                                 u64 *highest_ino)
2863 {
2864         struct btrfs_key key, found_key;
2865         int ret;
2866
2867         btrfs_init_path(path);
2868         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2869         key.offset = -1;
2870         key.type = BTRFS_INODE_ITEM_KEY;
2871         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2872         if (ret == 1) {
2873                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874                                 path->slots[0] - 1);
2875                 *highest_ino = found_key.objectid;
2876                 ret = 0;
2877         }
2878         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2879                 ret = -EOVERFLOW;
2880         btrfs_release_path(path);
2881         return ret;
2882 }
2883
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885                                struct btrfs_root *root,
2886                                struct btrfs_path *path,
2887                                struct inode_record *rec)
2888 {
2889         char *dir_name = "lost+found";
2890         char namebuf[BTRFS_NAME_LEN] = {0};
2891         u64 lost_found_ino;
2892         u32 mode = 0700;
2893         u8 type = 0;
2894         int namelen = 0;
2895         int name_recovered = 0;
2896         int type_recovered = 0;
2897         int ret = 0;
2898
2899         /*
2900          * Get file name and type first before these invalid inode ref
2901          * are deleted by remove_all_invalid_backref()
2902          */
2903         name_recovered = !find_file_name(rec, namebuf, &namelen);
2904         type_recovered = !find_file_type(rec, &type);
2905
2906         if (!name_recovered) {
2907                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908                        rec->ino, rec->ino);
2909                 namelen = count_digits(rec->ino);
2910                 sprintf(namebuf, "%llu", rec->ino);
2911                 name_recovered = 1;
2912         }
2913         if (!type_recovered) {
2914                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2915                        rec->ino);
2916                 type = BTRFS_FT_REG_FILE;
2917                 type_recovered = 1;
2918         }
2919
2920         ret = reset_nlink(trans, root, path, rec);
2921         if (ret < 0) {
2922                 fprintf(stderr,
2923                         "Failed to reset nlink for inode %llu: %s\n",
2924                         rec->ino, strerror(-ret));
2925                 goto out;
2926         }
2927
2928         if (rec->found_link == 0) {
2929                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2930                 if (ret < 0)
2931                         goto out;
2932                 lost_found_ino++;
2933                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2935                                   mode);
2936                 if (ret < 0) {
2937                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938                                 dir_name, strerror(-ret));
2939                         goto out;
2940                 }
2941                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942                                      namebuf, namelen, type, NULL, 1);
2943                 /*
2944                  * Add ".INO" suffix several times to handle case where
2945                  * "FILENAME.INO" is already taken by another file.
2946                  */
2947                 while (ret == -EEXIST) {
2948                         /*
2949                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2950                          */
2951                         if (namelen + count_digits(rec->ino) + 1 >
2952                             BTRFS_NAME_LEN) {
2953                                 ret = -EFBIG;
2954                                 goto out;
2955                         }
2956                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2957                                  ".%llu", rec->ino);
2958                         namelen += count_digits(rec->ino) + 1;
2959                         ret = btrfs_add_link(trans, root, rec->ino,
2960                                              lost_found_ino, namebuf,
2961                                              namelen, type, NULL, 1);
2962                 }
2963                 if (ret < 0) {
2964                         fprintf(stderr,
2965                                 "Failed to link the inode %llu to %s dir: %s\n",
2966                                 rec->ino, dir_name, strerror(-ret));
2967                         goto out;
2968                 }
2969                 /*
2970                  * Just increase the found_link, don't actually add the
2971                  * backref. This will make things easier and this inode
2972                  * record will be freed after the repair is done.
2973                  * So fsck will not report problem about this inode.
2974                  */
2975                 rec->found_link++;
2976                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977                        namelen, namebuf, dir_name);
2978         }
2979         printf("Fixed the nlink of inode %llu\n", rec->ino);
2980 out:
2981         /*
2982          * Clear the flag anyway, or we will loop forever for the same inode
2983          * as it will not be removed from the bad inode list and the dead loop
2984          * happens.
2985          */
2986         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987         btrfs_release_path(path);
2988         return ret;
2989 }
2990
2991 /*
2992  * Check if there is any normal(reg or prealloc) file extent for given
2993  * ino.
2994  * This is used to determine the file type when neither its dir_index/item or
2995  * inode_item exists.
2996  *
2997  * This will *NOT* report error, if any error happens, just consider it does
2998  * not have any normal file extent.
2999  */
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3001 {
3002         struct btrfs_path path;
3003         struct btrfs_key key;
3004         struct btrfs_key found_key;
3005         struct btrfs_file_extent_item *fi;
3006         u8 type;
3007         int ret = 0;
3008
3009         btrfs_init_path(&path);
3010         key.objectid = ino;
3011         key.type = BTRFS_EXTENT_DATA_KEY;
3012         key.offset = 0;
3013
3014         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3015         if (ret < 0) {
3016                 ret = 0;
3017                 goto out;
3018         }
3019         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020                 ret = btrfs_next_leaf(root, &path);
3021                 if (ret) {
3022                         ret = 0;
3023                         goto out;
3024                 }
3025         }
3026         while (1) {
3027                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3028                                       path.slots[0]);
3029                 if (found_key.objectid != ino ||
3030                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3031                         break;
3032                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033                                     struct btrfs_file_extent_item);
3034                 type = btrfs_file_extent_type(path.nodes[0], fi);
3035                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3036                         ret = 1;
3037                         goto out;
3038                 }
3039         }
3040 out:
3041         btrfs_release_path(&path);
3042         return ret;
3043 }
3044
3045 static u32 btrfs_type_to_imode(u8 type)
3046 {
3047         static u32 imode_by_btrfs_type[] = {
3048                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3049                 [BTRFS_FT_DIR]          = S_IFDIR,
3050                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3051                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3052                 [BTRFS_FT_FIFO]         = S_IFIFO,
3053                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3054                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3055         };
3056
3057         return imode_by_btrfs_type[(type)];
3058 }
3059
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061                                 struct btrfs_root *root,
3062                                 struct btrfs_path *path,
3063                                 struct inode_record *rec)
3064 {
3065         u8 filetype;
3066         u32 mode = 0700;
3067         int type_recovered = 0;
3068         int ret = 0;
3069
3070         printf("Trying to rebuild inode:%llu\n", rec->ino);
3071
3072         type_recovered = !find_file_type(rec, &filetype);
3073
3074         /*
3075          * Try to determine inode type if type not found.
3076          *
3077          * For found regular file extent, it must be FILE.
3078          * For found dir_item/index, it must be DIR.
3079          *
3080          * For undetermined one, use FILE as fallback.
3081          *
3082          * TODO:
3083          * 1. If found backref(inode_index/item is already handled) to it,
3084          *    it must be DIR.
3085          *    Need new inode-inode ref structure to allow search for that.
3086          */
3087         if (!type_recovered) {
3088                 if (rec->found_file_extent &&
3089                     find_normal_file_extent(root, rec->ino)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else if (rec->found_dir_item) {
3093                         type_recovered = 1;
3094                         filetype = BTRFS_FT_DIR;
3095                 } else if (!list_empty(&rec->orphan_extents)) {
3096                         type_recovered = 1;
3097                         filetype = BTRFS_FT_REG_FILE;
3098                 } else{
3099                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3100                                rec->ino);
3101                         type_recovered = 1;
3102                         filetype = BTRFS_FT_REG_FILE;
3103                 }
3104         }
3105
3106         ret = btrfs_new_inode(trans, root, rec->ino,
3107                               mode | btrfs_type_to_imode(filetype));
3108         if (ret < 0)
3109                 goto out;
3110
3111         /*
3112          * Here inode rebuild is done, we only rebuild the inode item,
3113          * don't repair the nlink(like move to lost+found).
3114          * That is the job of nlink repair.
3115          *
3116          * We just fill the record and return
3117          */
3118         rec->found_dir_item = 1;
3119         rec->imode = mode | btrfs_type_to_imode(filetype);
3120         rec->nlink = 0;
3121         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122         /* Ensure the inode_nlinks repair function will be called */
3123         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3124 out:
3125         return ret;
3126 }
3127
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129                                       struct btrfs_root *root,
3130                                       struct btrfs_path *path,
3131                                       struct inode_record *rec)
3132 {
3133         struct orphan_data_extent *orphan;
3134         struct orphan_data_extent *tmp;
3135         int ret = 0;
3136
3137         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3138                 /*
3139                  * Check for conflicting file extents
3140                  *
3141                  * Here we don't know whether the extents is compressed or not,
3142                  * so we can only assume it not compressed nor data offset,
3143                  * and use its disk_len as extent length.
3144                  */
3145                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146                                        orphan->offset, orphan->disk_len, 0);
3147                 btrfs_release_path(path);
3148                 if (ret < 0)
3149                         goto out;
3150                 if (!ret) {
3151                         fprintf(stderr,
3152                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153                                 orphan->disk_bytenr, orphan->disk_len);
3154                         ret = btrfs_free_extent(trans,
3155                                         root->fs_info->extent_root,
3156                                         orphan->disk_bytenr, orphan->disk_len,
3157                                         0, root->objectid, orphan->objectid,
3158                                         orphan->offset);
3159                         if (ret < 0)
3160                                 goto out;
3161                 }
3162                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163                                 orphan->offset, orphan->disk_bytenr,
3164                                 orphan->disk_len, orphan->disk_len);
3165                 if (ret < 0)
3166                         goto out;
3167
3168                 /* Update file size info */
3169                 rec->found_size += orphan->disk_len;
3170                 if (rec->found_size == rec->nbytes)
3171                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3172
3173                 /* Update the file extent hole info too */
3174                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3175                                            orphan->disk_len);
3176                 if (ret < 0)
3177                         goto out;
3178                 if (RB_EMPTY_ROOT(&rec->holes))
3179                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3180
3181                 list_del(&orphan->list);
3182                 free(orphan);
3183         }
3184         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3185 out:
3186         return ret;
3187 }
3188
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190                                         struct btrfs_root *root,
3191                                         struct btrfs_path *path,
3192                                         struct inode_record *rec)
3193 {
3194         struct rb_node *node;
3195         struct file_extent_hole *hole;
3196         int found = 0;
3197         int ret = 0;
3198
3199         node = rb_first(&rec->holes);
3200
3201         while (node) {
3202                 found = 1;
3203                 hole = rb_entry(node, struct file_extent_hole, node);
3204                 ret = btrfs_punch_hole(trans, root, rec->ino,
3205                                        hole->start, hole->len);
3206                 if (ret < 0)
3207                         goto out;
3208                 ret = del_file_extent_hole(&rec->holes, hole->start,
3209                                            hole->len);
3210                 if (ret < 0)
3211                         goto out;
3212                 if (RB_EMPTY_ROOT(&rec->holes))
3213                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3214                 node = rb_first(&rec->holes);
3215         }
3216         /* special case for a file losing all its file extent */
3217         if (!found) {
3218                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219                                        round_up(rec->isize, root->sectorsize));
3220                 if (ret < 0)
3221                         goto out;
3222         }
3223         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224                rec->ino, root->objectid);
3225 out:
3226         return ret;
3227 }
3228
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3230 {
3231         struct btrfs_trans_handle *trans;
3232         struct btrfs_path path;
3233         int ret = 0;
3234
3235         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236                              I_ERR_NO_ORPHAN_ITEM |
3237                              I_ERR_LINK_COUNT_WRONG |
3238                              I_ERR_NO_INODE_ITEM |
3239                              I_ERR_FILE_EXTENT_ORPHAN |
3240                              I_ERR_FILE_EXTENT_DISCOUNT|
3241                              I_ERR_FILE_NBYTES_WRONG)))
3242                 return rec->errors;
3243
3244         /*
3245          * For nlink repair, it may create a dir and add link, so
3246          * 2 for parent(256)'s dir_index and dir_item
3247          * 2 for lost+found dir's inode_item and inode_ref
3248          * 1 for the new inode_ref of the file
3249          * 2 for lost+found dir's dir_index and dir_item for the file
3250          */
3251         trans = btrfs_start_transaction(root, 7);
3252         if (IS_ERR(trans))
3253                 return PTR_ERR(trans);
3254
3255         btrfs_init_path(&path);
3256         if (rec->errors & I_ERR_NO_INODE_ITEM)
3257                 ret = repair_inode_no_item(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263                 ret = repair_inode_isize(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267                 ret = repair_inode_nlinks(trans, root, &path, rec);
3268         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269                 ret = repair_inode_nbytes(trans, root, &path, rec);
3270         btrfs_commit_transaction(trans, root);
3271         btrfs_release_path(&path);
3272         return ret;
3273 }
3274
3275 static int check_inode_recs(struct btrfs_root *root,
3276                             struct cache_tree *inode_cache)
3277 {
3278         struct cache_extent *cache;
3279         struct ptr_node *node;
3280         struct inode_record *rec;
3281         struct inode_backref *backref;
3282         int stage = 0;
3283         int ret = 0;
3284         int err = 0;
3285         u64 error = 0;
3286         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3287
3288         if (btrfs_root_refs(&root->root_item) == 0) {
3289                 if (!cache_tree_empty(inode_cache))
3290                         fprintf(stderr, "warning line %d\n", __LINE__);
3291                 return 0;
3292         }
3293
3294         /*
3295          * We need to repair backrefs first because we could change some of the
3296          * errors in the inode recs.
3297          *
3298          * We also need to go through and delete invalid backrefs first and then
3299          * add the correct ones second.  We do this because we may get EEXIST
3300          * when adding back the correct index because we hadn't yet deleted the
3301          * invalid index.
3302          *
3303          * For example, if we were missing a dir index then the directories
3304          * isize would be wrong, so if we fixed the isize to what we thought it
3305          * would be and then fixed the backref we'd still have a invalid fs, so
3306          * we need to add back the dir index and then check to see if the isize
3307          * is still wrong.
3308          */
3309         while (stage < 3) {
3310                 stage++;
3311                 if (stage == 3 && !err)
3312                         break;
3313
3314                 cache = search_cache_extent(inode_cache, 0);
3315                 while (repair && cache) {
3316                         node = container_of(cache, struct ptr_node, cache);
3317                         rec = node->data;
3318                         cache = next_cache_extent(cache);
3319
3320                         /* Need to free everything up and rescan */
3321                         if (stage == 3) {
3322                                 remove_cache_extent(inode_cache, &node->cache);
3323                                 free(node);
3324                                 free_inode_rec(rec);
3325                                 continue;
3326                         }
3327
3328                         if (list_empty(&rec->backrefs))
3329                                 continue;
3330
3331                         ret = repair_inode_backrefs(root, rec, inode_cache,
3332                                                     stage == 1);
3333                         if (ret < 0) {
3334                                 err = ret;
3335                                 stage = 2;
3336                                 break;
3337                         } if (ret > 0) {
3338                                 err = -EAGAIN;
3339                         }
3340                 }
3341         }
3342         if (err)
3343                 return err;
3344
3345         rec = get_inode_rec(inode_cache, root_dirid, 0);
3346         BUG_ON(IS_ERR(rec));
3347         if (rec) {
3348                 ret = check_root_dir(rec);
3349                 if (ret) {
3350                         fprintf(stderr, "root %llu root dir %llu error\n",
3351                                 (unsigned long long)root->root_key.objectid,
3352                                 (unsigned long long)root_dirid);
3353                         print_inode_error(root, rec);
3354                         error++;
3355                 }
3356         } else {
3357                 if (repair) {
3358                         struct btrfs_trans_handle *trans;
3359
3360                         trans = btrfs_start_transaction(root, 1);
3361                         if (IS_ERR(trans)) {
3362                                 err = PTR_ERR(trans);
3363                                 return err;
3364                         }
3365
3366                         fprintf(stderr,
3367                                 "root %llu missing its root dir, recreating\n",
3368                                 (unsigned long long)root->objectid);
3369
3370                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3371                         BUG_ON(ret);
3372
3373                         btrfs_commit_transaction(trans, root);
3374                         return -EAGAIN;
3375                 }
3376
3377                 fprintf(stderr, "root %llu root dir %llu not found\n",
3378                         (unsigned long long)root->root_key.objectid,
3379                         (unsigned long long)root_dirid);
3380         }
3381
3382         while (1) {
3383                 cache = search_cache_extent(inode_cache, 0);
3384                 if (!cache)
3385                         break;
3386                 node = container_of(cache, struct ptr_node, cache);
3387                 rec = node->data;
3388                 remove_cache_extent(inode_cache, &node->cache);
3389                 free(node);
3390                 if (rec->ino == root_dirid ||
3391                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392                         free_inode_rec(rec);
3393                         continue;
3394                 }
3395
3396                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397                         ret = check_orphan_item(root, rec->ino);
3398                         if (ret == 0)
3399                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400                         if (can_free_inode_rec(rec)) {
3401                                 free_inode_rec(rec);
3402                                 continue;
3403                         }
3404                 }
3405
3406                 if (!rec->found_inode_item)
3407                         rec->errors |= I_ERR_NO_INODE_ITEM;
3408                 if (rec->found_link != rec->nlink)
3409                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3410                 if (repair) {
3411                         ret = try_repair_inode(root, rec);
3412                         if (ret == 0 && can_free_inode_rec(rec)) {
3413                                 free_inode_rec(rec);
3414                                 continue;
3415                         }
3416                         ret = 0;
3417                 }
3418
3419                 if (!(repair && ret == 0))
3420                         error++;
3421                 print_inode_error(root, rec);
3422                 list_for_each_entry(backref, &rec->backrefs, list) {
3423                         if (!backref->found_dir_item)
3424                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425                         if (!backref->found_dir_index)
3426                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427                         if (!backref->found_inode_ref)
3428                                 backref->errors |= REF_ERR_NO_INODE_REF;
3429                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430                                 " namelen %u name %s filetype %d errors %x",
3431                                 (unsigned long long)backref->dir,
3432                                 (unsigned long long)backref->index,
3433                                 backref->namelen, backref->name,
3434                                 backref->filetype, backref->errors);
3435                         print_ref_error(backref->errors);
3436                 }
3437                 free_inode_rec(rec);
3438         }
3439         return (error > 0) ? -1 : 0;
3440 }
3441
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3443                                         u64 objectid)
3444 {
3445         struct cache_extent *cache;
3446         struct root_record *rec = NULL;
3447         int ret;
3448
3449         cache = lookup_cache_extent(root_cache, objectid, 1);
3450         if (cache) {
3451                 rec = container_of(cache, struct root_record, cache);
3452         } else {
3453                 rec = calloc(1, sizeof(*rec));
3454                 if (!rec)
3455                         return ERR_PTR(-ENOMEM);
3456                 rec->objectid = objectid;
3457                 INIT_LIST_HEAD(&rec->backrefs);
3458                 rec->cache.start = objectid;
3459                 rec->cache.size = 1;
3460
3461                 ret = insert_cache_extent(root_cache, &rec->cache);
3462                 if (ret)
3463                         return ERR_PTR(-EEXIST);
3464         }
3465         return rec;
3466 }
3467
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469                                              u64 ref_root, u64 dir, u64 index,
3470                                              const char *name, int namelen)
3471 {
3472         struct root_backref *backref;
3473
3474         list_for_each_entry(backref, &rec->backrefs, list) {
3475                 if (backref->ref_root != ref_root || backref->dir != dir ||
3476                     backref->namelen != namelen)
3477                         continue;
3478                 if (memcmp(name, backref->name, namelen))
3479                         continue;
3480                 return backref;
3481         }
3482
3483         backref = calloc(1, sizeof(*backref) + namelen + 1);
3484         if (!backref)
3485                 return NULL;
3486         backref->ref_root = ref_root;
3487         backref->dir = dir;
3488         backref->index = index;
3489         backref->namelen = namelen;
3490         memcpy(backref->name, name, namelen);
3491         backref->name[namelen] = '\0';
3492         list_add_tail(&backref->list, &rec->backrefs);
3493         return backref;
3494 }
3495
3496 static void free_root_record(struct cache_extent *cache)
3497 {
3498         struct root_record *rec;
3499         struct root_backref *backref;
3500
3501         rec = container_of(cache, struct root_record, cache);
3502         while (!list_empty(&rec->backrefs)) {
3503                 backref = to_root_backref(rec->backrefs.next);
3504                 list_del(&backref->list);
3505                 free(backref);
3506         }
3507
3508         free(rec);
3509 }
3510
3511 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3512
3513 static int add_root_backref(struct cache_tree *root_cache,
3514                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3515                             const char *name, int namelen,
3516                             int item_type, int errors)
3517 {
3518         struct root_record *rec;
3519         struct root_backref *backref;
3520
3521         rec = get_root_rec(root_cache, root_id);
3522         BUG_ON(IS_ERR(rec));
3523         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3524         BUG_ON(!backref);
3525
3526         backref->errors |= errors;
3527
3528         if (item_type != BTRFS_DIR_ITEM_KEY) {
3529                 if (backref->found_dir_index || backref->found_back_ref ||
3530                     backref->found_forward_ref) {
3531                         if (backref->index != index)
3532                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3533                 } else {
3534                         backref->index = index;
3535                 }
3536         }
3537
3538         if (item_type == BTRFS_DIR_ITEM_KEY) {
3539                 if (backref->found_forward_ref)
3540                         rec->found_ref++;
3541                 backref->found_dir_item = 1;
3542         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543                 backref->found_dir_index = 1;
3544         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545                 if (backref->found_forward_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3547                 else if (backref->found_dir_item)
3548                         rec->found_ref++;
3549                 backref->found_forward_ref = 1;
3550         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551                 if (backref->found_back_ref)
3552                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553                 backref->found_back_ref = 1;
3554         } else {
3555                 BUG_ON(1);
3556         }
3557
3558         if (backref->found_forward_ref && backref->found_dir_item)
3559                 backref->reachable = 1;
3560         return 0;
3561 }
3562
3563 static int merge_root_recs(struct btrfs_root *root,
3564                            struct cache_tree *src_cache,
3565                            struct cache_tree *dst_cache)
3566 {
3567         struct cache_extent *cache;
3568         struct ptr_node *node;
3569         struct inode_record *rec;
3570         struct inode_backref *backref;
3571         int ret = 0;
3572
3573         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574                 free_inode_recs_tree(src_cache);
3575                 return 0;
3576         }
3577
3578         while (1) {
3579                 cache = search_cache_extent(src_cache, 0);
3580                 if (!cache)
3581                         break;
3582                 node = container_of(cache, struct ptr_node, cache);
3583                 rec = node->data;
3584                 remove_cache_extent(src_cache, &node->cache);
3585                 free(node);
3586
3587                 ret = is_child_root(root, root->objectid, rec->ino);
3588                 if (ret < 0)
3589                         break;
3590                 else if (ret == 0)
3591                         goto skip;
3592
3593                 list_for_each_entry(backref, &rec->backrefs, list) {
3594                         BUG_ON(backref->found_inode_ref);
3595                         if (backref->found_dir_item)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3600                                         backref->errors);
3601                         if (backref->found_dir_index)
3602                                 add_root_backref(dst_cache, rec->ino,
3603                                         root->root_key.objectid, backref->dir,
3604                                         backref->index, backref->name,
3605                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3606                                         backref->errors);
3607                 }
3608 skip:
3609                 free_inode_rec(rec);
3610         }
3611         if (ret < 0)
3612                 return ret;
3613         return 0;
3614 }
3615
3616 static int check_root_refs(struct btrfs_root *root,
3617                            struct cache_tree *root_cache)
3618 {
3619         struct root_record *rec;
3620         struct root_record *ref_root;
3621         struct root_backref *backref;
3622         struct cache_extent *cache;
3623         int loop = 1;
3624         int ret;
3625         int error;
3626         int errors = 0;
3627
3628         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629         BUG_ON(IS_ERR(rec));
3630         rec->found_ref = 1;
3631
3632         /* fixme: this can not detect circular references */
3633         while (loop) {
3634                 loop = 0;
3635                 cache = search_cache_extent(root_cache, 0);
3636                 while (1) {
3637                         if (!cache)
3638                                 break;
3639                         rec = container_of(cache, struct root_record, cache);
3640                         cache = next_cache_extent(cache);
3641
3642                         if (rec->found_ref == 0)
3643                                 continue;
3644
3645                         list_for_each_entry(backref, &rec->backrefs, list) {
3646                                 if (!backref->reachable)
3647                                         continue;
3648
3649                                 ref_root = get_root_rec(root_cache,
3650                                                         backref->ref_root);
3651                                 BUG_ON(IS_ERR(ref_root));
3652                                 if (ref_root->found_ref > 0)
3653                                         continue;
3654
3655                                 backref->reachable = 0;
3656                                 rec->found_ref--;
3657                                 if (rec->found_ref == 0)
3658                                         loop = 1;
3659                         }
3660                 }
3661         }
3662
3663         cache = search_cache_extent(root_cache, 0);
3664         while (1) {
3665                 if (!cache)
3666                         break;
3667                 rec = container_of(cache, struct root_record, cache);
3668                 cache = next_cache_extent(cache);
3669
3670                 if (rec->found_ref == 0 &&
3671                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673                         ret = check_orphan_item(root->fs_info->tree_root,
3674                                                 rec->objectid);
3675                         if (ret == 0)
3676                                 continue;
3677
3678                         /*
3679                          * If we don't have a root item then we likely just have
3680                          * a dir item in a snapshot for this root but no actual
3681                          * ref key or anything so it's meaningless.
3682                          */
3683                         if (!rec->found_root_item)
3684                                 continue;
3685                         errors++;
3686                         fprintf(stderr, "fs tree %llu not referenced\n",
3687                                 (unsigned long long)rec->objectid);
3688                 }
3689
3690                 error = 0;
3691                 if (rec->found_ref > 0 && !rec->found_root_item)
3692                         error = 1;
3693                 list_for_each_entry(backref, &rec->backrefs, list) {
3694                         if (!backref->found_dir_item)
3695                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696                         if (!backref->found_dir_index)
3697                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698                         if (!backref->found_back_ref)
3699                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700                         if (!backref->found_forward_ref)
3701                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3702                         if (backref->reachable && backref->errors)
3703                                 error = 1;
3704                 }
3705                 if (!error)
3706                         continue;
3707
3708                 errors++;
3709                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710                         (unsigned long long)rec->objectid, rec->found_ref,
3711                          rec->found_root_item ? "" : "not found");
3712
3713                 list_for_each_entry(backref, &rec->backrefs, list) {
3714                         if (!backref->reachable)
3715                                 continue;
3716                         if (!backref->errors && rec->found_root_item)
3717                                 continue;
3718                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719                                 " index %llu namelen %u name %s errors %x\n",
3720                                 (unsigned long long)backref->ref_root,
3721                                 (unsigned long long)backref->dir,
3722                                 (unsigned long long)backref->index,
3723                                 backref->namelen, backref->name,
3724                                 backref->errors);
3725                         print_ref_error(backref->errors);
3726                 }
3727         }
3728         return errors > 0 ? 1 : 0;
3729 }
3730
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732                             struct btrfs_key *key,
3733                             struct cache_tree *root_cache)
3734 {
3735         u64 dirid;
3736         u64 index;
3737         u32 len;
3738         u32 name_len;
3739         struct btrfs_root_ref *ref;
3740         char namebuf[BTRFS_NAME_LEN];
3741         int error;
3742
3743         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3744
3745         dirid = btrfs_root_ref_dirid(eb, ref);
3746         index = btrfs_root_ref_sequence(eb, ref);
3747         name_len = btrfs_root_ref_name_len(eb, ref);
3748
3749         if (name_len <= BTRFS_NAME_LEN) {
3750                 len = name_len;
3751                 error = 0;
3752         } else {
3753                 len = BTRFS_NAME_LEN;
3754                 error = REF_ERR_NAME_TOO_LONG;
3755         }
3756         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3757
3758         if (key->type == BTRFS_ROOT_REF_KEY) {
3759                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760                                  index, namebuf, len, key->type, error);
3761         } else {
3762                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763                                  index, namebuf, len, key->type, error);
3764         }
3765         return 0;
3766 }
3767
3768 static void free_corrupt_block(struct cache_extent *cache)
3769 {
3770         struct btrfs_corrupt_block *corrupt;
3771
3772         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3773         free(corrupt);
3774 }
3775
3776 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3777
3778 /*
3779  * Repair the btree of the given root.
3780  *
3781  * The fix is to remove the node key in corrupt_blocks cache_tree.
3782  * and rebalance the tree.
3783  * After the fix, the btree should be writeable.
3784  */
3785 static int repair_btree(struct btrfs_root *root,
3786                         struct cache_tree *corrupt_blocks)
3787 {
3788         struct btrfs_trans_handle *trans;
3789         struct btrfs_path path;
3790         struct btrfs_corrupt_block *corrupt;
3791         struct cache_extent *cache;
3792         struct btrfs_key key;
3793         u64 offset;
3794         int level;
3795         int ret = 0;
3796
3797         if (cache_tree_empty(corrupt_blocks))
3798                 return 0;
3799
3800         trans = btrfs_start_transaction(root, 1);
3801         if (IS_ERR(trans)) {
3802                 ret = PTR_ERR(trans);
3803                 fprintf(stderr, "Error starting transaction: %s\n",
3804                         strerror(-ret));
3805                 return ret;
3806         }
3807         btrfs_init_path(&path);
3808         cache = first_cache_extent(corrupt_blocks);
3809         while (cache) {
3810                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3811                                        cache);
3812                 level = corrupt->level;
3813                 path.lowest_level = level;
3814                 key.objectid = corrupt->key.objectid;
3815                 key.type = corrupt->key.type;
3816                 key.offset = corrupt->key.offset;
3817
3818                 /*
3819                  * Here we don't want to do any tree balance, since it may
3820                  * cause a balance with corrupted brother leaf/node,
3821                  * so ins_len set to 0 here.
3822                  * Balance will be done after all corrupt node/leaf is deleted.
3823                  */
3824                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3825                 if (ret < 0)
3826                         goto out;
3827                 offset = btrfs_node_blockptr(path.nodes[level],
3828                                              path.slots[level]);
3829
3830                 /* Remove the ptr */
3831                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3832                 if (ret < 0)
3833                         goto out;
3834                 /*
3835                  * Remove the corresponding extent
3836                  * return value is not concerned.
3837                  */
3838                 btrfs_release_path(&path);
3839                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840                                         0, root->root_key.objectid,
3841                                         level - 1, 0);
3842                 cache = next_cache_extent(cache);
3843         }
3844
3845         /* Balance the btree using btrfs_search_slot() */
3846         cache = first_cache_extent(corrupt_blocks);
3847         while (cache) {
3848                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3849                                        cache);
3850                 memcpy(&key, &corrupt->key, sizeof(key));
3851                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3852                 if (ret < 0)
3853                         goto out;
3854                 /* return will always >0 since it won't find the item */
3855                 ret = 0;
3856                 btrfs_release_path(&path);
3857                 cache = next_cache_extent(cache);
3858         }
3859 out:
3860         btrfs_commit_transaction(trans, root);
3861         btrfs_release_path(&path);
3862         return ret;
3863 }
3864
3865 static int check_fs_root(struct btrfs_root *root,
3866                          struct cache_tree *root_cache,
3867                          struct walk_control *wc)
3868 {
3869         int ret = 0;
3870         int err = 0;
3871         int wret;
3872         int level;
3873         struct btrfs_path path;
3874         struct shared_node root_node;
3875         struct root_record *rec;
3876         struct btrfs_root_item *root_item = &root->root_item;
3877         struct cache_tree corrupt_blocks;
3878         struct orphan_data_extent *orphan;
3879         struct orphan_data_extent *tmp;
3880         enum btrfs_tree_block_status status;
3881         struct node_refs nrefs;
3882
3883         /*
3884          * Reuse the corrupt_block cache tree to record corrupted tree block
3885          *
3886          * Unlike the usage in extent tree check, here we do it in a per
3887          * fs/subvol tree base.
3888          */
3889         cache_tree_init(&corrupt_blocks);
3890         root->fs_info->corrupt_blocks = &corrupt_blocks;
3891
3892         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893                 rec = get_root_rec(root_cache, root->root_key.objectid);
3894                 BUG_ON(IS_ERR(rec));
3895                 if (btrfs_root_refs(root_item) > 0)
3896                         rec->found_root_item = 1;
3897         }
3898
3899         btrfs_init_path(&path);
3900         memset(&root_node, 0, sizeof(root_node));
3901         cache_tree_init(&root_node.root_cache);
3902         cache_tree_init(&root_node.inode_cache);
3903         memset(&nrefs, 0, sizeof(nrefs));
3904
3905         /* Move the orphan extent record to corresponding inode_record */
3906         list_for_each_entry_safe(orphan, tmp,
3907                                  &root->orphan_data_extents, list) {
3908                 struct inode_record *inode;
3909
3910                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3911                                       1);
3912                 BUG_ON(IS_ERR(inode));
3913                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914                 list_move(&orphan->list, &inode->orphan_extents);
3915         }
3916
3917         level = btrfs_header_level(root->node);
3918         memset(wc->nodes, 0, sizeof(wc->nodes));
3919         wc->nodes[level] = &root_node;
3920         wc->active_node = level;
3921         wc->root_level = level;
3922
3923         /* We may not have checked the root block, lets do that now */
3924         if (btrfs_is_leaf(root->node))
3925                 status = btrfs_check_leaf(root, NULL, root->node);
3926         else
3927                 status = btrfs_check_node(root, NULL, root->node);
3928         if (status != BTRFS_TREE_BLOCK_CLEAN)
3929                 return -EIO;
3930
3931         if (btrfs_root_refs(root_item) > 0 ||
3932             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933                 path.nodes[level] = root->node;
3934                 extent_buffer_get(root->node);
3935                 path.slots[level] = 0;
3936         } else {
3937                 struct btrfs_key key;
3938                 struct btrfs_disk_key found_key;
3939
3940                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941                 level = root_item->drop_level;
3942                 path.lowest_level = level;
3943                 if (level > btrfs_header_level(root->node) ||
3944                     level >= BTRFS_MAX_LEVEL) {
3945                         error("ignoring invalid drop level: %u", level);
3946                         goto skip_walking;
3947                 }
3948                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3949                 if (wret < 0)
3950                         goto skip_walking;
3951                 btrfs_node_key(path.nodes[level], &found_key,
3952                                 path.slots[level]);
3953                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954                                         sizeof(found_key)));
3955         }
3956
3957         while (1) {
3958                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963
3964                 wret = walk_up_tree(root, &path, wc, &level);
3965                 if (wret < 0)
3966                         ret = wret;
3967                 if (wret != 0)
3968                         break;
3969         }
3970 skip_walking:
3971         btrfs_release_path(&path);
3972
3973         if (!cache_tree_empty(&corrupt_blocks)) {
3974                 struct cache_extent *cache;
3975                 struct btrfs_corrupt_block *corrupt;
3976
3977                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978                        root->root_key.objectid);
3979                 cache = first_cache_extent(&corrupt_blocks);
3980                 while (cache) {
3981                         corrupt = container_of(cache,
3982                                                struct btrfs_corrupt_block,
3983                                                cache);
3984                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985                                cache->start, corrupt->level,
3986                                corrupt->key.objectid, corrupt->key.type,
3987                                corrupt->key.offset);
3988                         cache = next_cache_extent(cache);
3989                 }
3990                 if (repair) {
3991                         printf("Try to repair the btree for root %llu\n",
3992                                root->root_key.objectid);
3993                         ret = repair_btree(root, &corrupt_blocks);
3994                         if (ret < 0)
3995                                 fprintf(stderr, "Failed to repair btree: %s\n",
3996                                         strerror(-ret));
3997                         if (!ret)
3998                                 printf("Btree for root %llu is fixed\n",
3999                                        root->root_key.objectid);
4000                 }
4001         }
4002
4003         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4004         if (err < 0)
4005                 ret = err;
4006
4007         if (root_node.current) {
4008                 root_node.current->checked = 1;
4009                 maybe_free_inode_rec(&root_node.inode_cache,
4010                                 root_node.current);
4011         }
4012
4013         err = check_inode_recs(root, &root_node.inode_cache);
4014         if (!ret)
4015                 ret = err;
4016
4017         free_corrupt_blocks_tree(&corrupt_blocks);
4018         root->fs_info->corrupt_blocks = NULL;
4019         free_orphan_data_extents(&root->orphan_data_extents);
4020         return ret;
4021 }
4022
4023 static int fs_root_objectid(u64 objectid)
4024 {
4025         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4027                 return 1;
4028         return is_fstree(objectid);
4029 }
4030
4031 static int check_fs_roots(struct btrfs_root *root,
4032                           struct cache_tree *root_cache)
4033 {
4034         struct btrfs_path path;
4035         struct btrfs_key key;
4036         struct walk_control wc;
4037         struct extent_buffer *leaf, *tree_node;
4038         struct btrfs_root *tmp_root;
4039         struct btrfs_root *tree_root = root->fs_info->tree_root;
4040         int ret;
4041         int err = 0;
4042
4043         if (ctx.progress_enabled) {
4044                 ctx.tp = TASK_FS_ROOTS;
4045                 task_start(ctx.info);
4046         }
4047
4048         /*
4049          * Just in case we made any changes to the extent tree that weren't
4050          * reflected into the free space cache yet.
4051          */
4052         if (repair)
4053                 reset_cached_block_groups(root->fs_info);
4054         memset(&wc, 0, sizeof(wc));
4055         cache_tree_init(&wc.shared);
4056         btrfs_init_path(&path);
4057
4058 again:
4059         key.offset = 0;
4060         key.objectid = 0;
4061         key.type = BTRFS_ROOT_ITEM_KEY;
4062         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4063         if (ret < 0) {
4064                 err = 1;
4065                 goto out;
4066         }
4067         tree_node = tree_root->node;
4068         while (1) {
4069                 if (tree_node != tree_root->node) {
4070                         free_root_recs_tree(root_cache);
4071                         btrfs_release_path(&path);
4072                         goto again;
4073                 }
4074                 leaf = path.nodes[0];
4075                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076                         ret = btrfs_next_leaf(tree_root, &path);
4077                         if (ret) {
4078                                 if (ret < 0)
4079                                         err = 1;
4080                                 break;
4081                         }
4082                         leaf = path.nodes[0];
4083                 }
4084                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086                     fs_root_objectid(key.objectid)) {
4087                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4088                                 tmp_root = btrfs_read_fs_root_no_cache(
4089                                                 root->fs_info, &key);
4090                         } else {
4091                                 key.offset = (u64)-1;
4092                                 tmp_root = btrfs_read_fs_root(
4093                                                 root->fs_info, &key);
4094                         }
4095                         if (IS_ERR(tmp_root)) {
4096                                 err = 1;
4097                                 goto next;
4098                         }
4099                         ret = check_fs_root(tmp_root, root_cache, &wc);
4100                         if (ret == -EAGAIN) {
4101                                 free_root_recs_tree(root_cache);
4102                                 btrfs_release_path(&path);
4103                                 goto again;
4104                         }
4105                         if (ret)
4106                                 err = 1;
4107                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108                                 btrfs_free_fs_root(tmp_root);
4109                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4111                         process_root_ref(leaf, path.slots[0], &key,
4112                                          root_cache);
4113                 }
4114 next:
4115                 path.slots[0]++;
4116         }
4117 out:
4118         btrfs_release_path(&path);
4119         if (err)
4120                 free_extent_cache_tree(&wc.shared);
4121         if (!cache_tree_empty(&wc.shared))
4122                 fprintf(stderr, "warning line %d\n", __LINE__);
4123
4124         task_stop(ctx.info);
4125
4126         return err;
4127 }
4128
4129 /*
4130  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4131  * INODE_REF/INODE_EXTREF match.
4132  *
4133  * @root:       the root of the fs/file tree
4134  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4135  * @key:        the key of the DIR_ITEM/DIR_INDEX
4136  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4137  *              distinguish root_dir between normal dir/file
4138  * @name:       the name in the INODE_REF/INODE_EXTREF
4139  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4140  * @mode:       the st_mode of INODE_ITEM
4141  *
4142  * Return 0 if no error occurred.
4143  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4145  * dir/file.
4146  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4147  * not match for normal dir/file.
4148  */
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150                          struct btrfs_key *key, u64 index, char *name,
4151                          u32 namelen, u32 mode)
4152 {
4153         struct btrfs_path path;
4154         struct extent_buffer *node;
4155         struct btrfs_dir_item *di;
4156         struct btrfs_key location;
4157         char namebuf[BTRFS_NAME_LEN] = {0};
4158         u32 total;
4159         u32 cur = 0;
4160         u32 len;
4161         u32 name_len;
4162         u32 data_len;
4163         u8 filetype;
4164         int slot;
4165         int ret;
4166
4167         btrfs_init_path(&path);
4168         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4169         if (ret < 0) {
4170                 ret = DIR_ITEM_MISSING;
4171                 goto out;
4172         }
4173
4174         /* Process root dir and goto out*/
4175         if (index == 0) {
4176                 if (ret == 0) {
4177                         ret = ROOT_DIR_ERROR;
4178                         error(
4179                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4180                                 root->objectid,
4181                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4182                                         "REF" : "EXTREF",
4183                                 ref_key->objectid, ref_key->offset,
4184                                 key->type == BTRFS_DIR_ITEM_KEY ?
4185                                         "DIR_ITEM" : "DIR_INDEX");
4186                 } else {
4187                         ret = 0;
4188                 }
4189
4190                 goto out;
4191         }
4192
4193         /* Process normal file/dir */
4194         if (ret > 0) {
4195                 ret = DIR_ITEM_MISSING;
4196                 error(
4197                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4198                         root->objectid,
4199                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200                         ref_key->objectid, ref_key->offset,
4201                         key->type == BTRFS_DIR_ITEM_KEY ?
4202                                 "DIR_ITEM" : "DIR_INDEX",
4203                         key->objectid, key->offset, namelen, name,
4204                         imode_to_type(mode));
4205                 goto out;
4206         }
4207
4208         /* Check whether inode_id/filetype/name match */
4209         node = path.nodes[0];
4210         slot = path.slots[0];
4211         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212         total = btrfs_item_size_nr(node, slot);
4213         while (cur < total) {
4214                 ret = DIR_ITEM_MISMATCH;
4215                 name_len = btrfs_dir_name_len(node, di);
4216                 data_len = btrfs_dir_data_len(node, di);
4217
4218                 btrfs_dir_item_key_to_cpu(node, di, &location);
4219                 if (location.objectid != ref_key->objectid ||
4220                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4221                     location.offset != 0)
4222                         goto next;
4223
4224                 filetype = btrfs_dir_type(node, di);
4225                 if (imode_to_type(mode) != filetype)
4226                         goto next;
4227
4228                 if (name_len <= BTRFS_NAME_LEN) {
4229                         len = name_len;
4230                 } else {
4231                         len = BTRFS_NAME_LEN;
4232                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4233                         root->objectid,
4234                         key->type == BTRFS_DIR_ITEM_KEY ?
4235                         "DIR_ITEM" : "DIR_INDEX",
4236                         key->objectid, key->offset, name_len);
4237                 }
4238                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239                 if (len != namelen || strncmp(namebuf, name, len))
4240                         goto next;
4241
4242                 ret = 0;
4243                 goto out;
4244 next:
4245                 len = sizeof(*di) + name_len + data_len;
4246                 di = (struct btrfs_dir_item *)((char *)di + len);
4247                 cur += len;
4248         }
4249         if (ret == DIR_ITEM_MISMATCH)
4250                 error(
4251                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4252                         root->objectid,
4253                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254                         ref_key->objectid, ref_key->offset,
4255                         key->type == BTRFS_DIR_ITEM_KEY ?
4256                                 "DIR_ITEM" : "DIR_INDEX",
4257                         key->objectid, key->offset, namelen, name,
4258                         imode_to_type(mode));
4259 out:
4260         btrfs_release_path(&path);
4261         return ret;
4262 }
4263
4264 /*
4265  * Traverse the given INODE_REF and call find_dir_item() to find related
4266  * DIR_ITEM/DIR_INDEX.
4267  *
4268  * @root:       the root of the fs/file tree
4269  * @ref_key:    the key of the INODE_REF
4270  * @refs:       the count of INODE_REF
4271  * @mode:       the st_mode of INODE_ITEM
4272  *
4273  * Return 0 if no error occurred.
4274  */
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276                            struct extent_buffer *node, int slot, u64 *refs,
4277                            int mode)
4278 {
4279         struct btrfs_key key;
4280         struct btrfs_inode_ref *ref;
4281         char namebuf[BTRFS_NAME_LEN] = {0};
4282         u32 total;
4283         u32 cur = 0;
4284         u32 len;
4285         u32 name_len;
4286         u64 index;
4287         int ret, err = 0;
4288
4289         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290         total = btrfs_item_size_nr(node, slot);
4291
4292 next:
4293         /* Update inode ref count */
4294         (*refs)++;
4295
4296         index = btrfs_inode_ref_index(node, ref);
4297         name_len = btrfs_inode_ref_name_len(node, ref);
4298         if (name_len <= BTRFS_NAME_LEN) {
4299                 len = name_len;
4300         } else {
4301                 len = BTRFS_NAME_LEN;
4302                 warning("root %llu INODE_REF[%llu %llu] name too long",
4303                         root->objectid, ref_key->objectid, ref_key->offset);
4304         }
4305
4306         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4307
4308         /* Check root dir ref name */
4309         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311                       root->objectid, ref_key->objectid, ref_key->offset,
4312                       namebuf);
4313                 err |= ROOT_DIR_ERROR;
4314         }
4315
4316         /* Find related DIR_INDEX */
4317         key.objectid = ref_key->offset;
4318         key.type = BTRFS_DIR_INDEX_KEY;
4319         key.offset = index;
4320         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4321         err |= ret;
4322
4323         /* Find related dir_item */
4324         key.objectid = ref_key->offset;
4325         key.type = BTRFS_DIR_ITEM_KEY;
4326         key.offset = btrfs_name_hash(namebuf, len);
4327         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4328         err |= ret;
4329
4330         len = sizeof(*ref) + name_len;
4331         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4332         cur += len;
4333         if (cur < total)
4334                 goto next;
4335
4336         return err;
4337 }
4338
4339 /*
4340  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341  * DIR_ITEM/DIR_INDEX.
4342  *
4343  * @root:       the root of the fs/file tree
4344  * @ref_key:    the key of the INODE_EXTREF
4345  * @refs:       the count of INODE_EXTREF
4346  * @mode:       the st_mode of INODE_ITEM
4347  *
4348  * Return 0 if no error occurred.
4349  */
4350 static int check_inode_extref(struct btrfs_root *root,
4351                               struct btrfs_key *ref_key,
4352                               struct extent_buffer *node, int slot, u64 *refs,
4353                               int mode)
4354 {
4355         struct btrfs_key key;
4356         struct btrfs_inode_extref *extref;
4357         char namebuf[BTRFS_NAME_LEN] = {0};
4358         u32 total;
4359         u32 cur = 0;
4360         u32 len;
4361         u32 name_len;
4362         u64 index;
4363         u64 parent;
4364         int ret;
4365         int err = 0;
4366
4367         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368         total = btrfs_item_size_nr(node, slot);
4369
4370 next:
4371         /* update inode ref count */
4372         (*refs)++;
4373         name_len = btrfs_inode_extref_name_len(node, extref);
4374         index = btrfs_inode_extref_index(node, extref);
4375         parent = btrfs_inode_extref_parent(node, extref);
4376         if (name_len <= BTRFS_NAME_LEN) {
4377                 len = name_len;
4378         } else {
4379                 len = BTRFS_NAME_LEN;
4380                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381                         root->objectid, ref_key->objectid, ref_key->offset);
4382         }
4383         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4384
4385         /* Check root dir ref name */
4386         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388                       root->objectid, ref_key->objectid, ref_key->offset,
4389                       namebuf);
4390                 err |= ROOT_DIR_ERROR;
4391         }
4392
4393         /* find related dir_index */
4394         key.objectid = parent;
4395         key.type = BTRFS_DIR_INDEX_KEY;
4396         key.offset = index;
4397         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4398         err |= ret;
4399
4400         /* find related dir_item */
4401         key.objectid = parent;
4402         key.type = BTRFS_DIR_ITEM_KEY;
4403         key.offset = btrfs_name_hash(namebuf, len);
4404         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4405         err |= ret;
4406
4407         len = sizeof(*extref) + name_len;
4408         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4409         cur += len;
4410
4411         if (cur < total)
4412                 goto next;
4413
4414         return err;
4415 }
4416
4417 /*
4418  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4419  * DIR_ITEM/DIR_INDEX match.
4420  *
4421  * @root:       the root of the fs/file tree
4422  * @key:        the key of the INODE_REF/INODE_EXTREF
4423  * @name:       the name in the INODE_REF/INODE_EXTREF
4424  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4425  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4426  * to (u64)-1
4427  * @ext_ref:    the EXTENDED_IREF feature
4428  *
4429  * Return 0 if no error occurred.
4430  * Return >0 for error bitmap
4431  */
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433                           char *name, int namelen, u64 index,
4434                           unsigned int ext_ref)
4435 {
4436         struct btrfs_path path;
4437         struct btrfs_inode_ref *ref;
4438         struct btrfs_inode_extref *extref;
4439         struct extent_buffer *node;
4440         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4441         u32 total;
4442         u32 cur = 0;
4443         u32 len;
4444         u32 ref_namelen;
4445         u64 ref_index;
4446         u64 parent;
4447         u64 dir_id;
4448         int slot;
4449         int ret;
4450
4451         btrfs_init_path(&path);
4452         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4453         if (ret) {
4454                 ret = INODE_REF_MISSING;
4455                 goto extref;
4456         }
4457
4458         node = path.nodes[0];
4459         slot = path.slots[0];
4460
4461         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462         total = btrfs_item_size_nr(node, slot);
4463
4464         /* Iterate all entry of INODE_REF */
4465         while (cur < total) {
4466                 ret = INODE_REF_MISSING;
4467
4468                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469                 ref_index = btrfs_inode_ref_index(node, ref);
4470                 if (index != (u64)-1 && index != ref_index)
4471                         goto next_ref;
4472
4473                 if (ref_namelen <= BTRFS_NAME_LEN) {
4474                         len = ref_namelen;
4475                 } else {
4476                         len = BTRFS_NAME_LEN;
4477                         warning("root %llu INODE %s[%llu %llu] name too long",
4478                                 root->objectid,
4479                                 key->type == BTRFS_INODE_REF_KEY ?
4480                                         "REF" : "EXTREF",
4481                                 key->objectid, key->offset);
4482                 }
4483                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4484                                    len);
4485
4486                 if (len != namelen || strncmp(ref_namebuf, name, len))
4487                         goto next_ref;
4488
4489                 ret = 0;
4490                 goto out;
4491 next_ref:
4492                 len = sizeof(*ref) + ref_namelen;
4493                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4494                 cur += len;
4495         }
4496
4497 extref:
4498         /* Skip if not support EXTENDED_IREF feature */
4499         if (!ext_ref)
4500                 goto out;
4501
4502         btrfs_release_path(&path);
4503         btrfs_init_path(&path);
4504
4505         dir_id = key->offset;
4506         key->type = BTRFS_INODE_EXTREF_KEY;
4507         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4508
4509         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4510         if (ret) {
4511                 ret = INODE_REF_MISSING;
4512                 goto out;
4513         }
4514
4515         node = path.nodes[0];
4516         slot = path.slots[0];
4517
4518         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4519         cur = 0;
4520         total = btrfs_item_size_nr(node, slot);
4521
4522         /* Iterate all entry of INODE_EXTREF */
4523         while (cur < total) {
4524                 ret = INODE_REF_MISSING;
4525
4526                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527                 ref_index = btrfs_inode_extref_index(node, extref);
4528                 parent = btrfs_inode_extref_parent(node, extref);
4529                 if (index != (u64)-1 && index != ref_index)
4530                         goto next_extref;
4531
4532                 if (parent != dir_id)
4533                         goto next_extref;
4534
4535                 if (ref_namelen <= BTRFS_NAME_LEN) {
4536                         len = ref_namelen;
4537                 } else {
4538                         len = BTRFS_NAME_LEN;
4539                         warning("root %llu INODE %s[%llu %llu] name too long",
4540                                 root->objectid,
4541                                 key->type == BTRFS_INODE_REF_KEY ?
4542                                         "REF" : "EXTREF",
4543                                 key->objectid, key->offset);
4544                 }
4545                 read_extent_buffer(node, ref_namebuf,
4546                                    (unsigned long)(extref + 1), len);
4547
4548                 if (len != namelen || strncmp(ref_namebuf, name, len))
4549                         goto next_extref;
4550
4551                 ret = 0;
4552                 goto out;
4553
4554 next_extref:
4555                 len = sizeof(*extref) + ref_namelen;
4556                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4557                 cur += len;
4558
4559         }
4560 out:
4561         btrfs_release_path(&path);
4562         return ret;
4563 }
4564
4565 /*
4566  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4568  *
4569  * @root:       the root of the fs/file tree
4570  * @key:        the key of the DIR_ITEM/DIR_INDEX
4571  * @size:       the st_size of the INODE_ITEM
4572  * @ext_ref:    the EXTENDED_IREF feature
4573  *
4574  * Return 0 if no error occurred.
4575  */
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577                           struct extent_buffer *node, int slot, u64 *size,
4578                           unsigned int ext_ref)
4579 {
4580         struct btrfs_dir_item *di;
4581         struct btrfs_inode_item *ii;
4582         struct btrfs_path path;
4583         struct btrfs_key location;
4584         char namebuf[BTRFS_NAME_LEN] = {0};
4585         u32 total;
4586         u32 cur = 0;
4587         u32 len;
4588         u32 name_len;
4589         u32 data_len;
4590         u8 filetype;
4591         u32 mode;
4592         u64 index;
4593         int ret;
4594         int err = 0;
4595
4596         /*
4597          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598          * ignore index check.
4599          */
4600         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4601
4602         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603         total = btrfs_item_size_nr(node, slot);
4604
4605         while (cur < total) {
4606                 data_len = btrfs_dir_data_len(node, di);
4607                 if (data_len)
4608                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610                               "DIR_ITEM" : "DIR_INDEX",
4611                               key->objectid, key->offset, data_len);
4612
4613                 name_len = btrfs_dir_name_len(node, di);
4614                 if (name_len <= BTRFS_NAME_LEN) {
4615                         len = name_len;
4616                 } else {
4617                         len = BTRFS_NAME_LEN;
4618                         warning("root %llu %s[%llu %llu] name too long",
4619                                 root->objectid,
4620                                 key->type == BTRFS_DIR_ITEM_KEY ?
4621                                 "DIR_ITEM" : "DIR_INDEX",
4622                                 key->objectid, key->offset);
4623                 }
4624                 (*size) += name_len;
4625
4626                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627                 filetype = btrfs_dir_type(node, di);
4628
4629                 btrfs_init_path(&path);
4630                 btrfs_dir_item_key_to_cpu(node, di, &location);
4631
4632                 /* Ignore related ROOT_ITEM check */
4633                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4634                         goto next;
4635
4636                 /* Check relative INODE_ITEM(existence/filetype) */
4637                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4638                 if (ret) {
4639                         err |= INODE_ITEM_MISSING;
4640                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643                               key->offset, location.objectid, name_len,
4644                               namebuf, filetype);
4645                         goto next;
4646                 }
4647
4648                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649                                     struct btrfs_inode_item);
4650                 mode = btrfs_inode_mode(path.nodes[0], ii);
4651
4652                 if (imode_to_type(mode) != filetype) {
4653                         err |= INODE_ITEM_MISMATCH;
4654                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657                               key->offset, name_len, namebuf, filetype);
4658                 }
4659
4660                 /* Check relative INODE_REF/INODE_EXTREF */
4661                 location.type = BTRFS_INODE_REF_KEY;
4662                 location.offset = key->objectid;
4663                 ret = find_inode_ref(root, &location, namebuf, len,
4664                                        index, ext_ref);
4665                 err |= ret;
4666                 if (ret & INODE_REF_MISSING)
4667                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670                               key->offset, name_len, namebuf, filetype);
4671
4672 next:
4673                 btrfs_release_path(&path);
4674                 len = sizeof(*di) + name_len + data_len;
4675                 di = (struct btrfs_dir_item *)((char *)di + len);
4676                 cur += len;
4677
4678                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680                               root->objectid, key->objectid, key->offset);
4681                         break;
4682                 }
4683         }
4684
4685         return err;
4686 }
4687
4688 /*
4689  * Check file extent datasum/hole, update the size of the file extents,
4690  * check and update the last offset of the file extent.
4691  *
4692  * @root:       the root of fs/file tree.
4693  * @fkey:       the key of the file extent.
4694  * @nodatasum:  INODE_NODATASUM feature.
4695  * @size:       the sum of all EXTENT_DATA items size for this inode.
4696  * @end:        the offset of the last extent.
4697  *
4698  * Return 0 if no error occurred.
4699  */
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701                              struct extent_buffer *node, int slot,
4702                              unsigned int nodatasum, u64 *size, u64 *end)
4703 {
4704         struct btrfs_file_extent_item *fi;
4705         u64 disk_bytenr;
4706         u64 disk_num_bytes;
4707         u64 extent_num_bytes;
4708         u64 extent_offset;
4709         u64 csum_found;         /* In byte size, sectorsize aligned */
4710         u64 search_start;       /* Logical range start we search for csum */
4711         u64 search_len;         /* Logical range len we search for csum */
4712         unsigned int extent_type;
4713         unsigned int is_hole;
4714         int compressed = 0;
4715         int ret;
4716         int err = 0;
4717
4718         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4719
4720         /* Check inline extent */
4721         extent_type = btrfs_file_extent_type(node, fi);
4722         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723                 struct btrfs_item *e = btrfs_item_nr(slot);
4724                 u32 item_inline_len;
4725
4726                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728                 compressed = btrfs_file_extent_compression(node, fi);
4729                 if (extent_num_bytes == 0) {
4730                         error(
4731                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732                                 root->objectid, fkey->objectid, fkey->offset);
4733                         err |= FILE_EXTENT_ERROR;
4734                 }
4735                 if (!compressed && extent_num_bytes != item_inline_len) {
4736                         error(
4737                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738                                 root->objectid, fkey->objectid, fkey->offset,
4739                                 extent_num_bytes, item_inline_len);
4740                         err |= FILE_EXTENT_ERROR;
4741                 }
4742                 *size += extent_num_bytes;
4743                 return err;
4744         }
4745
4746         /* Check extent type */
4747         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749                 err |= FILE_EXTENT_ERROR;
4750                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752                 return err;
4753         }
4754
4755         /* Check REG_EXTENT/PREALLOC_EXTENT */
4756         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759         extent_offset = btrfs_file_extent_offset(node, fi);
4760         compressed = btrfs_file_extent_compression(node, fi);
4761         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4762
4763         /*
4764          * Check EXTENT_DATA csum
4765          *
4766          * For plain (uncompressed) extent, we should only check the range
4767          * we're referring to, as it's possible that part of prealloc extent
4768          * has been written, and has csum:
4769          *
4770          * |<--- Original large preallocated extent A ---->|
4771          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4772          *      No csum                         Has csum
4773          *
4774          * For compressed extent, we should check the whole range.
4775          */
4776         if (!compressed) {
4777                 search_start = disk_bytenr + extent_offset;
4778                 search_len = extent_num_bytes;
4779         } else {
4780                 search_start = disk_bytenr;
4781                 search_len = disk_num_bytes;
4782         }
4783         ret = count_csum_range(root, search_start, search_len, &csum_found);
4784         if (csum_found > 0 && nodatasum) {
4785                 err |= ODD_CSUM_ITEM;
4786                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787                       root->objectid, fkey->objectid, fkey->offset);
4788         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789                    !is_hole && (ret < 0 || csum_found < search_len)) {
4790                 err |= CSUM_ITEM_MISSING;
4791                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792                       root->objectid, fkey->objectid, fkey->offset,
4793                       csum_found, search_len);
4794         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795                 err |= ODD_CSUM_ITEM;
4796                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4798         }
4799
4800         /* Check EXTENT_DATA hole */
4801         if (no_holes && is_hole) {
4802                 err |= FILE_EXTENT_ERROR;
4803                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804                       root->objectid, fkey->objectid, fkey->offset);
4805         } else if (!no_holes && *end != fkey->offset) {
4806                 err |= FILE_EXTENT_ERROR;
4807                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808                       root->objectid, fkey->objectid, fkey->offset);
4809         }
4810
4811         *end += extent_num_bytes;
4812         if (!is_hole)
4813                 *size += extent_num_bytes;
4814
4815         return err;
4816 }
4817
4818 /*
4819  * Check INODE_ITEM and related ITEMs (the same inode number)
4820  * 1. check link count
4821  * 2. check inode ref/extref
4822  * 3. check dir item/index
4823  *
4824  * @ext_ref:    the EXTENDED_IREF feature
4825  *
4826  * Return 0 if no error occurred.
4827  * Return >0 for error or hit the traversal is done(by error bitmap)
4828  */
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830                             unsigned int ext_ref)
4831 {
4832         struct extent_buffer *node;
4833         struct btrfs_inode_item *ii;
4834         struct btrfs_key key;
4835         u64 inode_id;
4836         u32 mode;
4837         u64 nlink;
4838         u64 nbytes;
4839         u64 isize;
4840         u64 size = 0;
4841         u64 refs = 0;
4842         u64 extent_end = 0;
4843         u64 extent_size = 0;
4844         unsigned int dir;
4845         unsigned int nodatasum;
4846         int slot;
4847         int ret;
4848         int err = 0;
4849
4850         node = path->nodes[0];
4851         slot = path->slots[0];
4852
4853         btrfs_item_key_to_cpu(node, &key, slot);
4854         inode_id = key.objectid;
4855
4856         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857                 ret = btrfs_next_item(root, path);
4858                 if (ret > 0)
4859                         err |= LAST_ITEM;
4860                 return err;
4861         }
4862
4863         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864         isize = btrfs_inode_size(node, ii);
4865         nbytes = btrfs_inode_nbytes(node, ii);
4866         mode = btrfs_inode_mode(node, ii);
4867         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868         nlink = btrfs_inode_nlink(node, ii);
4869         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4870
4871         while (1) {
4872                 ret = btrfs_next_item(root, path);
4873                 if (ret < 0) {
4874                         /* out will fill 'err' rusing current statistics */
4875                         goto out;
4876                 } else if (ret > 0) {
4877                         err |= LAST_ITEM;
4878                         goto out;
4879                 }
4880
4881                 node = path->nodes[0];
4882                 slot = path->slots[0];
4883                 btrfs_item_key_to_cpu(node, &key, slot);
4884                 if (key.objectid != inode_id)
4885                         goto out;
4886
4887                 switch (key.type) {
4888                 case BTRFS_INODE_REF_KEY:
4889                         ret = check_inode_ref(root, &key, node, slot, &refs,
4890                                               mode);
4891                         err |= ret;
4892                         break;
4893                 case BTRFS_INODE_EXTREF_KEY:
4894                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896                                         root->objectid, key.objectid,
4897                                         key.offset);
4898                         ret = check_inode_extref(root, &key, node, slot, &refs,
4899                                                  mode);
4900                         err |= ret;
4901                         break;
4902                 case BTRFS_DIR_ITEM_KEY:
4903                 case BTRFS_DIR_INDEX_KEY:
4904                         if (!dir) {
4905                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906                                         root->objectid, inode_id,
4907                                         imode_to_type(mode), key.objectid,
4908                                         key.offset);
4909                         }
4910                         ret = check_dir_item(root, &key, node, slot, &size,
4911                                              ext_ref);
4912                         err |= ret;
4913                         break;
4914                 case BTRFS_EXTENT_DATA_KEY:
4915                         if (dir) {
4916                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917                                         root->objectid, inode_id, key.objectid,
4918                                         key.offset);
4919                         }
4920                         ret = check_file_extent(root, &key, node, slot,
4921                                                 nodatasum, &extent_size,
4922                                                 &extent_end);
4923                         err |= ret;
4924                         break;
4925                 case BTRFS_XATTR_ITEM_KEY:
4926                         break;
4927                 default:
4928                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929                               key.objectid, key.type, key.offset);
4930                 }
4931         }
4932
4933 out:
4934         /* verify INODE_ITEM nlink/isize/nbytes */
4935         if (dir) {
4936                 if (nlink != 1) {
4937                         err |= LINK_COUNT_ERROR;
4938                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939                               root->objectid, inode_id, nlink);
4940                 }
4941
4942                 /*
4943                  * Just a warning, as dir inode nbytes is just an
4944                  * instructive value.
4945                  */
4946                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948                                 root->objectid, inode_id, root->nodesize);
4949                 }
4950
4951                 if (isize != size) {
4952                         err |= ISIZE_ERROR;
4953                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954                               root->objectid, inode_id, isize, size);
4955                 }
4956         } else {
4957                 if (nlink != refs) {
4958                         err |= LINK_COUNT_ERROR;
4959                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960                               root->objectid, inode_id, nlink, refs);
4961                 } else if (!nlink) {
4962                         err |= ORPHAN_ITEM;
4963                 }
4964
4965                 if (!nbytes && !no_holes && extent_end < isize) {
4966                         err |= NBYTES_ERROR;
4967                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968                               root->objectid, inode_id, isize);
4969                 }
4970
4971                 if (nbytes != extent_size) {
4972                         err |= NBYTES_ERROR;
4973                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974                               root->objectid, inode_id, nbytes, extent_size);
4975                 }
4976         }
4977
4978         return err;
4979 }
4980
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4982 {
4983         struct btrfs_path path;
4984         struct btrfs_key key;
4985         int err = 0;
4986         int ret;
4987
4988         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989         key.type = BTRFS_INODE_ITEM_KEY;
4990         key.offset = 0;
4991
4992         /* For root being dropped, we don't need to check first inode */
4993         if (btrfs_root_refs(&root->root_item) == 0 &&
4994             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4995             key.objectid)
4996                 return 0;
4997
4998         btrfs_init_path(&path);
4999
5000         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5001         if (ret < 0)
5002                 goto out;
5003         if (ret > 0) {
5004                 ret = 0;
5005                 err |= INODE_ITEM_MISSING;
5006                 error("first inode item of root %llu is missing",
5007                       root->objectid);
5008         }
5009
5010         err |= check_inode_item(root, &path, ext_ref);
5011         err &= ~LAST_ITEM;
5012         if (err && !ret)
5013                 ret = -EIO;
5014 out:
5015         btrfs_release_path(&path);
5016         return ret;
5017 }
5018
5019 /*
5020  * Iterate all item on the tree and call check_inode_item() to check.
5021  *
5022  * @root:       the root of the tree to be checked.
5023  * @ext_ref:    the EXTENDED_IREF feature
5024  *
5025  * Return 0 if no error found.
5026  * Return <0 for error.
5027  */
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5029 {
5030         struct btrfs_path path;
5031         struct node_refs nrefs;
5032         struct btrfs_root_item *root_item = &root->root_item;
5033         int ret;
5034         int level;
5035         int err = 0;
5036
5037         /*
5038          * We need to manually check the first inode item(256)
5039          * As the following traversal function will only start from
5040          * the first inode item in the leaf, if inode item(256) is missing
5041          * we will just skip it forever.
5042          */
5043         ret = check_fs_first_inode(root, ext_ref);
5044         if (ret < 0)
5045                 return ret;
5046
5047         memset(&nrefs, 0, sizeof(nrefs));
5048         level = btrfs_header_level(root->node);
5049         btrfs_init_path(&path);
5050
5051         if (btrfs_root_refs(root_item) > 0 ||
5052             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5053                 path.nodes[level] = root->node;
5054                 path.slots[level] = 0;
5055                 extent_buffer_get(root->node);
5056         } else {
5057                 struct btrfs_key key;
5058
5059                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060                 level = root_item->drop_level;
5061                 path.lowest_level = level;
5062                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5063                 if (ret < 0)
5064                         goto out;
5065                 ret = 0;
5066         }
5067
5068         while (1) {
5069                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5070                 err |= !!ret;
5071
5072                 /* if ret is negative, walk shall stop */
5073                 if (ret < 0) {
5074                         ret = err;
5075                         break;
5076                 }
5077
5078                 ret = walk_up_tree_v2(root, &path, &level);
5079                 if (ret != 0) {
5080                         /* Normal exit, reset ret to err */
5081                         ret = err;
5082                         break;
5083                 }
5084         }
5085
5086 out:
5087         btrfs_release_path(&path);
5088         return ret;
5089 }
5090
5091 /*
5092  * Find the relative ref for root_ref and root_backref.
5093  *
5094  * @root:       the root of the root tree.
5095  * @ref_key:    the key of the root ref.
5096  *
5097  * Return 0 if no error occurred.
5098  */
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100                           struct extent_buffer *node, int slot)
5101 {
5102         struct btrfs_path path;
5103         struct btrfs_key key;
5104         struct btrfs_root_ref *ref;
5105         struct btrfs_root_ref *backref;
5106         char ref_name[BTRFS_NAME_LEN] = {0};
5107         char backref_name[BTRFS_NAME_LEN] = {0};
5108         u64 ref_dirid;
5109         u64 ref_seq;
5110         u32 ref_namelen;
5111         u64 backref_dirid;
5112         u64 backref_seq;
5113         u32 backref_namelen;
5114         u32 len;
5115         int ret;
5116         int err = 0;
5117
5118         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119         ref_dirid = btrfs_root_ref_dirid(node, ref);
5120         ref_seq = btrfs_root_ref_sequence(node, ref);
5121         ref_namelen = btrfs_root_ref_name_len(node, ref);
5122
5123         if (ref_namelen <= BTRFS_NAME_LEN) {
5124                 len = ref_namelen;
5125         } else {
5126                 len = BTRFS_NAME_LEN;
5127                 warning("%s[%llu %llu] ref_name too long",
5128                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5129                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5130                         ref_key->offset);
5131         }
5132         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5133
5134         /* Find relative root_ref */
5135         key.objectid = ref_key->offset;
5136         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137         key.offset = ref_key->objectid;
5138
5139         btrfs_init_path(&path);
5140         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5141         if (ret) {
5142                 err |= ROOT_REF_MISSING;
5143                 error("%s[%llu %llu] couldn't find relative ref",
5144                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5145                       "ROOT_REF" : "ROOT_BACKREF",
5146                       ref_key->objectid, ref_key->offset);
5147                 goto out;
5148         }
5149
5150         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151                                  struct btrfs_root_ref);
5152         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5155
5156         if (backref_namelen <= BTRFS_NAME_LEN) {
5157                 len = backref_namelen;
5158         } else {
5159                 len = BTRFS_NAME_LEN;
5160                 warning("%s[%llu %llu] ref_name too long",
5161                         key.type == BTRFS_ROOT_REF_KEY ?
5162                         "ROOT_REF" : "ROOT_BACKREF",
5163                         key.objectid, key.offset);
5164         }
5165         read_extent_buffer(path.nodes[0], backref_name,
5166                            (unsigned long)(backref + 1), len);
5167
5168         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169             ref_namelen != backref_namelen ||
5170             strncmp(ref_name, backref_name, len)) {
5171                 err |= ROOT_REF_MISMATCH;
5172                 error("%s[%llu %llu] mismatch relative ref",
5173                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5174                       "ROOT_REF" : "ROOT_BACKREF",
5175                       ref_key->objectid, ref_key->offset);
5176         }
5177 out:
5178         btrfs_release_path(&path);
5179         return err;
5180 }
5181
5182 /*
5183  * Check all fs/file tree in low_memory mode.
5184  *
5185  * 1. for fs tree root item, call check_fs_root_v2()
5186  * 2. for fs tree root ref/backref, call check_root_ref()
5187  *
5188  * Return 0 if no error occurred.
5189  */
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5191 {
5192         struct btrfs_root *tree_root = fs_info->tree_root;
5193         struct btrfs_root *cur_root = NULL;
5194         struct btrfs_path path;
5195         struct btrfs_key key;
5196         struct extent_buffer *node;
5197         unsigned int ext_ref;
5198         int slot;
5199         int ret;
5200         int err = 0;
5201
5202         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5203
5204         btrfs_init_path(&path);
5205         key.objectid = BTRFS_FS_TREE_OBJECTID;
5206         key.offset = 0;
5207         key.type = BTRFS_ROOT_ITEM_KEY;
5208
5209         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5210         if (ret < 0) {
5211                 err = ret;
5212                 goto out;
5213         } else if (ret > 0) {
5214                 err = -ENOENT;
5215                 goto out;
5216         }
5217
5218         while (1) {
5219                 node = path.nodes[0];
5220                 slot = path.slots[0];
5221                 btrfs_item_key_to_cpu(node, &key, slot);
5222                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5223                         goto out;
5224                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225                     fs_root_objectid(key.objectid)) {
5226                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5228                                                                        &key);
5229                         } else {
5230                                 key.offset = (u64)-1;
5231                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5232                         }
5233
5234                         if (IS_ERR(cur_root)) {
5235                                 error("Fail to read fs/subvol tree: %lld",
5236                                       key.objectid);
5237                                 err = -EIO;
5238                                 goto next;
5239                         }
5240
5241                         ret = check_fs_root_v2(cur_root, ext_ref);
5242                         err |= ret;
5243
5244                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245                                 btrfs_free_fs_root(cur_root);
5246                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5248                         ret = check_root_ref(tree_root, &key, node, slot);
5249                         err |= ret;
5250                 }
5251 next:
5252                 ret = btrfs_next_item(tree_root, &path);
5253                 if (ret > 0)
5254                         goto out;
5255                 if (ret < 0) {
5256                         err = ret;
5257                         goto out;
5258                 }
5259         }
5260
5261 out:
5262         btrfs_release_path(&path);
5263         return err;
5264 }
5265
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5267 {
5268         struct list_head *cur = rec->backrefs.next;
5269         struct extent_backref *back;
5270         struct tree_backref *tback;
5271         struct data_backref *dback;
5272         u64 found = 0;
5273         int err = 0;
5274
5275         while(cur != &rec->backrefs) {
5276                 back = to_extent_backref(cur);
5277                 cur = cur->next;
5278                 if (!back->found_extent_tree) {
5279                         err = 1;
5280                         if (!print_errs)
5281                                 goto out;
5282                         if (back->is_data) {
5283                                 dback = to_data_backref(back);
5284                                 fprintf(stderr, "Backref %llu %s %llu"
5285                                         " owner %llu offset %llu num_refs %lu"
5286                                         " not found in extent tree\n",
5287                                         (unsigned long long)rec->start,
5288                                         back->full_backref ?
5289                                         "parent" : "root",
5290                                         back->full_backref ?
5291                                         (unsigned long long)dback->parent:
5292                                         (unsigned long long)dback->root,
5293                                         (unsigned long long)dback->owner,
5294                                         (unsigned long long)dback->offset,
5295                                         (unsigned long)dback->num_refs);
5296                         } else {
5297                                 tback = to_tree_backref(back);
5298                                 fprintf(stderr, "Backref %llu parent %llu"
5299                                         " root %llu not found in extent tree\n",
5300                                         (unsigned long long)rec->start,
5301                                         (unsigned long long)tback->parent,
5302                                         (unsigned long long)tback->root);
5303                         }
5304                 }
5305                 if (!back->is_data && !back->found_ref) {
5306                         err = 1;
5307                         if (!print_errs)
5308                                 goto out;
5309                         tback = to_tree_backref(back);
5310                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311                                 (unsigned long long)rec->start,
5312                                 back->full_backref ? "parent" : "root",
5313                                 back->full_backref ?
5314                                 (unsigned long long)tback->parent :
5315                                 (unsigned long long)tback->root, back);
5316                 }
5317                 if (back->is_data) {
5318                         dback = to_data_backref(back);
5319                         if (dback->found_ref != dback->num_refs) {
5320                                 err = 1;
5321                                 if (!print_errs)
5322                                         goto out;
5323                                 fprintf(stderr, "Incorrect local backref count"
5324                                         " on %llu %s %llu owner %llu"
5325                                         " offset %llu found %u wanted %u back %p\n",
5326                                         (unsigned long long)rec->start,
5327                                         back->full_backref ?
5328                                         "parent" : "root",
5329                                         back->full_backref ?
5330                                         (unsigned long long)dback->parent:
5331                                         (unsigned long long)dback->root,
5332                                         (unsigned long long)dback->owner,
5333                                         (unsigned long long)dback->offset,
5334                                         dback->found_ref, dback->num_refs, back);
5335                         }
5336                         if (dback->disk_bytenr != rec->start) {
5337                                 err = 1;
5338                                 if (!print_errs)
5339                                         goto out;
5340                                 fprintf(stderr, "Backref disk bytenr does not"
5341                                         " match extent record, bytenr=%llu, "
5342                                         "ref bytenr=%llu\n",
5343                                         (unsigned long long)rec->start,
5344                                         (unsigned long long)dback->disk_bytenr);
5345                         }
5346
5347                         if (dback->bytes != rec->nr) {
5348                                 err = 1;
5349                                 if (!print_errs)
5350                                         goto out;
5351                                 fprintf(stderr, "Backref bytes do not match "
5352                                         "extent backref, bytenr=%llu, ref "
5353                                         "bytes=%llu, backref bytes=%llu\n",
5354                                         (unsigned long long)rec->start,
5355                                         (unsigned long long)rec->nr,
5356                                         (unsigned long long)dback->bytes);
5357                         }
5358                 }
5359                 if (!back->is_data) {
5360                         found += 1;
5361                 } else {
5362                         dback = to_data_backref(back);
5363                         found += dback->found_ref;
5364                 }
5365         }
5366         if (found != rec->refs) {
5367                 err = 1;
5368                 if (!print_errs)
5369                         goto out;
5370                 fprintf(stderr, "Incorrect global backref count "
5371                         "on %llu found %llu wanted %llu\n",
5372                         (unsigned long long)rec->start,
5373                         (unsigned long long)found,
5374                         (unsigned long long)rec->refs);
5375         }
5376 out:
5377         return err;
5378 }
5379
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5381 {
5382         struct extent_backref *back;
5383         struct list_head *cur;
5384         while (!list_empty(&rec->backrefs)) {
5385                 cur = rec->backrefs.next;
5386                 back = to_extent_backref(cur);
5387                 list_del(cur);
5388                 free(back);
5389         }
5390         return 0;
5391 }
5392
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5394 {
5395         struct cache_extent *cache;
5396         struct extent_record *rec;
5397
5398         while (1) {
5399                 cache = first_cache_extent(extent_cache);
5400                 if (!cache)
5401                         break;
5402                 rec = container_of(cache, struct extent_record, cache);
5403                 remove_cache_extent(extent_cache, cache);
5404                 free_all_extent_backrefs(rec);
5405                 free(rec);
5406         }
5407 }
5408
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410                                  struct extent_record *rec)
5411 {
5412         if (rec->content_checked && rec->owner_ref_checked &&
5413             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415             !rec->bad_full_backref && !rec->crossing_stripes &&
5416             !rec->wrong_chunk_type) {
5417                 remove_cache_extent(extent_cache, &rec->cache);
5418                 free_all_extent_backrefs(rec);
5419                 list_del_init(&rec->list);
5420                 free(rec);
5421         }
5422         return 0;
5423 }
5424
5425 static int check_owner_ref(struct btrfs_root *root,
5426                             struct extent_record *rec,
5427                             struct extent_buffer *buf)
5428 {
5429         struct extent_backref *node;
5430         struct tree_backref *back;
5431         struct btrfs_root *ref_root;
5432         struct btrfs_key key;
5433         struct btrfs_path path;
5434         struct extent_buffer *parent;
5435         int level;
5436         int found = 0;
5437         int ret;
5438
5439         list_for_each_entry(node, &rec->backrefs, list) {
5440                 if (node->is_data)
5441                         continue;
5442                 if (!node->found_ref)
5443                         continue;
5444                 if (node->full_backref)
5445                         continue;
5446                 back = to_tree_backref(node);
5447                 if (btrfs_header_owner(buf) == back->root)
5448                         return 0;
5449         }
5450         BUG_ON(rec->is_root);
5451
5452         /* try to find the block by search corresponding fs tree */
5453         key.objectid = btrfs_header_owner(buf);
5454         key.type = BTRFS_ROOT_ITEM_KEY;
5455         key.offset = (u64)-1;
5456
5457         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458         if (IS_ERR(ref_root))
5459                 return 1;
5460
5461         level = btrfs_header_level(buf);
5462         if (level == 0)
5463                 btrfs_item_key_to_cpu(buf, &key, 0);
5464         else
5465                 btrfs_node_key_to_cpu(buf, &key, 0);
5466
5467         btrfs_init_path(&path);
5468         path.lowest_level = level + 1;
5469         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5470         if (ret < 0)
5471                 return 0;
5472
5473         parent = path.nodes[level + 1];
5474         if (parent && buf->start == btrfs_node_blockptr(parent,
5475                                                         path.slots[level + 1]))
5476                 found = 1;
5477
5478         btrfs_release_path(&path);
5479         return found ? 0 : 1;
5480 }
5481
5482 static int is_extent_tree_record(struct extent_record *rec)
5483 {
5484         struct list_head *cur = rec->backrefs.next;
5485         struct extent_backref *node;
5486         struct tree_backref *back;
5487         int is_extent = 0;
5488
5489         while(cur != &rec->backrefs) {
5490                 node = to_extent_backref(cur);
5491                 cur = cur->next;
5492                 if (node->is_data)
5493                         return 0;
5494                 back = to_tree_backref(node);
5495                 if (node->full_backref)
5496                         return 0;
5497                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5498                         is_extent = 1;
5499         }
5500         return is_extent;
5501 }
5502
5503
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505                                struct cache_tree *extent_cache,
5506                                u64 start, u64 len)
5507 {
5508         struct extent_record *rec;
5509         struct cache_extent *cache;
5510         struct btrfs_key key;
5511
5512         cache = lookup_cache_extent(extent_cache, start, len);
5513         if (!cache)
5514                 return 0;
5515
5516         rec = container_of(cache, struct extent_record, cache);
5517         if (!is_extent_tree_record(rec))
5518                 return 0;
5519
5520         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5522 }
5523
5524 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5525                        struct extent_buffer *buf, int slot)
5526 {
5527         if (btrfs_header_level(buf)) {
5528                 struct btrfs_key_ptr ptr1, ptr2;
5529
5530                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5531                                    sizeof(struct btrfs_key_ptr));
5532                 read_extent_buffer(buf, &ptr2,
5533                                    btrfs_node_key_ptr_offset(slot + 1),
5534                                    sizeof(struct btrfs_key_ptr));
5535                 write_extent_buffer(buf, &ptr1,
5536                                     btrfs_node_key_ptr_offset(slot + 1),
5537                                     sizeof(struct btrfs_key_ptr));
5538                 write_extent_buffer(buf, &ptr2,
5539                                     btrfs_node_key_ptr_offset(slot),
5540                                     sizeof(struct btrfs_key_ptr));
5541                 if (slot == 0) {
5542                         struct btrfs_disk_key key;
5543                         btrfs_node_key(buf, &key, 0);
5544                         btrfs_fixup_low_keys(root, path, &key,
5545                                              btrfs_header_level(buf) + 1);
5546                 }
5547         } else {
5548                 struct btrfs_item *item1, *item2;
5549                 struct btrfs_key k1, k2;
5550                 char *item1_data, *item2_data;
5551                 u32 item1_offset, item2_offset, item1_size, item2_size;
5552
5553                 item1 = btrfs_item_nr(slot);
5554                 item2 = btrfs_item_nr(slot + 1);
5555                 btrfs_item_key_to_cpu(buf, &k1, slot);
5556                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5557                 item1_offset = btrfs_item_offset(buf, item1);
5558                 item2_offset = btrfs_item_offset(buf, item2);
5559                 item1_size = btrfs_item_size(buf, item1);
5560                 item2_size = btrfs_item_size(buf, item2);
5561
5562                 item1_data = malloc(item1_size);
5563                 if (!item1_data)
5564                         return -ENOMEM;
5565                 item2_data = malloc(item2_size);
5566                 if (!item2_data) {
5567                         free(item1_data);
5568                         return -ENOMEM;
5569                 }
5570
5571                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5572                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5573
5574                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5575                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5576                 free(item1_data);
5577                 free(item2_data);
5578
5579                 btrfs_set_item_offset(buf, item1, item2_offset);
5580                 btrfs_set_item_offset(buf, item2, item1_offset);
5581                 btrfs_set_item_size(buf, item1, item2_size);
5582                 btrfs_set_item_size(buf, item2, item1_size);
5583
5584                 path->slots[0] = slot;
5585                 btrfs_set_item_key_unsafe(root, path, &k2);
5586                 path->slots[0] = slot + 1;
5587                 btrfs_set_item_key_unsafe(root, path, &k1);
5588         }
5589         return 0;
5590 }
5591
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5593 {
5594         struct extent_buffer *buf;
5595         struct btrfs_key k1, k2;
5596         int i;
5597         int level = path->lowest_level;
5598         int ret = -EIO;
5599
5600         buf = path->nodes[level];
5601         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5602                 if (level) {
5603                         btrfs_node_key_to_cpu(buf, &k1, i);
5604                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5605                 } else {
5606                         btrfs_item_key_to_cpu(buf, &k1, i);
5607                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5608                 }
5609                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5610                         continue;
5611                 ret = swap_values(root, path, buf, i);
5612                 if (ret)
5613                         break;
5614                 btrfs_mark_buffer_dirty(buf);
5615                 i = 0;
5616         }
5617         return ret;
5618 }
5619
5620 static int delete_bogus_item(struct btrfs_root *root,
5621                              struct btrfs_path *path,
5622                              struct extent_buffer *buf, int slot)
5623 {
5624         struct btrfs_key key;
5625         int nritems = btrfs_header_nritems(buf);
5626
5627         btrfs_item_key_to_cpu(buf, &key, slot);
5628
5629         /* These are all the keys we can deal with missing. */
5630         if (key.type != BTRFS_DIR_INDEX_KEY &&
5631             key.type != BTRFS_EXTENT_ITEM_KEY &&
5632             key.type != BTRFS_METADATA_ITEM_KEY &&
5633             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5635                 return -1;
5636
5637         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638                (unsigned long long)key.objectid, key.type,
5639                (unsigned long long)key.offset, slot, buf->start);
5640         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641                               btrfs_item_nr_offset(slot + 1),
5642                               sizeof(struct btrfs_item) *
5643                               (nritems - slot - 1));
5644         btrfs_set_header_nritems(buf, nritems - 1);
5645         if (slot == 0) {
5646                 struct btrfs_disk_key disk_key;
5647
5648                 btrfs_item_key(buf, &disk_key, 0);
5649                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5650         }
5651         btrfs_mark_buffer_dirty(buf);
5652         return 0;
5653 }
5654
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5656 {
5657         struct extent_buffer *buf;
5658         int i;
5659         int ret = 0;
5660
5661         /* We should only get this for leaves */
5662         BUG_ON(path->lowest_level);
5663         buf = path->nodes[0];
5664 again:
5665         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666                 unsigned int shift = 0, offset;
5667
5668                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669                     BTRFS_LEAF_DATA_SIZE(root)) {
5670                         if (btrfs_item_end_nr(buf, i) >
5671                             BTRFS_LEAF_DATA_SIZE(root)) {
5672                                 ret = delete_bogus_item(root, path, buf, i);
5673                                 if (!ret)
5674                                         goto again;
5675                                 fprintf(stderr, "item is off the end of the "
5676                                         "leaf, can't fix\n");
5677                                 ret = -EIO;
5678                                 break;
5679                         }
5680                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5681                                 btrfs_item_end_nr(buf, i);
5682                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683                            btrfs_item_offset_nr(buf, i - 1)) {
5684                         if (btrfs_item_end_nr(buf, i) >
5685                             btrfs_item_offset_nr(buf, i - 1)) {
5686                                 ret = delete_bogus_item(root, path, buf, i);
5687                                 if (!ret)
5688                                         goto again;
5689                                 fprintf(stderr, "items overlap, can't fix\n");
5690                                 ret = -EIO;
5691                                 break;
5692                         }
5693                         shift = btrfs_item_offset_nr(buf, i - 1) -
5694                                 btrfs_item_end_nr(buf, i);
5695                 }
5696                 if (!shift)
5697                         continue;
5698
5699                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700                        i, shift, (unsigned long long)buf->start);
5701                 offset = btrfs_item_offset_nr(buf, i);
5702                 memmove_extent_buffer(buf,
5703                                       btrfs_leaf_data(buf) + offset + shift,
5704                                       btrfs_leaf_data(buf) + offset,
5705                                       btrfs_item_size_nr(buf, i));
5706                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5707                                       offset + shift);
5708                 btrfs_mark_buffer_dirty(buf);
5709         }
5710
5711         /*
5712          * We may have moved things, in which case we want to exit so we don't
5713          * write those changes out.  Once we have proper abort functionality in
5714          * progs this can be changed to something nicer.
5715          */
5716         BUG_ON(ret);
5717         return ret;
5718 }
5719
5720 /*
5721  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5722  * then just return -EIO.
5723  */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725                                 struct extent_buffer *buf,
5726                                 enum btrfs_tree_block_status status)
5727 {
5728         struct btrfs_trans_handle *trans;
5729         struct ulist *roots;
5730         struct ulist_node *node;
5731         struct btrfs_root *search_root;
5732         struct btrfs_path path;
5733         struct ulist_iterator iter;
5734         struct btrfs_key root_key, key;
5735         int ret;
5736
5737         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5739                 return -EIO;
5740
5741         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5742         if (ret)
5743                 return -EIO;
5744
5745         btrfs_init_path(&path);
5746         ULIST_ITER_INIT(&iter);
5747         while ((node = ulist_next(roots, &iter))) {
5748                 root_key.objectid = node->val;
5749                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750                 root_key.offset = (u64)-1;
5751
5752                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5753                 if (IS_ERR(root)) {
5754                         ret = -EIO;
5755                         break;
5756                 }
5757
5758
5759                 trans = btrfs_start_transaction(search_root, 0);
5760                 if (IS_ERR(trans)) {
5761                         ret = PTR_ERR(trans);
5762                         break;
5763                 }
5764
5765                 path.lowest_level = btrfs_header_level(buf);
5766                 path.skip_check_block = 1;
5767                 if (path.lowest_level)
5768                         btrfs_node_key_to_cpu(buf, &key, 0);
5769                 else
5770                         btrfs_item_key_to_cpu(buf, &key, 0);
5771                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5772                 if (ret) {
5773                         ret = -EIO;
5774                         btrfs_commit_transaction(trans, search_root);
5775                         break;
5776                 }
5777                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778                         ret = fix_key_order(search_root, &path);
5779                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780                         ret = fix_item_offset(search_root, &path);
5781                 if (ret) {
5782                         btrfs_commit_transaction(trans, search_root);
5783                         break;
5784                 }
5785                 btrfs_release_path(&path);
5786                 btrfs_commit_transaction(trans, search_root);
5787         }
5788         ulist_free(roots);
5789         btrfs_release_path(&path);
5790         return ret;
5791 }
5792
5793 static int check_block(struct btrfs_root *root,
5794                        struct cache_tree *extent_cache,
5795                        struct extent_buffer *buf, u64 flags)
5796 {
5797         struct extent_record *rec;
5798         struct cache_extent *cache;
5799         struct btrfs_key key;
5800         enum btrfs_tree_block_status status;
5801         int ret = 0;
5802         int level;
5803
5804         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5805         if (!cache)
5806                 return 1;
5807         rec = container_of(cache, struct extent_record, cache);
5808         rec->generation = btrfs_header_generation(buf);
5809
5810         level = btrfs_header_level(buf);
5811         if (btrfs_header_nritems(buf) > 0) {
5812
5813                 if (level == 0)
5814                         btrfs_item_key_to_cpu(buf, &key, 0);
5815                 else
5816                         btrfs_node_key_to_cpu(buf, &key, 0);
5817
5818                 rec->info_objectid = key.objectid;
5819         }
5820         rec->info_level = level;
5821
5822         if (btrfs_is_leaf(buf))
5823                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5824         else
5825                 status = btrfs_check_node(root, &rec->parent_key, buf);
5826
5827         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5828                 if (repair)
5829                         status = try_to_fix_bad_block(root, buf, status);
5830                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5831                         ret = -EIO;
5832                         fprintf(stderr, "bad block %llu\n",
5833                                 (unsigned long long)buf->start);
5834                 } else {
5835                         /*
5836                          * Signal to callers we need to start the scan over
5837                          * again since we'll have cowed blocks.
5838                          */
5839                         ret = -EAGAIN;
5840                 }
5841         } else {
5842                 rec->content_checked = 1;
5843                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844                         rec->owner_ref_checked = 1;
5845                 else {
5846                         ret = check_owner_ref(root, rec, buf);
5847                         if (!ret)
5848                                 rec->owner_ref_checked = 1;
5849                 }
5850         }
5851         if (!ret)
5852                 maybe_free_extent_rec(extent_cache, rec);
5853         return ret;
5854 }
5855
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857                                                 u64 parent, u64 root)
5858 {
5859         struct list_head *cur = rec->backrefs.next;
5860         struct extent_backref *node;
5861         struct tree_backref *back;
5862
5863         while(cur != &rec->backrefs) {
5864                 node = to_extent_backref(cur);
5865                 cur = cur->next;
5866                 if (node->is_data)
5867                         continue;
5868                 back = to_tree_backref(node);
5869                 if (parent > 0) {
5870                         if (!node->full_backref)
5871                                 continue;
5872                         if (parent == back->parent)
5873                                 return back;
5874                 } else {
5875                         if (node->full_backref)
5876                                 continue;
5877                         if (back->root == root)
5878                                 return back;
5879                 }
5880         }
5881         return NULL;
5882 }
5883
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885                                                 u64 parent, u64 root)
5886 {
5887         struct tree_backref *ref = malloc(sizeof(*ref));
5888
5889         if (!ref)
5890                 return NULL;
5891         memset(&ref->node, 0, sizeof(ref->node));
5892         if (parent > 0) {
5893                 ref->parent = parent;
5894                 ref->node.full_backref = 1;
5895         } else {
5896                 ref->root = root;
5897                 ref->node.full_backref = 0;
5898         }
5899         list_add_tail(&ref->node.list, &rec->backrefs);
5900
5901         return ref;
5902 }
5903
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905                                                 u64 parent, u64 root,
5906                                                 u64 owner, u64 offset,
5907                                                 int found_ref,
5908                                                 u64 disk_bytenr, u64 bytes)
5909 {
5910         struct list_head *cur = rec->backrefs.next;
5911         struct extent_backref *node;
5912         struct data_backref *back;
5913
5914         while(cur != &rec->backrefs) {
5915                 node = to_extent_backref(cur);
5916                 cur = cur->next;
5917                 if (!node->is_data)
5918                         continue;
5919                 back = to_data_backref(node);
5920                 if (parent > 0) {
5921                         if (!node->full_backref)
5922                                 continue;
5923                         if (parent == back->parent)
5924                                 return back;
5925                 } else {
5926                         if (node->full_backref)
5927                                 continue;
5928                         if (back->root == root && back->owner == owner &&
5929                             back->offset == offset) {
5930                                 if (found_ref && node->found_ref &&
5931                                     (back->bytes != bytes ||
5932                                     back->disk_bytenr != disk_bytenr))
5933                                         continue;
5934                                 return back;
5935                         }
5936                 }
5937         }
5938         return NULL;
5939 }
5940
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942                                                 u64 parent, u64 root,
5943                                                 u64 owner, u64 offset,
5944                                                 u64 max_size)
5945 {
5946         struct data_backref *ref = malloc(sizeof(*ref));
5947
5948         if (!ref)
5949                 return NULL;
5950         memset(&ref->node, 0, sizeof(ref->node));
5951         ref->node.is_data = 1;
5952
5953         if (parent > 0) {
5954                 ref->parent = parent;
5955                 ref->owner = 0;
5956                 ref->offset = 0;
5957                 ref->node.full_backref = 1;
5958         } else {
5959                 ref->root = root;
5960                 ref->owner = owner;
5961                 ref->offset = offset;
5962                 ref->node.full_backref = 0;
5963         }
5964         ref->bytes = max_size;
5965         ref->found_ref = 0;
5966         ref->num_refs = 0;
5967         list_add_tail(&ref->node.list, &rec->backrefs);
5968         if (max_size > rec->max_size)
5969                 rec->max_size = max_size;
5970         return ref;
5971 }
5972
5973 /* Check if the type of extent matches with its chunk */
5974 static void check_extent_type(struct extent_record *rec)
5975 {
5976         struct btrfs_block_group_cache *bg_cache;
5977
5978         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5979         if (!bg_cache)
5980                 return;
5981
5982         /* data extent, check chunk directly*/
5983         if (!rec->metadata) {
5984                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985                         rec->wrong_chunk_type = 1;
5986                 return;
5987         }
5988
5989         /* metadata extent, check the obvious case first */
5990         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991                                  BTRFS_BLOCK_GROUP_METADATA))) {
5992                 rec->wrong_chunk_type = 1;
5993                 return;
5994         }
5995
5996         /*
5997          * Check SYSTEM extent, as it's also marked as metadata, we can only
5998          * make sure it's a SYSTEM extent by its backref
5999          */
6000         if (!list_empty(&rec->backrefs)) {
6001                 struct extent_backref *node;
6002                 struct tree_backref *tback;
6003                 u64 bg_type;
6004
6005                 node = to_extent_backref(rec->backrefs.next);
6006                 if (node->is_data) {
6007                         /* tree block shouldn't have data backref */
6008                         rec->wrong_chunk_type = 1;
6009                         return;
6010                 }
6011                 tback = container_of(node, struct tree_backref, node);
6012
6013                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6015                 else
6016                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017                 if (!(bg_cache->flags & bg_type))
6018                         rec->wrong_chunk_type = 1;
6019         }
6020 }
6021
6022 /*
6023  * Allocate a new extent record, fill default values from @tmpl and insert int
6024  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025  * the cache, otherwise it fails.
6026  */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028                 struct extent_record *tmpl)
6029 {
6030         struct extent_record *rec;
6031         int ret = 0;
6032
6033         BUG_ON(tmpl->max_size == 0);
6034         rec = malloc(sizeof(*rec));
6035         if (!rec)
6036                 return -ENOMEM;
6037         rec->start = tmpl->start;
6038         rec->max_size = tmpl->max_size;
6039         rec->nr = max(tmpl->nr, tmpl->max_size);
6040         rec->found_rec = tmpl->found_rec;
6041         rec->content_checked = tmpl->content_checked;
6042         rec->owner_ref_checked = tmpl->owner_ref_checked;
6043         rec->num_duplicates = 0;
6044         rec->metadata = tmpl->metadata;
6045         rec->flag_block_full_backref = FLAG_UNSET;
6046         rec->bad_full_backref = 0;
6047         rec->crossing_stripes = 0;
6048         rec->wrong_chunk_type = 0;
6049         rec->is_root = tmpl->is_root;
6050         rec->refs = tmpl->refs;
6051         rec->extent_item_refs = tmpl->extent_item_refs;
6052         rec->parent_generation = tmpl->parent_generation;
6053         INIT_LIST_HEAD(&rec->backrefs);
6054         INIT_LIST_HEAD(&rec->dups);
6055         INIT_LIST_HEAD(&rec->list);
6056         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6057         rec->cache.start = tmpl->start;
6058         rec->cache.size = tmpl->nr;
6059         ret = insert_cache_extent(extent_cache, &rec->cache);
6060         if (ret) {
6061                 free(rec);
6062                 return ret;
6063         }
6064         bytes_used += rec->nr;
6065
6066         if (tmpl->metadata)
6067                 rec->crossing_stripes = check_crossing_stripes(global_info,
6068                                 rec->start, global_info->tree_root->nodesize);
6069         check_extent_type(rec);
6070         return ret;
6071 }
6072
6073 /*
6074  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6075  * some are hints:
6076  * - refs              - if found, increase refs
6077  * - is_root           - if found, set
6078  * - content_checked   - if found, set
6079  * - owner_ref_checked - if found, set
6080  *
6081  * If not found, create a new one, initialize and insert.
6082  */
6083 static int add_extent_rec(struct cache_tree *extent_cache,
6084                 struct extent_record *tmpl)
6085 {
6086         struct extent_record *rec;
6087         struct cache_extent *cache;
6088         int ret = 0;
6089         int dup = 0;
6090
6091         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6092         if (cache) {
6093                 rec = container_of(cache, struct extent_record, cache);
6094                 if (tmpl->refs)
6095                         rec->refs++;
6096                 if (rec->nr == 1)
6097                         rec->nr = max(tmpl->nr, tmpl->max_size);
6098
6099                 /*
6100                  * We need to make sure to reset nr to whatever the extent
6101                  * record says was the real size, this way we can compare it to
6102                  * the backrefs.
6103                  */
6104                 if (tmpl->found_rec) {
6105                         if (tmpl->start != rec->start || rec->found_rec) {
6106                                 struct extent_record *tmp;
6107
6108                                 dup = 1;
6109                                 if (list_empty(&rec->list))
6110                                         list_add_tail(&rec->list,
6111                                                       &duplicate_extents);
6112
6113                                 /*
6114                                  * We have to do this song and dance in case we
6115                                  * find an extent record that falls inside of
6116                                  * our current extent record but does not have
6117                                  * the same objectid.
6118                                  */
6119                                 tmp = malloc(sizeof(*tmp));
6120                                 if (!tmp)
6121                                         return -ENOMEM;
6122                                 tmp->start = tmpl->start;
6123                                 tmp->max_size = tmpl->max_size;
6124                                 tmp->nr = tmpl->nr;
6125                                 tmp->found_rec = 1;
6126                                 tmp->metadata = tmpl->metadata;
6127                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6128                                 INIT_LIST_HEAD(&tmp->list);
6129                                 list_add_tail(&tmp->list, &rec->dups);
6130                                 rec->num_duplicates++;
6131                         } else {
6132                                 rec->nr = tmpl->nr;
6133                                 rec->found_rec = 1;
6134                         }
6135                 }
6136
6137                 if (tmpl->extent_item_refs && !dup) {
6138                         if (rec->extent_item_refs) {
6139                                 fprintf(stderr, "block %llu rec "
6140                                         "extent_item_refs %llu, passed %llu\n",
6141                                         (unsigned long long)tmpl->start,
6142                                         (unsigned long long)
6143                                                         rec->extent_item_refs,
6144                                         (unsigned long long)tmpl->extent_item_refs);
6145                         }
6146                         rec->extent_item_refs = tmpl->extent_item_refs;
6147                 }
6148                 if (tmpl->is_root)
6149                         rec->is_root = 1;
6150                 if (tmpl->content_checked)
6151                         rec->content_checked = 1;
6152                 if (tmpl->owner_ref_checked)
6153                         rec->owner_ref_checked = 1;
6154                 memcpy(&rec->parent_key, &tmpl->parent_key,
6155                                 sizeof(tmpl->parent_key));
6156                 if (tmpl->parent_generation)
6157                         rec->parent_generation = tmpl->parent_generation;
6158                 if (rec->max_size < tmpl->max_size)
6159                         rec->max_size = tmpl->max_size;
6160
6161                 /*
6162                  * A metadata extent can't cross stripe_len boundary, otherwise
6163                  * kernel scrub won't be able to handle it.
6164                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6165                  * it.
6166                  */
6167                 if (tmpl->metadata)
6168                         rec->crossing_stripes = check_crossing_stripes(
6169                                         global_info, rec->start,
6170                                         global_info->tree_root->nodesize);
6171                 check_extent_type(rec);
6172                 maybe_free_extent_rec(extent_cache, rec);
6173                 return ret;
6174         }
6175
6176         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6177
6178         return ret;
6179 }
6180
6181 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6182                             u64 parent, u64 root, int found_ref)
6183 {
6184         struct extent_record *rec;
6185         struct tree_backref *back;
6186         struct cache_extent *cache;
6187         int ret;
6188
6189         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6190         if (!cache) {
6191                 struct extent_record tmpl;
6192
6193                 memset(&tmpl, 0, sizeof(tmpl));
6194                 tmpl.start = bytenr;
6195                 tmpl.nr = 1;
6196                 tmpl.metadata = 1;
6197                 tmpl.max_size = 1;
6198
6199                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6200                 if (ret)
6201                         return ret;
6202
6203                 /* really a bug in cache_extent implement now */
6204                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6205                 if (!cache)
6206                         return -ENOENT;
6207         }
6208
6209         rec = container_of(cache, struct extent_record, cache);
6210         if (rec->start != bytenr) {
6211                 /*
6212                  * Several cause, from unaligned bytenr to over lapping extents
6213                  */
6214                 return -EEXIST;
6215         }
6216
6217         back = find_tree_backref(rec, parent, root);
6218         if (!back) {
6219                 back = alloc_tree_backref(rec, parent, root);
6220                 if (!back)
6221                         return -ENOMEM;
6222         }
6223
6224         if (found_ref) {
6225                 if (back->node.found_ref) {
6226                         fprintf(stderr, "Extent back ref already exists "
6227                                 "for %llu parent %llu root %llu \n",
6228                                 (unsigned long long)bytenr,
6229                                 (unsigned long long)parent,
6230                                 (unsigned long long)root);
6231                 }
6232                 back->node.found_ref = 1;
6233         } else {
6234                 if (back->node.found_extent_tree) {
6235                         fprintf(stderr, "Extent back ref already exists "
6236                                 "for %llu parent %llu root %llu \n",
6237                                 (unsigned long long)bytenr,
6238                                 (unsigned long long)parent,
6239                                 (unsigned long long)root);
6240                 }
6241                 back->node.found_extent_tree = 1;
6242         }
6243         check_extent_type(rec);
6244         maybe_free_extent_rec(extent_cache, rec);
6245         return 0;
6246 }
6247
6248 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6249                             u64 parent, u64 root, u64 owner, u64 offset,
6250                             u32 num_refs, int found_ref, u64 max_size)
6251 {
6252         struct extent_record *rec;
6253         struct data_backref *back;
6254         struct cache_extent *cache;
6255         int ret;
6256
6257         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6258         if (!cache) {
6259                 struct extent_record tmpl;
6260
6261                 memset(&tmpl, 0, sizeof(tmpl));
6262                 tmpl.start = bytenr;
6263                 tmpl.nr = 1;
6264                 tmpl.max_size = max_size;
6265
6266                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6267                 if (ret)
6268                         return ret;
6269
6270                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6271                 if (!cache)
6272                         abort();
6273         }
6274
6275         rec = container_of(cache, struct extent_record, cache);
6276         if (rec->max_size < max_size)
6277                 rec->max_size = max_size;
6278
6279         /*
6280          * If found_ref is set then max_size is the real size and must match the
6281          * existing refs.  So if we have already found a ref then we need to
6282          * make sure that this ref matches the existing one, otherwise we need
6283          * to add a new backref so we can notice that the backrefs don't match
6284          * and we need to figure out who is telling the truth.  This is to
6285          * account for that awful fsync bug I introduced where we'd end up with
6286          * a btrfs_file_extent_item that would have its length include multiple
6287          * prealloc extents or point inside of a prealloc extent.
6288          */
6289         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6290                                  bytenr, max_size);
6291         if (!back) {
6292                 back = alloc_data_backref(rec, parent, root, owner, offset,
6293                                           max_size);
6294                 BUG_ON(!back);
6295         }
6296
6297         if (found_ref) {
6298                 BUG_ON(num_refs != 1);
6299                 if (back->node.found_ref)
6300                         BUG_ON(back->bytes != max_size);
6301                 back->node.found_ref = 1;
6302                 back->found_ref += 1;
6303                 back->bytes = max_size;
6304                 back->disk_bytenr = bytenr;
6305                 rec->refs += 1;
6306                 rec->content_checked = 1;
6307                 rec->owner_ref_checked = 1;
6308         } else {
6309                 if (back->node.found_extent_tree) {
6310                         fprintf(stderr, "Extent back ref already exists "
6311                                 "for %llu parent %llu root %llu "
6312                                 "owner %llu offset %llu num_refs %lu\n",
6313                                 (unsigned long long)bytenr,
6314                                 (unsigned long long)parent,
6315                                 (unsigned long long)root,
6316                                 (unsigned long long)owner,
6317                                 (unsigned long long)offset,
6318                                 (unsigned long)num_refs);
6319                 }
6320                 back->num_refs = num_refs;
6321                 back->node.found_extent_tree = 1;
6322         }
6323         maybe_free_extent_rec(extent_cache, rec);
6324         return 0;
6325 }
6326
6327 static int add_pending(struct cache_tree *pending,
6328                        struct cache_tree *seen, u64 bytenr, u32 size)
6329 {
6330         int ret;
6331         ret = add_cache_extent(seen, bytenr, size);
6332         if (ret)
6333                 return ret;
6334         add_cache_extent(pending, bytenr, size);
6335         return 0;
6336 }
6337
6338 static int pick_next_pending(struct cache_tree *pending,
6339                         struct cache_tree *reada,
6340                         struct cache_tree *nodes,
6341                         u64 last, struct block_info *bits, int bits_nr,
6342                         int *reada_bits)
6343 {
6344         unsigned long node_start = last;
6345         struct cache_extent *cache;
6346         int ret;
6347
6348         cache = search_cache_extent(reada, 0);
6349         if (cache) {
6350                 bits[0].start = cache->start;
6351                 bits[0].size = cache->size;
6352                 *reada_bits = 1;
6353                 return 1;
6354         }
6355         *reada_bits = 0;
6356         if (node_start > 32768)
6357                 node_start -= 32768;
6358
6359         cache = search_cache_extent(nodes, node_start);
6360         if (!cache)
6361                 cache = search_cache_extent(nodes, 0);
6362
6363         if (!cache) {
6364                  cache = search_cache_extent(pending, 0);
6365                  if (!cache)
6366                          return 0;
6367                  ret = 0;
6368                  do {
6369                          bits[ret].start = cache->start;
6370                          bits[ret].size = cache->size;
6371                          cache = next_cache_extent(cache);
6372                          ret++;
6373                  } while (cache && ret < bits_nr);
6374                  return ret;
6375         }
6376
6377         ret = 0;
6378         do {
6379                 bits[ret].start = cache->start;
6380                 bits[ret].size = cache->size;
6381                 cache = next_cache_extent(cache);
6382                 ret++;
6383         } while (cache && ret < bits_nr);
6384
6385         if (bits_nr - ret > 8) {
6386                 u64 lookup = bits[0].start + bits[0].size;
6387                 struct cache_extent *next;
6388                 next = search_cache_extent(pending, lookup);
6389                 while(next) {
6390                         if (next->start - lookup > 32768)
6391                                 break;
6392                         bits[ret].start = next->start;
6393                         bits[ret].size = next->size;
6394                         lookup = next->start + next->size;
6395                         ret++;
6396                         if (ret == bits_nr)
6397                                 break;
6398                         next = next_cache_extent(next);
6399                         if (!next)
6400                                 break;
6401                 }
6402         }
6403         return ret;
6404 }
6405
6406 static void free_chunk_record(struct cache_extent *cache)
6407 {
6408         struct chunk_record *rec;
6409
6410         rec = container_of(cache, struct chunk_record, cache);
6411         list_del_init(&rec->list);
6412         list_del_init(&rec->dextents);
6413         free(rec);
6414 }
6415
6416 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6417 {
6418         cache_tree_free_extents(chunk_cache, free_chunk_record);
6419 }
6420
6421 static void free_device_record(struct rb_node *node)
6422 {
6423         struct device_record *rec;
6424
6425         rec = container_of(node, struct device_record, node);
6426         free(rec);
6427 }
6428
6429 FREE_RB_BASED_TREE(device_cache, free_device_record);
6430
6431 int insert_block_group_record(struct block_group_tree *tree,
6432                               struct block_group_record *bg_rec)
6433 {
6434         int ret;
6435
6436         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6437         if (ret)
6438                 return ret;
6439
6440         list_add_tail(&bg_rec->list, &tree->block_groups);
6441         return 0;
6442 }
6443
6444 static void free_block_group_record(struct cache_extent *cache)
6445 {
6446         struct block_group_record *rec;
6447
6448         rec = container_of(cache, struct block_group_record, cache);
6449         list_del_init(&rec->list);
6450         free(rec);
6451 }
6452
6453 void free_block_group_tree(struct block_group_tree *tree)
6454 {
6455         cache_tree_free_extents(&tree->tree, free_block_group_record);
6456 }
6457
6458 int insert_device_extent_record(struct device_extent_tree *tree,
6459                                 struct device_extent_record *de_rec)
6460 {
6461         int ret;
6462
6463         /*
6464          * Device extent is a bit different from the other extents, because
6465          * the extents which belong to the different devices may have the
6466          * same start and size, so we need use the special extent cache
6467          * search/insert functions.
6468          */
6469         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6470         if (ret)
6471                 return ret;
6472
6473         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6474         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6475         return 0;
6476 }
6477
6478 static void free_device_extent_record(struct cache_extent *cache)
6479 {
6480         struct device_extent_record *rec;
6481
6482         rec = container_of(cache, struct device_extent_record, cache);
6483         if (!list_empty(&rec->chunk_list))
6484                 list_del_init(&rec->chunk_list);
6485         if (!list_empty(&rec->device_list))
6486                 list_del_init(&rec->device_list);
6487         free(rec);
6488 }
6489
6490 void free_device_extent_tree(struct device_extent_tree *tree)
6491 {
6492         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6493 }
6494
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent reference item found in @slot of @leaf.
 *
 * References whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded
 * as tree backrefs, all others as data backrefs carrying the v0 ref count.
 * In both cases key.objectid is the extent start and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6515
6516 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6517                                             struct btrfs_key *key,
6518                                             int slot)
6519 {
6520         struct btrfs_chunk *ptr;
6521         struct chunk_record *rec;
6522         int num_stripes, i;
6523
6524         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6525         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6526
6527         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6528         if (!rec) {
6529                 fprintf(stderr, "memory allocation failed\n");
6530                 exit(-1);
6531         }
6532
6533         INIT_LIST_HEAD(&rec->list);
6534         INIT_LIST_HEAD(&rec->dextents);
6535         rec->bg_rec = NULL;
6536
6537         rec->cache.start = key->offset;
6538         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6539
6540         rec->generation = btrfs_header_generation(leaf);
6541
6542         rec->objectid = key->objectid;
6543         rec->type = key->type;
6544         rec->offset = key->offset;
6545
6546         rec->length = rec->cache.size;
6547         rec->owner = btrfs_chunk_owner(leaf, ptr);
6548         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6549         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6550         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6551         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6552         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6553         rec->num_stripes = num_stripes;
6554         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6555
6556         for (i = 0; i < rec->num_stripes; ++i) {
6557                 rec->stripes[i].devid =
6558                         btrfs_stripe_devid_nr(leaf, ptr, i);
6559                 rec->stripes[i].offset =
6560                         btrfs_stripe_offset_nr(leaf, ptr, i);
6561                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6562                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6563                                 BTRFS_UUID_SIZE);
6564         }
6565
6566         return rec;
6567 }
6568
6569 static int process_chunk_item(struct cache_tree *chunk_cache,
6570                               struct btrfs_key *key, struct extent_buffer *eb,
6571                               int slot)
6572 {
6573         struct chunk_record *rec;
6574         struct btrfs_chunk *chunk;
6575         int ret = 0;
6576
6577         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6578         /*
6579          * Do extra check for this chunk item,
6580          *
6581          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6582          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6583          * and owner<->key_type check.
6584          */
6585         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6586                                       key->offset);
6587         if (ret < 0) {
6588                 error("chunk(%llu, %llu) is not valid, ignore it",
6589                       key->offset, btrfs_chunk_length(eb, chunk));
6590                 return 0;
6591         }
6592         rec = btrfs_new_chunk_record(eb, key, slot);
6593         ret = insert_cache_extent(chunk_cache, &rec->cache);
6594         if (ret) {
6595                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6596                         rec->offset, rec->length);
6597                 free(rec);
6598         }
6599
6600         return ret;
6601 }
6602
6603 static int process_device_item(struct rb_root *dev_cache,
6604                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6605 {
6606         struct btrfs_dev_item *ptr;
6607         struct device_record *rec;
6608         int ret = 0;
6609
6610         ptr = btrfs_item_ptr(eb,
6611                 slot, struct btrfs_dev_item);
6612
6613         rec = malloc(sizeof(*rec));
6614         if (!rec) {
6615                 fprintf(stderr, "memory allocation failed\n");
6616                 return -ENOMEM;
6617         }
6618
6619         rec->devid = key->offset;
6620         rec->generation = btrfs_header_generation(eb);
6621
6622         rec->objectid = key->objectid;
6623         rec->type = key->type;
6624         rec->offset = key->offset;
6625
6626         rec->devid = btrfs_device_id(eb, ptr);
6627         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6628         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6629
6630         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6631         if (ret) {
6632                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6633                 free(rec);
6634         }
6635
6636         return ret;
6637 }
6638
6639 struct block_group_record *
6640 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6641                              int slot)
6642 {
6643         struct btrfs_block_group_item *ptr;
6644         struct block_group_record *rec;
6645
6646         rec = calloc(1, sizeof(*rec));
6647         if (!rec) {
6648                 fprintf(stderr, "memory allocation failed\n");
6649                 exit(-1);
6650         }
6651
6652         rec->cache.start = key->objectid;
6653         rec->cache.size = key->offset;
6654
6655         rec->generation = btrfs_header_generation(leaf);
6656
6657         rec->objectid = key->objectid;
6658         rec->type = key->type;
6659         rec->offset = key->offset;
6660
6661         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6662         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6663
6664         INIT_LIST_HEAD(&rec->list);
6665
6666         return rec;
6667 }
6668
6669 static int process_block_group_item(struct block_group_tree *block_group_cache,
6670                                     struct btrfs_key *key,
6671                                     struct extent_buffer *eb, int slot)
6672 {
6673         struct block_group_record *rec;
6674         int ret = 0;
6675
6676         rec = btrfs_new_block_group_record(eb, key, slot);
6677         ret = insert_block_group_record(block_group_cache, rec);
6678         if (ret) {
6679                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6680                         rec->objectid, rec->offset);
6681                 free(rec);
6682         }
6683
6684         return ret;
6685 }
6686
6687 struct device_extent_record *
6688 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6689                                struct btrfs_key *key, int slot)
6690 {
6691         struct device_extent_record *rec;
6692         struct btrfs_dev_extent *ptr;
6693
6694         rec = calloc(1, sizeof(*rec));
6695         if (!rec) {
6696                 fprintf(stderr, "memory allocation failed\n");
6697                 exit(-1);
6698         }
6699
6700         rec->cache.objectid = key->objectid;
6701         rec->cache.start = key->offset;
6702
6703         rec->generation = btrfs_header_generation(leaf);
6704
6705         rec->objectid = key->objectid;
6706         rec->type = key->type;
6707         rec->offset = key->offset;
6708
6709         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6710         rec->chunk_objecteid =
6711                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6712         rec->chunk_offset =
6713                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6714         rec->length = btrfs_dev_extent_length(leaf, ptr);
6715         rec->cache.size = rec->length;
6716
6717         INIT_LIST_HEAD(&rec->chunk_list);
6718         INIT_LIST_HEAD(&rec->device_list);
6719
6720         return rec;
6721 }
6722
6723 static int
6724 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6725                            struct btrfs_key *key, struct extent_buffer *eb,
6726                            int slot)
6727 {
6728         struct device_extent_record *rec;
6729         int ret;
6730
6731         rec = btrfs_new_device_extent_record(eb, key, slot);
6732         ret = insert_device_extent_record(dev_extent_cache, rec);
6733         if (ret) {
6734                 fprintf(stderr,
6735                         "Device extent[%llu, %llu, %llu] existed.\n",
6736                         rec->objectid, rec->offset, rec->length);
6737                 free(rec);
6738         }
6739
6740         return ret;
6741 }
6742
6743 static int process_extent_item(struct btrfs_root *root,
6744                                struct cache_tree *extent_cache,
6745                                struct extent_buffer *eb, int slot)
6746 {
6747         struct btrfs_extent_item *ei;
6748         struct btrfs_extent_inline_ref *iref;
6749         struct btrfs_extent_data_ref *dref;
6750         struct btrfs_shared_data_ref *sref;
6751         struct btrfs_key key;
6752         struct extent_record tmpl;
6753         unsigned long end;
6754         unsigned long ptr;
6755         int ret;
6756         int type;
6757         u32 item_size = btrfs_item_size_nr(eb, slot);
6758         u64 refs = 0;
6759         u64 offset;
6760         u64 num_bytes;
6761         int metadata = 0;
6762
6763         btrfs_item_key_to_cpu(eb, &key, slot);
6764
6765         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6766                 metadata = 1;
6767                 num_bytes = root->nodesize;
6768         } else {
6769                 num_bytes = key.offset;
6770         }
6771
6772         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6773                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6774                       key.objectid, root->sectorsize);
6775                 return -EIO;
6776         }
6777         if (item_size < sizeof(*ei)) {
6778 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6779                 struct btrfs_extent_item_v0 *ei0;
6780                 BUG_ON(item_size != sizeof(*ei0));
6781                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6782                 refs = btrfs_extent_refs_v0(eb, ei0);
6783 #else
6784                 BUG();
6785 #endif
6786                 memset(&tmpl, 0, sizeof(tmpl));
6787                 tmpl.start = key.objectid;
6788                 tmpl.nr = num_bytes;
6789                 tmpl.extent_item_refs = refs;
6790                 tmpl.metadata = metadata;
6791                 tmpl.found_rec = 1;
6792                 tmpl.max_size = num_bytes;
6793
6794                 return add_extent_rec(extent_cache, &tmpl);
6795         }
6796
6797         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6798         refs = btrfs_extent_refs(eb, ei);
6799         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6800                 metadata = 1;
6801         else
6802                 metadata = 0;
6803         if (metadata && num_bytes != root->nodesize) {
6804                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6805                       num_bytes, root->nodesize);
6806                 return -EIO;
6807         }
6808         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6809                 error("ignore invalid data extent, length %llu is not aligned to %u",
6810                       num_bytes, root->sectorsize);
6811                 return -EIO;
6812         }
6813
6814         memset(&tmpl, 0, sizeof(tmpl));
6815         tmpl.start = key.objectid;
6816         tmpl.nr = num_bytes;
6817         tmpl.extent_item_refs = refs;
6818         tmpl.metadata = metadata;
6819         tmpl.found_rec = 1;
6820         tmpl.max_size = num_bytes;
6821         add_extent_rec(extent_cache, &tmpl);
6822
6823         ptr = (unsigned long)(ei + 1);
6824         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6825             key.type == BTRFS_EXTENT_ITEM_KEY)
6826                 ptr += sizeof(struct btrfs_tree_block_info);
6827
6828         end = (unsigned long)ei + item_size;
6829         while (ptr < end) {
6830                 iref = (struct btrfs_extent_inline_ref *)ptr;
6831                 type = btrfs_extent_inline_ref_type(eb, iref);
6832                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6833                 switch (type) {
6834                 case BTRFS_TREE_BLOCK_REF_KEY:
6835                         ret = add_tree_backref(extent_cache, key.objectid,
6836                                         0, offset, 0);
6837                         if (ret < 0)
6838                                 error(
6839                         "add_tree_backref failed (extent items tree block): %s",
6840                                       strerror(-ret));
6841                         break;
6842                 case BTRFS_SHARED_BLOCK_REF_KEY:
6843                         ret = add_tree_backref(extent_cache, key.objectid,
6844                                         offset, 0, 0);
6845                         if (ret < 0)
6846                                 error(
6847                         "add_tree_backref failed (extent items shared block): %s",
6848                                       strerror(-ret));
6849                         break;
6850                 case BTRFS_EXTENT_DATA_REF_KEY:
6851                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6852                         add_data_backref(extent_cache, key.objectid, 0,
6853                                         btrfs_extent_data_ref_root(eb, dref),
6854                                         btrfs_extent_data_ref_objectid(eb,
6855                                                                        dref),
6856                                         btrfs_extent_data_ref_offset(eb, dref),
6857                                         btrfs_extent_data_ref_count(eb, dref),
6858                                         0, num_bytes);
6859                         break;
6860                 case BTRFS_SHARED_DATA_REF_KEY:
6861                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6862                         add_data_backref(extent_cache, key.objectid, offset,
6863                                         0, 0, 0,
6864                                         btrfs_shared_data_ref_count(eb, sref),
6865                                         0, num_bytes);
6866                         break;
6867                 default:
6868                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6869                                 key.objectid, key.type, num_bytes);
6870                         goto out;
6871                 }
6872                 ptr += btrfs_extent_inline_ref_size(type);
6873         }
6874         WARN_ON(ptr > end);
6875 out:
6876         return 0;
6877 }
6878
6879 static int check_cache_range(struct btrfs_root *root,
6880                              struct btrfs_block_group_cache *cache,
6881                              u64 offset, u64 bytes)
6882 {
6883         struct btrfs_free_space *entry;
6884         u64 *logical;
6885         u64 bytenr;
6886         int stripe_len;
6887         int i, nr, ret;
6888
6889         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6890                 bytenr = btrfs_sb_offset(i);
6891                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6892                                        cache->key.objectid, bytenr, 0,
6893                                        &logical, &nr, &stripe_len);
6894                 if (ret)
6895                         return ret;
6896
6897                 while (nr--) {
6898                         if (logical[nr] + stripe_len <= offset)
6899                                 continue;
6900                         if (offset + bytes <= logical[nr])
6901                                 continue;
6902                         if (logical[nr] == offset) {
6903                                 if (stripe_len >= bytes) {
6904                                         free(logical);
6905                                         return 0;
6906                                 }
6907                                 bytes -= stripe_len;
6908                                 offset += stripe_len;
6909                         } else if (logical[nr] < offset) {
6910                                 if (logical[nr] + stripe_len >=
6911                                     offset + bytes) {
6912                                         free(logical);
6913                                         return 0;
6914                                 }
6915                                 bytes = (offset + bytes) -
6916                                         (logical[nr] + stripe_len);
6917                                 offset = logical[nr] + stripe_len;
6918                         } else {
6919                                 /*
6920                                  * Could be tricky, the super may land in the
6921                                  * middle of the area we're checking.  First
6922                                  * check the easiest case, it's at the end.
6923                                  */
6924                                 if (logical[nr] + stripe_len >=
6925                                     bytes + offset) {
6926                                         bytes = logical[nr] - offset;
6927                                         continue;
6928                                 }
6929
6930                                 /* Check the left side */
6931                                 ret = check_cache_range(root, cache,
6932                                                         offset,
6933                                                         logical[nr] - offset);
6934                                 if (ret) {
6935                                         free(logical);
6936                                         return ret;
6937                                 }
6938
6939                                 /* Now we continue with the right side */
6940                                 bytes = (offset + bytes) -
6941                                         (logical[nr] + stripe_len);
6942                                 offset = logical[nr] + stripe_len;
6943                         }
6944                 }
6945
6946                 free(logical);
6947         }
6948
6949         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6950         if (!entry) {
6951                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6952                         offset, offset+bytes);
6953                 return -EINVAL;
6954         }
6955
6956         if (entry->offset != offset) {
6957                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6958                         entry->offset);
6959                 return -EINVAL;
6960         }
6961
6962         if (entry->bytes != bytes) {
6963                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6964                         bytes, entry->bytes, offset);
6965                 return -EINVAL;
6966         }
6967
6968         unlink_free_space(cache->free_space_ctl, entry);
6969         free(entry);
6970         return 0;
6971 }
6972
6973 static int verify_space_cache(struct btrfs_root *root,
6974                               struct btrfs_block_group_cache *cache)
6975 {
6976         struct btrfs_path path;
6977         struct extent_buffer *leaf;
6978         struct btrfs_key key;
6979         u64 last;
6980         int ret = 0;
6981
6982         root = root->fs_info->extent_root;
6983
6984         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6985
6986         btrfs_init_path(&path);
6987         key.objectid = last;
6988         key.offset = 0;
6989         key.type = BTRFS_EXTENT_ITEM_KEY;
6990         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6991         if (ret < 0)
6992                 goto out;
6993         ret = 0;
6994         while (1) {
6995                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6996                         ret = btrfs_next_leaf(root, &path);
6997                         if (ret < 0)
6998                                 goto out;
6999                         if (ret > 0) {
7000                                 ret = 0;
7001                                 break;
7002                         }
7003                 }
7004                 leaf = path.nodes[0];
7005                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7006                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7007                         break;
7008                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7009                     key.type != BTRFS_METADATA_ITEM_KEY) {
7010                         path.slots[0]++;
7011                         continue;
7012                 }
7013
7014                 if (last == key.objectid) {
7015                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7016                                 last = key.objectid + key.offset;
7017                         else
7018                                 last = key.objectid + root->nodesize;
7019                         path.slots[0]++;
7020                         continue;
7021                 }
7022
7023                 ret = check_cache_range(root, cache, last,
7024                                         key.objectid - last);
7025                 if (ret)
7026                         break;
7027                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7028                         last = key.objectid + key.offset;
7029                 else
7030                         last = key.objectid + root->nodesize;
7031                 path.slots[0]++;
7032         }
7033
7034         if (last < cache->key.objectid + cache->key.offset)
7035                 ret = check_cache_range(root, cache, last,
7036                                         cache->key.objectid +
7037                                         cache->key.offset - last);
7038
7039 out:
7040         btrfs_release_path(&path);
7041
7042         if (!ret &&
7043             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7044                 fprintf(stderr, "There are still entries left in the space "
7045                         "cache\n");
7046                 ret = -EINVAL;
7047         }
7048
7049         return ret;
7050 }
7051
7052 static int check_space_cache(struct btrfs_root *root)
7053 {
7054         struct btrfs_block_group_cache *cache;
7055         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7056         int ret;
7057         int error = 0;
7058
7059         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7060             btrfs_super_generation(root->fs_info->super_copy) !=
7061             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7062                 printf("cache and super generation don't match, space cache "
7063                        "will be invalidated\n");
7064                 return 0;
7065         }
7066
7067         if (ctx.progress_enabled) {
7068                 ctx.tp = TASK_FREE_SPACE;
7069                 task_start(ctx.info);
7070         }
7071
7072         while (1) {
7073                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7074                 if (!cache)
7075                         break;
7076
7077                 start = cache->key.objectid + cache->key.offset;
7078                 if (!cache->free_space_ctl) {
7079                         if (btrfs_init_free_space_ctl(cache,
7080                                                       root->sectorsize)) {
7081                                 ret = -ENOMEM;
7082                                 break;
7083                         }
7084                 } else {
7085                         btrfs_remove_free_space_cache(cache);
7086                 }
7087
7088                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7089                         ret = exclude_super_stripes(root, cache);
7090                         if (ret) {
7091                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7092                                         strerror(-ret));
7093                                 error++;
7094                                 continue;
7095                         }
7096                         ret = load_free_space_tree(root->fs_info, cache);
7097                         free_excluded_extents(root, cache);
7098                         if (ret < 0) {
7099                                 fprintf(stderr, "could not load free space tree: %s\n",
7100                                         strerror(-ret));
7101                                 error++;
7102                                 continue;
7103                         }
7104                         error += ret;
7105                 } else {
7106                         ret = load_free_space_cache(root->fs_info, cache);
7107                         if (!ret)
7108                                 continue;
7109                 }
7110
7111                 ret = verify_space_cache(root, cache);
7112                 if (ret) {
7113                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7114                                 cache->key.objectid);
7115                         error++;
7116                 }
7117         }
7118
7119         task_stop(ctx.info);
7120
7121         return error ? -EINVAL : 0;
7122 }
7123
7124 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7125                         u64 num_bytes, unsigned long leaf_offset,
7126                         struct extent_buffer *eb) {
7127
7128         u64 offset = 0;
7129         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7130         char *data;
7131         unsigned long csum_offset;
7132         u32 csum;
7133         u32 csum_expected;
7134         u64 read_len;
7135         u64 data_checked = 0;
7136         u64 tmp;
7137         int ret = 0;
7138         int mirror;
7139         int num_copies;
7140
7141         if (num_bytes % root->sectorsize)
7142                 return -EINVAL;
7143
7144         data = malloc(num_bytes);
7145         if (!data)
7146                 return -ENOMEM;
7147
7148         while (offset < num_bytes) {
7149                 mirror = 0;
7150 again:
7151                 read_len = num_bytes - offset;
7152                 /* read as much space once a time */
7153                 ret = read_extent_data(root, data + offset,
7154                                 bytenr + offset, &read_len, mirror);
7155                 if (ret)
7156                         goto out;
7157                 data_checked = 0;
7158                 /* verify every 4k data's checksum */
7159                 while (data_checked < read_len) {
7160                         csum = ~(u32)0;
7161                         tmp = offset + data_checked;
7162
7163                         csum = btrfs_csum_data((char *)data + tmp,
7164                                                csum, root->sectorsize);
7165                         btrfs_csum_final(csum, (u8 *)&csum);
7166
7167                         csum_offset = leaf_offset +
7168                                  tmp / root->sectorsize * csum_size;
7169                         read_extent_buffer(eb, (char *)&csum_expected,
7170                                            csum_offset, csum_size);
7171                         /* try another mirror */
7172                         if (csum != csum_expected) {
7173                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7174                                                 mirror, bytenr + tmp,
7175                                                 csum, csum_expected);
7176                                 num_copies = btrfs_num_copies(
7177                                                 &root->fs_info->mapping_tree,
7178                                                 bytenr, num_bytes);
7179                                 if (mirror < num_copies - 1) {
7180                                         mirror += 1;
7181                                         goto again;
7182                                 }
7183                         }
7184                         data_checked += root->sectorsize;
7185                 }
7186                 offset += read_len;
7187         }
7188 out:
7189         free(data);
7190         return ret;
7191 }
7192
7193 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7194                                u64 num_bytes)
7195 {
7196         struct btrfs_path path;
7197         struct extent_buffer *leaf;
7198         struct btrfs_key key;
7199         int ret;
7200
7201         btrfs_init_path(&path);
7202         key.objectid = bytenr;
7203         key.type = BTRFS_EXTENT_ITEM_KEY;
7204         key.offset = (u64)-1;
7205
7206 again:
7207         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7208                                 0, 0);
7209         if (ret < 0) {
7210                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7211                 btrfs_release_path(&path);
7212                 return ret;
7213         } else if (ret) {
7214                 if (path.slots[0] > 0) {
7215                         path.slots[0]--;
7216                 } else {
7217                         ret = btrfs_prev_leaf(root, &path);
7218                         if (ret < 0) {
7219                                 goto out;
7220                         } else if (ret > 0) {
7221                                 ret = 0;
7222                                 goto out;
7223                         }
7224                 }
7225         }
7226
7227         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7228
7229         /*
7230          * Block group items come before extent items if they have the same
7231          * bytenr, so walk back one more just in case.  Dear future traveller,
7232          * first congrats on mastering time travel.  Now if it's not too much
7233          * trouble could you go back to 2006 and tell Chris to make the
7234          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7235          * EXTENT_ITEM_KEY please?
7236          */
7237         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7238                 if (path.slots[0] > 0) {
7239                         path.slots[0]--;
7240                 } else {
7241                         ret = btrfs_prev_leaf(root, &path);
7242                         if (ret < 0) {
7243                                 goto out;
7244                         } else if (ret > 0) {
7245                                 ret = 0;
7246                                 goto out;
7247                         }
7248                 }
7249                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7250         }
7251
7252         while (num_bytes) {
7253                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7254                         ret = btrfs_next_leaf(root, &path);
7255                         if (ret < 0) {
7256                                 fprintf(stderr, "Error going to next leaf "
7257                                         "%d\n", ret);
7258                                 btrfs_release_path(&path);
7259                                 return ret;
7260                         } else if (ret) {
7261                                 break;
7262                         }
7263                 }
7264                 leaf = path.nodes[0];
7265                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7266                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7267                         path.slots[0]++;
7268                         continue;
7269                 }
7270                 if (key.objectid + key.offset < bytenr) {
7271                         path.slots[0]++;
7272                         continue;
7273                 }
7274                 if (key.objectid > bytenr + num_bytes)
7275                         break;
7276
7277                 if (key.objectid == bytenr) {
7278                         if (key.offset >= num_bytes) {
7279                                 num_bytes = 0;
7280                                 break;
7281                         }
7282                         num_bytes -= key.offset;
7283                         bytenr += key.offset;
7284                 } else if (key.objectid < bytenr) {
7285                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7286                                 num_bytes = 0;
7287                                 break;
7288                         }
7289                         num_bytes = (bytenr + num_bytes) -
7290                                 (key.objectid + key.offset);
7291                         bytenr = key.objectid + key.offset;
7292                 } else {
7293                         if (key.objectid + key.offset < bytenr + num_bytes) {
7294                                 u64 new_start = key.objectid + key.offset;
7295                                 u64 new_bytes = bytenr + num_bytes - new_start;
7296
7297                                 /*
7298                                  * Weird case, the extent is in the middle of
7299                                  * our range, we'll have to search one side
7300                                  * and then the other.  Not sure if this happens
7301                                  * in real life, but no harm in coding it up
7302                                  * anyway just in case.
7303                                  */
7304                                 btrfs_release_path(&path);
7305                                 ret = check_extent_exists(root, new_start,
7306                                                           new_bytes);
7307                                 if (ret) {
7308                                         fprintf(stderr, "Right section didn't "
7309                                                 "have a record\n");
7310                                         break;
7311                                 }
7312                                 num_bytes = key.objectid - bytenr;
7313                                 goto again;
7314                         }
7315                         num_bytes = key.objectid - bytenr;
7316                 }
7317                 path.slots[0]++;
7318         }
7319         ret = 0;
7320
7321 out:
7322         if (num_bytes && !ret) {
7323                 fprintf(stderr, "There are no extents for csum range "
7324                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7325                 ret = 1;
7326         }
7327
7328         btrfs_release_path(&path);
7329         return ret;
7330 }
7331
7332 static int check_csums(struct btrfs_root *root)
7333 {
7334         struct btrfs_path path;
7335         struct extent_buffer *leaf;
7336         struct btrfs_key key;
7337         u64 offset = 0, num_bytes = 0;
7338         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7339         int errors = 0;
7340         int ret;
7341         u64 data_len;
7342         unsigned long leaf_offset;
7343
7344         root = root->fs_info->csum_root;
7345         if (!extent_buffer_uptodate(root->node)) {
7346                 fprintf(stderr, "No valid csum tree found\n");
7347                 return -ENOENT;
7348         }
7349
7350         btrfs_init_path(&path);
7351         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7352         key.type = BTRFS_EXTENT_CSUM_KEY;
7353         key.offset = 0;
7354         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7355         if (ret < 0) {
7356                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7357                 btrfs_release_path(&path);
7358                 return ret;
7359         }
7360
7361         if (ret > 0 && path.slots[0])
7362                 path.slots[0]--;
7363         ret = 0;
7364
7365         while (1) {
7366                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7367                         ret = btrfs_next_leaf(root, &path);
7368                         if (ret < 0) {
7369                                 fprintf(stderr, "Error going to next leaf "
7370                                         "%d\n", ret);
7371                                 break;
7372                         }
7373                         if (ret)
7374                                 break;
7375                 }
7376                 leaf = path.nodes[0];
7377
7378                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7379                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7380                         path.slots[0]++;
7381                         continue;
7382                 }
7383
7384                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7385                               csum_size) * root->sectorsize;
7386                 if (!check_data_csum)
7387                         goto skip_csum_check;
7388                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7389                 ret = check_extent_csums(root, key.offset, data_len,
7390                                          leaf_offset, leaf);
7391                 if (ret)
7392                         break;
7393 skip_csum_check:
7394                 if (!num_bytes) {
7395                         offset = key.offset;
7396                 } else if (key.offset != offset + num_bytes) {
7397                         ret = check_extent_exists(root, offset, num_bytes);
7398                         if (ret) {
7399                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7400                                         "there is no extent record\n",
7401                                         offset, offset+num_bytes);
7402                                 errors++;
7403                         }
7404                         offset = key.offset;
7405                         num_bytes = 0;
7406                 }
7407                 num_bytes += data_len;
7408                 path.slots[0]++;
7409         }
7410
7411         btrfs_release_path(&path);
7412         return errors;
7413 }
7414
7415 static int is_dropped_key(struct btrfs_key *key,
7416                           struct btrfs_key *drop_key) {
7417         if (key->objectid < drop_key->objectid)
7418                 return 1;
7419         else if (key->objectid == drop_key->objectid) {
7420                 if (key->type < drop_key->type)
7421                         return 1;
7422                 else if (key->type == drop_key->type) {
7423                         if (key->offset < drop_key->offset)
7424                                 return 1;
7425                 }
7426         }
7427         return 0;
7428 }
7429
7430 /*
7431  * Here are the rules for FULL_BACKREF.
7432  *
7433  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7434  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7435  *      FULL_BACKREF set.
7436  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7437  *    if it happened after the relocation occurred since we'll have dropped the
7438  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7439  *    have no real way to know for sure.
7440  *
7441  * We process the blocks one root at a time, and we start from the lowest root
7442  * objectid and go to the highest.  So we can just lookup the owner backref for
7443  * the record and if we don't find it then we know it doesn't exist and we have
7444  * a FULL BACKREF.
7445  *
7446  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7447  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7448  * be set or not and then we can check later once we've gathered all the refs.
7449  */
7450 static int calc_extent_flag(struct cache_tree *extent_cache,
7451                            struct extent_buffer *buf,
7452                            struct root_item_record *ri,
7453                            u64 *flags)
7454 {
7455         struct extent_record *rec;
7456         struct cache_extent *cache;
7457         struct tree_backref *tback;
7458         u64 owner = 0;
7459
7460         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7461         /* we have added this extent before */
7462         if (!cache)
7463                 return -ENOENT;
7464
7465         rec = container_of(cache, struct extent_record, cache);
7466
7467         /*
7468          * Except file/reloc tree, we can not have
7469          * FULL BACKREF MODE
7470          */
7471         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7472                 goto normal;
7473         /*
7474          * root node
7475          */
7476         if (buf->start == ri->bytenr)
7477                 goto normal;
7478
7479         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7480                 goto full_backref;
7481
7482         owner = btrfs_header_owner(buf);
7483         if (owner == ri->objectid)
7484                 goto normal;
7485
7486         tback = find_tree_backref(rec, 0, owner);
7487         if (!tback)
7488                 goto full_backref;
7489 normal:
7490         *flags = 0;
7491         if (rec->flag_block_full_backref != FLAG_UNSET &&
7492             rec->flag_block_full_backref != 0)
7493                 rec->bad_full_backref = 1;
7494         return 0;
7495 full_backref:
7496         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7497         if (rec->flag_block_full_backref != FLAG_UNSET &&
7498             rec->flag_block_full_backref != 1)
7499                 rec->bad_full_backref = 1;
7500         return 0;
7501 }
7502
7503 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7504 {
7505         fprintf(stderr, "Invalid key type(");
7506         print_key_type(stderr, 0, key_type);
7507         fprintf(stderr, ") found in root(");
7508         print_objectid(stderr, rootid, 0);
7509         fprintf(stderr, ")\n");
7510 }
7511
7512 /*
7513  * Check if the key is valid with its extent buffer.
7514  *
7515  * This is a early check in case invalid key exists in a extent buffer
7516  * This is not comprehensive yet, but should prevent wrong key/item passed
7517  * further
7518  */
7519 static int check_type_with_root(u64 rootid, u8 key_type)
7520 {
7521         switch (key_type) {
7522         /* Only valid in chunk tree */
7523         case BTRFS_DEV_ITEM_KEY:
7524         case BTRFS_CHUNK_ITEM_KEY:
7525                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7526                         goto err;
7527                 break;
7528         /* valid in csum and log tree */
7529         case BTRFS_CSUM_TREE_OBJECTID:
7530                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7531                       is_fstree(rootid)))
7532                         goto err;
7533                 break;
7534         case BTRFS_EXTENT_ITEM_KEY:
7535         case BTRFS_METADATA_ITEM_KEY:
7536         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7537                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7538                         goto err;
7539                 break;
7540         case BTRFS_ROOT_ITEM_KEY:
7541                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7542                         goto err;
7543                 break;
7544         case BTRFS_DEV_EXTENT_KEY:
7545                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7546                         goto err;
7547                 break;
7548         }
7549         return 0;
7550 err:
7551         report_mismatch_key_root(key_type, rootid);
7552         return -EINVAL;
7553 }
7554
7555 static int run_next_block(struct btrfs_root *root,
7556                           struct block_info *bits,
7557                           int bits_nr,
7558                           u64 *last,
7559                           struct cache_tree *pending,
7560                           struct cache_tree *seen,
7561                           struct cache_tree *reada,
7562                           struct cache_tree *nodes,
7563                           struct cache_tree *extent_cache,
7564                           struct cache_tree *chunk_cache,
7565                           struct rb_root *dev_cache,
7566                           struct block_group_tree *block_group_cache,
7567                           struct device_extent_tree *dev_extent_cache,
7568                           struct root_item_record *ri)
7569 {
7570         struct extent_buffer *buf;
7571         struct extent_record *rec = NULL;
7572         u64 bytenr;
7573         u32 size;
7574         u64 parent;
7575         u64 owner;
7576         u64 flags;
7577         u64 ptr;
7578         u64 gen = 0;
7579         int ret = 0;
7580         int i;
7581         int nritems;
7582         struct btrfs_key key;
7583         struct cache_extent *cache;
7584         int reada_bits;
7585
7586         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7587                                     bits_nr, &reada_bits);
7588         if (nritems == 0)
7589                 return 1;
7590
7591         if (!reada_bits) {
7592                 for(i = 0; i < nritems; i++) {
7593                         ret = add_cache_extent(reada, bits[i].start,
7594                                                bits[i].size);
7595                         if (ret == -EEXIST)
7596                                 continue;
7597
7598                         /* fixme, get the parent transid */
7599                         readahead_tree_block(root, bits[i].start,
7600                                              bits[i].size, 0);
7601                 }
7602         }
7603         *last = bits[0].start;
7604         bytenr = bits[0].start;
7605         size = bits[0].size;
7606
7607         cache = lookup_cache_extent(pending, bytenr, size);
7608         if (cache) {
7609                 remove_cache_extent(pending, cache);
7610                 free(cache);
7611         }
7612         cache = lookup_cache_extent(reada, bytenr, size);
7613         if (cache) {
7614                 remove_cache_extent(reada, cache);
7615                 free(cache);
7616         }
7617         cache = lookup_cache_extent(nodes, bytenr, size);
7618         if (cache) {
7619                 remove_cache_extent(nodes, cache);
7620                 free(cache);
7621         }
7622         cache = lookup_cache_extent(extent_cache, bytenr, size);
7623         if (cache) {
7624                 rec = container_of(cache, struct extent_record, cache);
7625                 gen = rec->parent_generation;
7626         }
7627
7628         /* fixme, get the real parent transid */
7629         buf = read_tree_block(root, bytenr, size, gen);
7630         if (!extent_buffer_uptodate(buf)) {
7631                 record_bad_block_io(root->fs_info,
7632                                     extent_cache, bytenr, size);
7633                 goto out;
7634         }
7635
7636         nritems = btrfs_header_nritems(buf);
7637
7638         flags = 0;
7639         if (!init_extent_tree) {
7640                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7641                                        btrfs_header_level(buf), 1, NULL,
7642                                        &flags);
7643                 if (ret < 0) {
7644                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7645                         if (ret < 0) {
7646                                 fprintf(stderr, "Couldn't calc extent flags\n");
7647                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7648                         }
7649                 }
7650         } else {
7651                 flags = 0;
7652                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7653                 if (ret < 0) {
7654                         fprintf(stderr, "Couldn't calc extent flags\n");
7655                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7656                 }
7657         }
7658
7659         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7660                 if (ri != NULL &&
7661                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7662                     ri->objectid == btrfs_header_owner(buf)) {
7663                         /*
7664                          * Ok we got to this block from it's original owner and
7665                          * we have FULL_BACKREF set.  Relocation can leave
7666                          * converted blocks over so this is altogether possible,
7667                          * however it's not possible if the generation > the
7668                          * last snapshot, so check for this case.
7669                          */
7670                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7671                             btrfs_header_generation(buf) > ri->last_snapshot) {
7672                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7673                                 rec->bad_full_backref = 1;
7674                         }
7675                 }
7676         } else {
7677                 if (ri != NULL &&
7678                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7679                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7680                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7681                         rec->bad_full_backref = 1;
7682                 }
7683         }
7684
7685         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7686                 rec->flag_block_full_backref = 1;
7687                 parent = bytenr;
7688                 owner = 0;
7689         } else {
7690                 rec->flag_block_full_backref = 0;
7691                 parent = 0;
7692                 owner = btrfs_header_owner(buf);
7693         }
7694
7695         ret = check_block(root, extent_cache, buf, flags);
7696         if (ret)
7697                 goto out;
7698
7699         if (btrfs_is_leaf(buf)) {
7700                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7701                 for (i = 0; i < nritems; i++) {
7702                         struct btrfs_file_extent_item *fi;
7703                         btrfs_item_key_to_cpu(buf, &key, i);
7704                         /*
7705                          * Check key type against the leaf owner.
7706                          * Could filter quite a lot of early error if
7707                          * owner is correct
7708                          */
7709                         if (check_type_with_root(btrfs_header_owner(buf),
7710                                                  key.type)) {
7711                                 fprintf(stderr, "ignoring invalid key\n");
7712                                 continue;
7713                         }
7714                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7715                                 process_extent_item(root, extent_cache, buf,
7716                                                     i);
7717                                 continue;
7718                         }
7719                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7720                                 process_extent_item(root, extent_cache, buf,
7721                                                     i);
7722                                 continue;
7723                         }
7724                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7725                                 total_csum_bytes +=
7726                                         btrfs_item_size_nr(buf, i);
7727                                 continue;
7728                         }
7729                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7730                                 process_chunk_item(chunk_cache, &key, buf, i);
7731                                 continue;
7732                         }
7733                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7734                                 process_device_item(dev_cache, &key, buf, i);
7735                                 continue;
7736                         }
7737                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7738                                 process_block_group_item(block_group_cache,
7739                                         &key, buf, i);
7740                                 continue;
7741                         }
7742                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7743                                 process_device_extent_item(dev_extent_cache,
7744                                         &key, buf, i);
7745                                 continue;
7746
7747                         }
7748                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7749 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7750                                 process_extent_ref_v0(extent_cache, buf, i);
7751 #else
7752                                 BUG();
7753 #endif
7754                                 continue;
7755                         }
7756
7757                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7758                                 ret = add_tree_backref(extent_cache,
7759                                                 key.objectid, 0, key.offset, 0);
7760                                 if (ret < 0)
7761                                         error(
7762                                 "add_tree_backref failed (leaf tree block): %s",
7763                                               strerror(-ret));
7764                                 continue;
7765                         }
7766                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7767                                 ret = add_tree_backref(extent_cache,
7768                                                 key.objectid, key.offset, 0, 0);
7769                                 if (ret < 0)
7770                                         error(
7771                                 "add_tree_backref failed (leaf shared block): %s",
7772                                               strerror(-ret));
7773                                 continue;
7774                         }
7775                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7776                                 struct btrfs_extent_data_ref *ref;
7777                                 ref = btrfs_item_ptr(buf, i,
7778                                                 struct btrfs_extent_data_ref);
7779                                 add_data_backref(extent_cache,
7780                                         key.objectid, 0,
7781                                         btrfs_extent_data_ref_root(buf, ref),
7782                                         btrfs_extent_data_ref_objectid(buf,
7783                                                                        ref),
7784                                         btrfs_extent_data_ref_offset(buf, ref),
7785                                         btrfs_extent_data_ref_count(buf, ref),
7786                                         0, root->sectorsize);
7787                                 continue;
7788                         }
7789                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7790                                 struct btrfs_shared_data_ref *ref;
7791                                 ref = btrfs_item_ptr(buf, i,
7792                                                 struct btrfs_shared_data_ref);
7793                                 add_data_backref(extent_cache,
7794                                         key.objectid, key.offset, 0, 0, 0,
7795                                         btrfs_shared_data_ref_count(buf, ref),
7796                                         0, root->sectorsize);
7797                                 continue;
7798                         }
7799                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7800                                 struct bad_item *bad;
7801
7802                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7803                                         continue;
7804                                 if (!owner)
7805                                         continue;
7806                                 bad = malloc(sizeof(struct bad_item));
7807                                 if (!bad)
7808                                         continue;
7809                                 INIT_LIST_HEAD(&bad->list);
7810                                 memcpy(&bad->key, &key,
7811                                        sizeof(struct btrfs_key));
7812                                 bad->root_id = owner;
7813                                 list_add_tail(&bad->list, &delete_items);
7814                                 continue;
7815                         }
7816                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7817                                 continue;
7818                         fi = btrfs_item_ptr(buf, i,
7819                                             struct btrfs_file_extent_item);
7820                         if (btrfs_file_extent_type(buf, fi) ==
7821                             BTRFS_FILE_EXTENT_INLINE)
7822                                 continue;
7823                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7824                                 continue;
7825
7826                         data_bytes_allocated +=
7827                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7828                         if (data_bytes_allocated < root->sectorsize) {
7829                                 abort();
7830                         }
7831                         data_bytes_referenced +=
7832                                 btrfs_file_extent_num_bytes(buf, fi);
7833                         add_data_backref(extent_cache,
7834                                 btrfs_file_extent_disk_bytenr(buf, fi),
7835                                 parent, owner, key.objectid, key.offset -
7836                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7837                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7838                 }
7839         } else {
7840                 int level;
7841                 struct btrfs_key first_key;
7842
7843                 first_key.objectid = 0;
7844
7845                 if (nritems > 0)
7846                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7847                 level = btrfs_header_level(buf);
7848                 for (i = 0; i < nritems; i++) {
7849                         struct extent_record tmpl;
7850
7851                         ptr = btrfs_node_blockptr(buf, i);
7852                         size = root->nodesize;
7853                         btrfs_node_key_to_cpu(buf, &key, i);
7854                         if (ri != NULL) {
7855                                 if ((level == ri->drop_level)
7856                                     && is_dropped_key(&key, &ri->drop_key)) {
7857                                         continue;
7858                                 }
7859                         }
7860
7861                         memset(&tmpl, 0, sizeof(tmpl));
7862                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7863                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7864                         tmpl.start = ptr;
7865                         tmpl.nr = size;
7866                         tmpl.refs = 1;
7867                         tmpl.metadata = 1;
7868                         tmpl.max_size = size;
7869                         ret = add_extent_rec(extent_cache, &tmpl);
7870                         if (ret < 0)
7871                                 goto out;
7872
7873                         ret = add_tree_backref(extent_cache, ptr, parent,
7874                                         owner, 1);
7875                         if (ret < 0) {
7876                                 error(
7877                                 "add_tree_backref failed (non-leaf block): %s",
7878                                       strerror(-ret));
7879                                 continue;
7880                         }
7881
7882                         if (level > 1) {
7883                                 add_pending(nodes, seen, ptr, size);
7884                         } else {
7885                                 add_pending(pending, seen, ptr, size);
7886                         }
7887                 }
7888                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7889                                       nritems) * sizeof(struct btrfs_key_ptr);
7890         }
7891         total_btree_bytes += buf->len;
7892         if (fs_root_objectid(btrfs_header_owner(buf)))
7893                 total_fs_tree_bytes += buf->len;
7894         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7895                 total_extent_tree_bytes += buf->len;
7896         if (!found_old_backref &&
7897             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7898             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7899             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7900                 found_old_backref = 1;
7901 out:
7902         free_extent_buffer(buf);
7903         return ret;
7904 }
7905
7906 static int add_root_to_pending(struct extent_buffer *buf,
7907                                struct cache_tree *extent_cache,
7908                                struct cache_tree *pending,
7909                                struct cache_tree *seen,
7910                                struct cache_tree *nodes,
7911                                u64 objectid)
7912 {
7913         struct extent_record tmpl;
7914         int ret;
7915
7916         if (btrfs_header_level(buf) > 0)
7917                 add_pending(nodes, seen, buf->start, buf->len);
7918         else
7919                 add_pending(pending, seen, buf->start, buf->len);
7920
7921         memset(&tmpl, 0, sizeof(tmpl));
7922         tmpl.start = buf->start;
7923         tmpl.nr = buf->len;
7924         tmpl.is_root = 1;
7925         tmpl.refs = 1;
7926         tmpl.metadata = 1;
7927         tmpl.max_size = buf->len;
7928         add_extent_rec(extent_cache, &tmpl);
7929
7930         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7931             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7932                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7933                                 0, 1);
7934         else
7935                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7936                                 1);
7937         return ret;
7938 }
7939
7940 /* as we fix the tree, we might be deleting blocks that
7941  * we're tracking for repair.  This hook makes sure we
7942  * remove any backrefs for blocks as we are fixing them.
7943  */
7944 static int free_extent_hook(struct btrfs_trans_handle *trans,
7945                             struct btrfs_root *root,
7946                             u64 bytenr, u64 num_bytes, u64 parent,
7947                             u64 root_objectid, u64 owner, u64 offset,
7948                             int refs_to_drop)
7949 {
7950         struct extent_record *rec;
7951         struct cache_extent *cache;
7952         int is_data;
7953         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7954
7955         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7956         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7957         if (!cache)
7958                 return 0;
7959
7960         rec = container_of(cache, struct extent_record, cache);
7961         if (is_data) {
7962                 struct data_backref *back;
7963                 back = find_data_backref(rec, parent, root_objectid, owner,
7964                                          offset, 1, bytenr, num_bytes);
7965                 if (!back)
7966                         goto out;
7967                 if (back->node.found_ref) {
7968                         back->found_ref -= refs_to_drop;
7969                         if (rec->refs)
7970                                 rec->refs -= refs_to_drop;
7971                 }
7972                 if (back->node.found_extent_tree) {
7973                         back->num_refs -= refs_to_drop;
7974                         if (rec->extent_item_refs)
7975                                 rec->extent_item_refs -= refs_to_drop;
7976                 }
7977                 if (back->found_ref == 0)
7978                         back->node.found_ref = 0;
7979                 if (back->num_refs == 0)
7980                         back->node.found_extent_tree = 0;
7981
7982                 if (!back->node.found_extent_tree && back->node.found_ref) {
7983                         list_del(&back->node.list);
7984                         free(back);
7985                 }
7986         } else {
7987                 struct tree_backref *back;
7988                 back = find_tree_backref(rec, parent, root_objectid);
7989                 if (!back)
7990                         goto out;
7991                 if (back->node.found_ref) {
7992                         if (rec->refs)
7993                                 rec->refs--;
7994                         back->node.found_ref = 0;
7995                 }
7996                 if (back->node.found_extent_tree) {
7997                         if (rec->extent_item_refs)
7998                                 rec->extent_item_refs--;
7999                         back->node.found_extent_tree = 0;
8000                 }
8001                 if (!back->node.found_extent_tree && back->node.found_ref) {
8002                         list_del(&back->node.list);
8003                         free(back);
8004                 }
8005         }
8006         maybe_free_extent_rec(extent_cache, rec);
8007 out:
8008         return 0;
8009 }
8010
8011 static int delete_extent_records(struct btrfs_trans_handle *trans,
8012                                  struct btrfs_root *root,
8013                                  struct btrfs_path *path,
8014                                  u64 bytenr)
8015 {
8016         struct btrfs_key key;
8017         struct btrfs_key found_key;
8018         struct extent_buffer *leaf;
8019         int ret;
8020         int slot;
8021
8022
8023         key.objectid = bytenr;
8024         key.type = (u8)-1;
8025         key.offset = (u64)-1;
8026
8027         while(1) {
8028                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8029                                         &key, path, 0, 1);
8030                 if (ret < 0)
8031                         break;
8032
8033                 if (ret > 0) {
8034                         ret = 0;
8035                         if (path->slots[0] == 0)
8036                                 break;
8037                         path->slots[0]--;
8038                 }
8039                 ret = 0;
8040
8041                 leaf = path->nodes[0];
8042                 slot = path->slots[0];
8043
8044                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8045                 if (found_key.objectid != bytenr)
8046                         break;
8047
8048                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8049                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8050                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8051                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8052                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8053                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8054                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8055                         btrfs_release_path(path);
8056                         if (found_key.type == 0) {
8057                                 if (found_key.offset == 0)
8058                                         break;
8059                                 key.offset = found_key.offset - 1;
8060                                 key.type = found_key.type;
8061                         }
8062                         key.type = found_key.type - 1;
8063                         key.offset = (u64)-1;
8064                         continue;
8065                 }
8066
8067                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8068                         found_key.objectid, found_key.type, found_key.offset);
8069
8070                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8071                 if (ret)
8072                         break;
8073                 btrfs_release_path(path);
8074
8075                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8076                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8077                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8078                                 found_key.offset : root->nodesize;
8079
8080                         ret = btrfs_update_block_group(trans, root, bytenr,
8081                                                        bytes, 0, 0);
8082                         if (ret)
8083                                 break;
8084                 }
8085         }
8086
8087         btrfs_release_path(path);
8088         return ret;
8089 }
8090
8091 /*
8092  * for a single backref, this will allocate a new extent
8093  * and add the backref to it.
8094  */
8095 static int record_extent(struct btrfs_trans_handle *trans,
8096                          struct btrfs_fs_info *info,
8097                          struct btrfs_path *path,
8098                          struct extent_record *rec,
8099                          struct extent_backref *back,
8100                          int allocated, u64 flags)
8101 {
8102         int ret = 0;
8103         struct btrfs_root *extent_root = info->extent_root;
8104         struct extent_buffer *leaf;
8105         struct btrfs_key ins_key;
8106         struct btrfs_extent_item *ei;
8107         struct data_backref *dback;
8108         struct btrfs_tree_block_info *bi;
8109
8110         if (!back->is_data)
8111                 rec->max_size = max_t(u64, rec->max_size,
8112                                     info->extent_root->nodesize);
8113
8114         if (!allocated) {
8115                 u32 item_size = sizeof(*ei);
8116
8117                 if (!back->is_data)
8118                         item_size += sizeof(*bi);
8119
8120                 ins_key.objectid = rec->start;
8121                 ins_key.offset = rec->max_size;
8122                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8123
8124                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8125                                         &ins_key, item_size);
8126                 if (ret)
8127                         goto fail;
8128
8129                 leaf = path->nodes[0];
8130                 ei = btrfs_item_ptr(leaf, path->slots[0],
8131                                     struct btrfs_extent_item);
8132
8133                 btrfs_set_extent_refs(leaf, ei, 0);
8134                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8135
8136                 if (back->is_data) {
8137                         btrfs_set_extent_flags(leaf, ei,
8138                                                BTRFS_EXTENT_FLAG_DATA);
8139                 } else {
8140                         struct btrfs_disk_key copy_key;;
8141
8142                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8143                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8144                                              sizeof(*bi));
8145
8146                         btrfs_set_disk_key_objectid(&copy_key,
8147                                                     rec->info_objectid);
8148                         btrfs_set_disk_key_type(&copy_key, 0);
8149                         btrfs_set_disk_key_offset(&copy_key, 0);
8150
8151                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8152                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8153
8154                         btrfs_set_extent_flags(leaf, ei,
8155                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8156                 }
8157
8158                 btrfs_mark_buffer_dirty(leaf);
8159                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8160                                                rec->max_size, 1, 0);
8161                 if (ret)
8162                         goto fail;
8163                 btrfs_release_path(path);
8164         }
8165
8166         if (back->is_data) {
8167                 u64 parent;
8168                 int i;
8169
8170                 dback = to_data_backref(back);
8171                 if (back->full_backref)
8172                         parent = dback->parent;
8173                 else
8174                         parent = 0;
8175
8176                 for (i = 0; i < dback->found_ref; i++) {
8177                         /* if parent != 0, we're doing a full backref
8178                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8179                          * just makes the backref allocator create a data
8180                          * backref
8181                          */
8182                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8183                                                    rec->start, rec->max_size,
8184                                                    parent,
8185                                                    dback->root,
8186                                                    parent ?
8187                                                    BTRFS_FIRST_FREE_OBJECTID :
8188                                                    dback->owner,
8189                                                    dback->offset);
8190                         if (ret)
8191                                 break;
8192                 }
8193                 fprintf(stderr, "adding new data backref"
8194                                 " on %llu %s %llu owner %llu"
8195                                 " offset %llu found %d\n",
8196                                 (unsigned long long)rec->start,
8197                                 back->full_backref ?
8198                                 "parent" : "root",
8199                                 back->full_backref ?
8200                                 (unsigned long long)parent :
8201                                 (unsigned long long)dback->root,
8202                                 (unsigned long long)dback->owner,
8203                                 (unsigned long long)dback->offset,
8204                                 dback->found_ref);
8205         } else {
8206                 u64 parent;
8207                 struct tree_backref *tback;
8208
8209                 tback = to_tree_backref(back);
8210                 if (back->full_backref)
8211                         parent = tback->parent;
8212                 else
8213                         parent = 0;
8214
8215                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8216                                            rec->start, rec->max_size,
8217                                            parent, tback->root, 0, 0);
8218                 fprintf(stderr, "adding new tree backref on "
8219                         "start %llu len %llu parent %llu root %llu\n",
8220                         rec->start, rec->max_size, parent, tback->root);
8221         }
8222 fail:
8223         btrfs_release_path(path);
8224         return ret;
8225 }
8226
8227 static struct extent_entry *find_entry(struct list_head *entries,
8228                                        u64 bytenr, u64 bytes)
8229 {
8230         struct extent_entry *entry = NULL;
8231
8232         list_for_each_entry(entry, entries, list) {
8233                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8234                         return entry;
8235         }
8236
8237         return NULL;
8238 }
8239
8240 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8241 {
8242         struct extent_entry *entry, *best = NULL, *prev = NULL;
8243
8244         list_for_each_entry(entry, entries, list) {
8245                 /*
8246                  * If there are as many broken entries as entries then we know
8247                  * not to trust this particular entry.
8248                  */
8249                 if (entry->broken == entry->count)
8250                         continue;
8251
8252                 /*
8253                  * Special case, when there are only two entries and 'best' is
8254                  * the first one
8255                  */
8256                 if (!prev) {
8257                         best = entry;
8258                         prev = entry;
8259                         continue;
8260                 }
8261
8262                 /*
8263                  * If our current entry == best then we can't be sure our best
8264                  * is really the best, so we need to keep searching.
8265                  */
8266                 if (best && best->count == entry->count) {
8267                         prev = entry;
8268                         best = NULL;
8269                         continue;
8270                 }
8271
8272                 /* Prev == entry, not good enough, have to keep searching */
8273                 if (!prev->broken && prev->count == entry->count)
8274                         continue;
8275
8276                 if (!best)
8277                         best = (prev->count > entry->count) ? prev : entry;
8278                 else if (best->count < entry->count)
8279                         best = entry;
8280                 prev = entry;
8281         }
8282
8283         return best;
8284 }
8285
8286 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8287                       struct data_backref *dback, struct extent_entry *entry)
8288 {
8289         struct btrfs_trans_handle *trans;
8290         struct btrfs_root *root;
8291         struct btrfs_file_extent_item *fi;
8292         struct extent_buffer *leaf;
8293         struct btrfs_key key;
8294         u64 bytenr, bytes;
8295         int ret, err;
8296
8297         key.objectid = dback->root;
8298         key.type = BTRFS_ROOT_ITEM_KEY;
8299         key.offset = (u64)-1;
8300         root = btrfs_read_fs_root(info, &key);
8301         if (IS_ERR(root)) {
8302                 fprintf(stderr, "Couldn't find root for our ref\n");
8303                 return -EINVAL;
8304         }
8305
8306         /*
8307          * The backref points to the original offset of the extent if it was
8308          * split, so we need to search down to the offset we have and then walk
8309          * forward until we find the backref we're looking for.
8310          */
8311         key.objectid = dback->owner;
8312         key.type = BTRFS_EXTENT_DATA_KEY;
8313         key.offset = dback->offset;
8314         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8315         if (ret < 0) {
8316                 fprintf(stderr, "Error looking up ref %d\n", ret);
8317                 return ret;
8318         }
8319
8320         while (1) {
8321                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8322                         ret = btrfs_next_leaf(root, path);
8323                         if (ret) {
8324                                 fprintf(stderr, "Couldn't find our ref, next\n");
8325                                 return -EINVAL;
8326                         }
8327                 }
8328                 leaf = path->nodes[0];
8329                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8330                 if (key.objectid != dback->owner ||
8331                     key.type != BTRFS_EXTENT_DATA_KEY) {
8332                         fprintf(stderr, "Couldn't find our ref, search\n");
8333                         return -EINVAL;
8334                 }
8335                 fi = btrfs_item_ptr(leaf, path->slots[0],
8336                                     struct btrfs_file_extent_item);
8337                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8338                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8339
8340                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8341                         break;
8342                 path->slots[0]++;
8343         }
8344
8345         btrfs_release_path(path);
8346
8347         trans = btrfs_start_transaction(root, 1);
8348         if (IS_ERR(trans))
8349                 return PTR_ERR(trans);
8350
8351         /*
8352          * Ok we have the key of the file extent we want to fix, now we can cow
8353          * down to the thing and fix it.
8354          */
8355         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8356         if (ret < 0) {
8357                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8358                         key.objectid, key.type, key.offset, ret);
8359                 goto out;
8360         }
8361         if (ret > 0) {
8362                 fprintf(stderr, "Well that's odd, we just found this key "
8363                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8364                         key.offset);
8365                 ret = -EINVAL;
8366                 goto out;
8367         }
8368         leaf = path->nodes[0];
8369         fi = btrfs_item_ptr(leaf, path->slots[0],
8370                             struct btrfs_file_extent_item);
8371
8372         if (btrfs_file_extent_compression(leaf, fi) &&
8373             dback->disk_bytenr != entry->bytenr) {
8374                 fprintf(stderr, "Ref doesn't match the record start and is "
8375                         "compressed, please take a btrfs-image of this file "
8376                         "system and send it to a btrfs developer so they can "
8377                         "complete this functionality for bytenr %Lu\n",
8378                         dback->disk_bytenr);
8379                 ret = -EINVAL;
8380                 goto out;
8381         }
8382
8383         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8384                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8385         } else if (dback->disk_bytenr > entry->bytenr) {
8386                 u64 off_diff, offset;
8387
8388                 off_diff = dback->disk_bytenr - entry->bytenr;
8389                 offset = btrfs_file_extent_offset(leaf, fi);
8390                 if (dback->disk_bytenr + offset +
8391                     btrfs_file_extent_num_bytes(leaf, fi) >
8392                     entry->bytenr + entry->bytes) {
8393                         fprintf(stderr, "Ref is past the entry end, please "
8394                                 "take a btrfs-image of this file system and "
8395                                 "send it to a btrfs developer, ref %Lu\n",
8396                                 dback->disk_bytenr);
8397                         ret = -EINVAL;
8398                         goto out;
8399                 }
8400                 offset += off_diff;
8401                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8402                 btrfs_set_file_extent_offset(leaf, fi, offset);
8403         } else if (dback->disk_bytenr < entry->bytenr) {
8404                 u64 offset;
8405
8406                 offset = btrfs_file_extent_offset(leaf, fi);
8407                 if (dback->disk_bytenr + offset < entry->bytenr) {
8408                         fprintf(stderr, "Ref is before the entry start, please"
8409                                 " take a btrfs-image of this file system and "
8410                                 "send it to a btrfs developer, ref %Lu\n",
8411                                 dback->disk_bytenr);
8412                         ret = -EINVAL;
8413                         goto out;
8414                 }
8415
8416                 offset += dback->disk_bytenr;
8417                 offset -= entry->bytenr;
8418                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8419                 btrfs_set_file_extent_offset(leaf, fi, offset);
8420         }
8421
8422         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8423
8424         /*
8425          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8426          * only do this if we aren't using compression, otherwise it's a
8427          * trickier case.
8428          */
8429         if (!btrfs_file_extent_compression(leaf, fi))
8430                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8431         else
8432                 printf("ram bytes may be wrong?\n");
8433         btrfs_mark_buffer_dirty(leaf);
8434 out:
8435         err = btrfs_commit_transaction(trans, root);
8436         btrfs_release_path(path);
8437         return ret ? ret : err;
8438 }
8439
8440 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8441                            struct extent_record *rec)
8442 {
8443         struct extent_backref *back;
8444         struct data_backref *dback;
8445         struct extent_entry *entry, *best = NULL;
8446         LIST_HEAD(entries);
8447         int nr_entries = 0;
8448         int broken_entries = 0;
8449         int ret = 0;
8450         short mismatch = 0;
8451
8452         /*
8453          * Metadata is easy and the backrefs should always agree on bytenr and
8454          * size, if not we've got bigger issues.
8455          */
8456         if (rec->metadata)
8457                 return 0;
8458
8459         list_for_each_entry(back, &rec->backrefs, list) {
8460                 if (back->full_backref || !back->is_data)
8461                         continue;
8462
8463                 dback = to_data_backref(back);
8464
8465                 /*
8466                  * We only pay attention to backrefs that we found a real
8467                  * backref for.
8468                  */
8469                 if (dback->found_ref == 0)
8470                         continue;
8471
8472                 /*
8473                  * For now we only catch when the bytes don't match, not the
8474                  * bytenr.  We can easily do this at the same time, but I want
8475                  * to have a fs image to test on before we just add repair
8476                  * functionality willy-nilly so we know we won't screw up the
8477                  * repair.
8478                  */
8479
8480                 entry = find_entry(&entries, dback->disk_bytenr,
8481                                    dback->bytes);
8482                 if (!entry) {
8483                         entry = malloc(sizeof(struct extent_entry));
8484                         if (!entry) {
8485                                 ret = -ENOMEM;
8486                                 goto out;
8487                         }
8488                         memset(entry, 0, sizeof(*entry));
8489                         entry->bytenr = dback->disk_bytenr;
8490                         entry->bytes = dback->bytes;
8491                         list_add_tail(&entry->list, &entries);
8492                         nr_entries++;
8493                 }
8494
8495                 /*
8496                  * If we only have on entry we may think the entries agree when
8497                  * in reality they don't so we have to do some extra checking.
8498                  */
8499                 if (dback->disk_bytenr != rec->start ||
8500                     dback->bytes != rec->nr || back->broken)
8501                         mismatch = 1;
8502
8503                 if (back->broken) {
8504                         entry->broken++;
8505                         broken_entries++;
8506                 }
8507
8508                 entry->count++;
8509         }
8510
8511         /* Yay all the backrefs agree, carry on good sir */
8512         if (nr_entries <= 1 && !mismatch)
8513                 goto out;
8514
8515         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8516                 "%Lu\n", rec->start);
8517
8518         /*
8519          * First we want to see if the backrefs can agree amongst themselves who
8520          * is right, so figure out which one of the entries has the highest
8521          * count.
8522          */
8523         best = find_most_right_entry(&entries);
8524
8525         /*
8526          * Ok so we may have an even split between what the backrefs think, so
8527          * this is where we use the extent ref to see what it thinks.
8528          */
8529         if (!best) {
8530                 entry = find_entry(&entries, rec->start, rec->nr);
8531                 if (!entry && (!broken_entries || !rec->found_rec)) {
8532                         fprintf(stderr, "Backrefs don't agree with each other "
8533                                 "and extent record doesn't agree with anybody,"
8534                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8535                                 rec->start, rec->nr);
8536                         ret = -EINVAL;
8537                         goto out;
8538                 } else if (!entry) {
8539                         /*
8540                          * Ok our backrefs were broken, we'll assume this is the
8541                          * correct value and add an entry for this range.
8542                          */
8543                         entry = malloc(sizeof(struct extent_entry));
8544                         if (!entry) {
8545                                 ret = -ENOMEM;
8546                                 goto out;
8547                         }
8548                         memset(entry, 0, sizeof(*entry));
8549                         entry->bytenr = rec->start;
8550                         entry->bytes = rec->nr;
8551                         list_add_tail(&entry->list, &entries);
8552                         nr_entries++;
8553                 }
8554                 entry->count++;
8555                 best = find_most_right_entry(&entries);
8556                 if (!best) {
8557                         fprintf(stderr, "Backrefs and extent record evenly "
8558                                 "split on who is right, this is going to "
8559                                 "require user input to fix bytenr %Lu bytes "
8560                                 "%Lu\n", rec->start, rec->nr);
8561                         ret = -EINVAL;
8562                         goto out;
8563                 }
8564         }
8565
8566         /*
8567          * I don't think this can happen currently as we'll abort() if we catch
8568          * this case higher up, but in case somebody removes that we still can't
8569          * deal with it properly here yet, so just bail out of that's the case.
8570          */
8571         if (best->bytenr != rec->start) {
8572                 fprintf(stderr, "Extent start and backref starts don't match, "
8573                         "please use btrfs-image on this file system and send "
8574                         "it to a btrfs developer so they can make fsck fix "
8575                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8576                         rec->start, rec->nr);
8577                 ret = -EINVAL;
8578                 goto out;
8579         }
8580
8581         /*
8582          * Ok great we all agreed on an extent record, let's go find the real
8583          * references and fix up the ones that don't match.
8584          */
8585         list_for_each_entry(back, &rec->backrefs, list) {
8586                 if (back->full_backref || !back->is_data)
8587                         continue;
8588
8589                 dback = to_data_backref(back);
8590
8591                 /*
8592                  * Still ignoring backrefs that don't have a real ref attached
8593                  * to them.
8594                  */
8595                 if (dback->found_ref == 0)
8596                         continue;
8597
8598                 if (dback->bytes == best->bytes &&
8599                     dback->disk_bytenr == best->bytenr)
8600                         continue;
8601
8602                 ret = repair_ref(info, path, dback, best);
8603                 if (ret)
8604                         goto out;
8605         }
8606
8607         /*
8608          * Ok we messed with the actual refs, which means we need to drop our
8609          * entire cache and go back and rescan.  I know this is a huge pain and
8610          * adds a lot of extra work, but it's the only way to be safe.  Once all
8611          * the backrefs agree we may not need to do anything to the extent
8612          * record itself.
8613          */
8614         ret = -EAGAIN;
8615 out:
8616         while (!list_empty(&entries)) {
8617                 entry = list_entry(entries.next, struct extent_entry, list);
8618                 list_del_init(&entry->list);
8619                 free(entry);
8620         }
8621         return ret;
8622 }
8623
8624 static int process_duplicates(struct cache_tree *extent_cache,
8625                               struct extent_record *rec)
8626 {
8627         struct extent_record *good, *tmp;
8628         struct cache_extent *cache;
8629         int ret;
8630
8631         /*
8632          * If we found a extent record for this extent then return, or if we
8633          * have more than one duplicate we are likely going to need to delete
8634          * something.
8635          */
8636         if (rec->found_rec || rec->num_duplicates > 1)
8637                 return 0;
8638
8639         /* Shouldn't happen but just in case */
8640         BUG_ON(!rec->num_duplicates);
8641
8642         /*
8643          * So this happens if we end up with a backref that doesn't match the
8644          * actual extent entry.  So either the backref is bad or the extent
8645          * entry is bad.  Either way we want to have the extent_record actually
8646          * reflect what we found in the extent_tree, so we need to take the
8647          * duplicate out and use that as the extent_record since the only way we
8648          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8649          */
8650         remove_cache_extent(extent_cache, &rec->cache);
8651
8652         good = to_extent_record(rec->dups.next);
8653         list_del_init(&good->list);
8654         INIT_LIST_HEAD(&good->backrefs);
8655         INIT_LIST_HEAD(&good->dups);
8656         good->cache.start = good->start;
8657         good->cache.size = good->nr;
8658         good->content_checked = 0;
8659         good->owner_ref_checked = 0;
8660         good->num_duplicates = 0;
8661         good->refs = rec->refs;
8662         list_splice_init(&rec->backrefs, &good->backrefs);
8663         while (1) {
8664                 cache = lookup_cache_extent(extent_cache, good->start,
8665                                             good->nr);
8666                 if (!cache)
8667                         break;
8668                 tmp = container_of(cache, struct extent_record, cache);
8669
8670                 /*
8671                  * If we find another overlapping extent and it's found_rec is
8672                  * set then it's a duplicate and we need to try and delete
8673                  * something.
8674                  */
8675                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8676                         if (list_empty(&good->list))
8677                                 list_add_tail(&good->list,
8678                                               &duplicate_extents);
8679                         good->num_duplicates += tmp->num_duplicates + 1;
8680                         list_splice_init(&tmp->dups, &good->dups);
8681                         list_del_init(&tmp->list);
8682                         list_add_tail(&tmp->list, &good->dups);
8683                         remove_cache_extent(extent_cache, &tmp->cache);
8684                         continue;
8685                 }
8686
8687                 /*
8688                  * Ok we have another non extent item backed extent rec, so lets
8689                  * just add it to this extent and carry on like we did above.
8690                  */
8691                 good->refs += tmp->refs;
8692                 list_splice_init(&tmp->backrefs, &good->backrefs);
8693                 remove_cache_extent(extent_cache, &tmp->cache);
8694                 free(tmp);
8695         }
8696         ret = insert_cache_extent(extent_cache, &good->cache);
8697         BUG_ON(ret);
8698         free(rec);
8699         return good->num_duplicates ? 0 : 1;
8700 }
8701
8702 static int delete_duplicate_records(struct btrfs_root *root,
8703                                     struct extent_record *rec)
8704 {
8705         struct btrfs_trans_handle *trans;
8706         LIST_HEAD(delete_list);
8707         struct btrfs_path path;
8708         struct extent_record *tmp, *good, *n;
8709         int nr_del = 0;
8710         int ret = 0, err;
8711         struct btrfs_key key;
8712
8713         btrfs_init_path(&path);
8714
8715         good = rec;
8716         /* Find the record that covers all of the duplicates. */
8717         list_for_each_entry(tmp, &rec->dups, list) {
8718                 if (good->start < tmp->start)
8719                         continue;
8720                 if (good->nr > tmp->nr)
8721                         continue;
8722
8723                 if (tmp->start + tmp->nr < good->start + good->nr) {
8724                         fprintf(stderr, "Ok we have overlapping extents that "
8725                                 "aren't completely covered by each other, this "
8726                                 "is going to require more careful thought.  "
8727                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8728                                 tmp->start, tmp->nr, good->start, good->nr);
8729                         abort();
8730                 }
8731                 good = tmp;
8732         }
8733
8734         if (good != rec)
8735                 list_add_tail(&rec->list, &delete_list);
8736
8737         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8738                 if (tmp == good)
8739                         continue;
8740                 list_move_tail(&tmp->list, &delete_list);
8741         }
8742
8743         root = root->fs_info->extent_root;
8744         trans = btrfs_start_transaction(root, 1);
8745         if (IS_ERR(trans)) {
8746                 ret = PTR_ERR(trans);
8747                 goto out;
8748         }
8749
8750         list_for_each_entry(tmp, &delete_list, list) {
8751                 if (tmp->found_rec == 0)
8752                         continue;
8753                 key.objectid = tmp->start;
8754                 key.type = BTRFS_EXTENT_ITEM_KEY;
8755                 key.offset = tmp->nr;
8756
8757                 /* Shouldn't happen but just in case */
8758                 if (tmp->metadata) {
8759                         fprintf(stderr, "Well this shouldn't happen, extent "
8760                                 "record overlaps but is metadata? "
8761                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8762                         abort();
8763                 }
8764
8765                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8766                 if (ret) {
8767                         if (ret > 0)
8768                                 ret = -EINVAL;
8769                         break;
8770                 }
8771                 ret = btrfs_del_item(trans, root, &path);
8772                 if (ret)
8773                         break;
8774                 btrfs_release_path(&path);
8775                 nr_del++;
8776         }
8777         err = btrfs_commit_transaction(trans, root);
8778         if (err && !ret)
8779                 ret = err;
8780 out:
8781         while (!list_empty(&delete_list)) {
8782                 tmp = to_extent_record(delete_list.next);
8783                 list_del_init(&tmp->list);
8784                 if (tmp == rec)
8785                         continue;
8786                 free(tmp);
8787         }
8788
8789         while (!list_empty(&rec->dups)) {
8790                 tmp = to_extent_record(rec->dups.next);
8791                 list_del_init(&tmp->list);
8792                 free(tmp);
8793         }
8794
8795         btrfs_release_path(&path);
8796
8797         if (!ret && !nr_del)
8798                 rec->num_duplicates = 0;
8799
8800         return ret ? ret : nr_del;
8801 }
8802
8803 static int find_possible_backrefs(struct btrfs_fs_info *info,
8804                                   struct btrfs_path *path,
8805                                   struct cache_tree *extent_cache,
8806                                   struct extent_record *rec)
8807 {
8808         struct btrfs_root *root;
8809         struct extent_backref *back;
8810         struct data_backref *dback;
8811         struct cache_extent *cache;
8812         struct btrfs_file_extent_item *fi;
8813         struct btrfs_key key;
8814         u64 bytenr, bytes;
8815         int ret;
8816
8817         list_for_each_entry(back, &rec->backrefs, list) {
8818                 /* Don't care about full backrefs (poor unloved backrefs) */
8819                 if (back->full_backref || !back->is_data)
8820                         continue;
8821
8822                 dback = to_data_backref(back);
8823
8824                 /* We found this one, we don't need to do a lookup */
8825                 if (dback->found_ref)
8826                         continue;
8827
8828                 key.objectid = dback->root;
8829                 key.type = BTRFS_ROOT_ITEM_KEY;
8830                 key.offset = (u64)-1;
8831
8832                 root = btrfs_read_fs_root(info, &key);
8833
8834                 /* No root, definitely a bad ref, skip */
8835                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8836                         continue;
8837                 /* Other err, exit */
8838                 if (IS_ERR(root))
8839                         return PTR_ERR(root);
8840
8841                 key.objectid = dback->owner;
8842                 key.type = BTRFS_EXTENT_DATA_KEY;
8843                 key.offset = dback->offset;
8844                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8845                 if (ret) {
8846                         btrfs_release_path(path);
8847                         if (ret < 0)
8848                                 return ret;
8849                         /* Didn't find it, we can carry on */
8850                         ret = 0;
8851                         continue;
8852                 }
8853
8854                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8855                                     struct btrfs_file_extent_item);
8856                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8857                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8858                 btrfs_release_path(path);
8859                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8860                 if (cache) {
8861                         struct extent_record *tmp;
8862                         tmp = container_of(cache, struct extent_record, cache);
8863
8864                         /*
8865                          * If we found an extent record for the bytenr for this
8866                          * particular backref then we can't add it to our
8867                          * current extent record.  We only want to add backrefs
8868                          * that don't have a corresponding extent item in the
8869                          * extent tree since they likely belong to this record
8870                          * and we need to fix it if it doesn't match bytenrs.
8871                          */
8872                         if  (tmp->found_rec)
8873                                 continue;
8874                 }
8875
8876                 dback->found_ref += 1;
8877                 dback->disk_bytenr = bytenr;
8878                 dback->bytes = bytes;
8879
8880                 /*
8881                  * Set this so the verify backref code knows not to trust the
8882                  * values in this backref.
8883                  */
8884                 back->broken = 1;
8885         }
8886
8887         return 0;
8888 }
8889
8890 /*
8891  * Record orphan data ref into corresponding root.
8892  *
8893  * Return 0 if the extent item contains data ref and recorded.
8894  * Return 1 if the extent item contains no useful data ref
8895  *   On that case, it may contains only shared_dataref or metadata backref
8896  *   or the file extent exists(this should be handled by the extent bytenr
8897  *   recovery routine)
8898  * Return <0 if something goes wrong.
8899  */
8900 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8901                                       struct extent_record *rec)
8902 {
8903         struct btrfs_key key;
8904         struct btrfs_root *dest_root;
8905         struct extent_backref *back;
8906         struct data_backref *dback;
8907         struct orphan_data_extent *orphan;
8908         struct btrfs_path path;
8909         int recorded_data_ref = 0;
8910         int ret = 0;
8911
8912         if (rec->metadata)
8913                 return 1;
8914         btrfs_init_path(&path);
8915         list_for_each_entry(back, &rec->backrefs, list) {
8916                 if (back->full_backref || !back->is_data ||
8917                     !back->found_extent_tree)
8918                         continue;
8919                 dback = to_data_backref(back);
8920                 if (dback->found_ref)
8921                         continue;
8922                 key.objectid = dback->root;
8923                 key.type = BTRFS_ROOT_ITEM_KEY;
8924                 key.offset = (u64)-1;
8925
8926                 dest_root = btrfs_read_fs_root(fs_info, &key);
8927
8928                 /* For non-exist root we just skip it */
8929                 if (IS_ERR(dest_root) || !dest_root)
8930                         continue;
8931
8932                 key.objectid = dback->owner;
8933                 key.type = BTRFS_EXTENT_DATA_KEY;
8934                 key.offset = dback->offset;
8935
8936                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8937                 btrfs_release_path(&path);
8938                 /*
8939                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8940                  * we need to record it for inode/file extent rebuild.
8941                  * For ret > 0, we record it only for file extent rebuild.
8942                  * For ret == 0, the file extent exists but only bytenr
8943                  * mismatch, let the original bytenr fix routine to handle,
8944                  * don't record it.
8945                  */
8946                 if (ret == 0)
8947                         continue;
8948                 ret = 0;
8949                 orphan = malloc(sizeof(*orphan));
8950                 if (!orphan) {
8951                         ret = -ENOMEM;
8952                         goto out;
8953                 }
8954                 INIT_LIST_HEAD(&orphan->list);
8955                 orphan->root = dback->root;
8956                 orphan->objectid = dback->owner;
8957                 orphan->offset = dback->offset;
8958                 orphan->disk_bytenr = rec->cache.start;
8959                 orphan->disk_len = rec->cache.size;
8960                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8961                 recorded_data_ref = 1;
8962         }
8963 out:
8964         btrfs_release_path(&path);
8965         if (!ret)
8966                 return !recorded_data_ref;
8967         else
8968                 return ret;
8969 }
8970
8971 /*
8972  * when an incorrect extent item is found, this will delete
8973  * all of the existing entries for it and recreate them
8974  * based on what the tree scan found.
8975  */
8976 static int fixup_extent_refs(struct btrfs_fs_info *info,
8977                              struct cache_tree *extent_cache,
8978                              struct extent_record *rec)
8979 {
8980         struct btrfs_trans_handle *trans = NULL;
8981         int ret;
8982         struct btrfs_path path;
8983         struct list_head *cur = rec->backrefs.next;
8984         struct cache_extent *cache;
8985         struct extent_backref *back;
8986         int allocated = 0;
8987         u64 flags = 0;
8988
8989         if (rec->flag_block_full_backref)
8990                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8991
8992         btrfs_init_path(&path);
8993         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8994                 /*
8995                  * Sometimes the backrefs themselves are so broken they don't
8996                  * get attached to any meaningful rec, so first go back and
8997                  * check any of our backrefs that we couldn't find and throw
8998                  * them into the list if we find the backref so that
8999                  * verify_backrefs can figure out what to do.
9000                  */
9001                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9002                 if (ret < 0)
9003                         goto out;
9004         }
9005
9006         /* step one, make sure all of the backrefs agree */
9007         ret = verify_backrefs(info, &path, rec);
9008         if (ret < 0)
9009                 goto out;
9010
9011         trans = btrfs_start_transaction(info->extent_root, 1);
9012         if (IS_ERR(trans)) {
9013                 ret = PTR_ERR(trans);
9014                 goto out;
9015         }
9016
9017         /* step two, delete all the existing records */
9018         ret = delete_extent_records(trans, info->extent_root, &path,
9019                                     rec->start);
9020
9021         if (ret < 0)
9022                 goto out;
9023
9024         /* was this block corrupt?  If so, don't add references to it */
9025         cache = lookup_cache_extent(info->corrupt_blocks,
9026                                     rec->start, rec->max_size);
9027         if (cache) {
9028                 ret = 0;
9029                 goto out;
9030         }
9031
9032         /* step three, recreate all the refs we did find */
9033         while(cur != &rec->backrefs) {
9034                 back = to_extent_backref(cur);
9035                 cur = cur->next;
9036
9037                 /*
9038                  * if we didn't find any references, don't create a
9039                  * new extent record
9040                  */
9041                 if (!back->found_ref)
9042                         continue;
9043
9044                 rec->bad_full_backref = 0;
9045                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9046                 allocated = 1;
9047
9048                 if (ret)
9049                         goto out;
9050         }
9051 out:
9052         if (trans) {
9053                 int err = btrfs_commit_transaction(trans, info->extent_root);
9054                 if (!ret)
9055                         ret = err;
9056         }
9057
9058         if (!ret)
9059                 fprintf(stderr, "Repaired extent references for %llu\n",
9060                                 (unsigned long long)rec->start);
9061
9062         btrfs_release_path(&path);
9063         return ret;
9064 }
9065
9066 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9067                               struct extent_record *rec)
9068 {
9069         struct btrfs_trans_handle *trans;
9070         struct btrfs_root *root = fs_info->extent_root;
9071         struct btrfs_path path;
9072         struct btrfs_extent_item *ei;
9073         struct btrfs_key key;
9074         u64 flags;
9075         int ret = 0;
9076
9077         key.objectid = rec->start;
9078         if (rec->metadata) {
9079                 key.type = BTRFS_METADATA_ITEM_KEY;
9080                 key.offset = rec->info_level;
9081         } else {
9082                 key.type = BTRFS_EXTENT_ITEM_KEY;
9083                 key.offset = rec->max_size;
9084         }
9085
9086         trans = btrfs_start_transaction(root, 0);
9087         if (IS_ERR(trans))
9088                 return PTR_ERR(trans);
9089
9090         btrfs_init_path(&path);
9091         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9092         if (ret < 0) {
9093                 btrfs_release_path(&path);
9094                 btrfs_commit_transaction(trans, root);
9095                 return ret;
9096         } else if (ret) {
9097                 fprintf(stderr, "Didn't find extent for %llu\n",
9098                         (unsigned long long)rec->start);
9099                 btrfs_release_path(&path);
9100                 btrfs_commit_transaction(trans, root);
9101                 return -ENOENT;
9102         }
9103
9104         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9105                             struct btrfs_extent_item);
9106         flags = btrfs_extent_flags(path.nodes[0], ei);
9107         if (rec->flag_block_full_backref) {
9108                 fprintf(stderr, "setting full backref on %llu\n",
9109                         (unsigned long long)key.objectid);
9110                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9111         } else {
9112                 fprintf(stderr, "clearing full backref on %llu\n",
9113                         (unsigned long long)key.objectid);
9114                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9115         }
9116         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9117         btrfs_mark_buffer_dirty(path.nodes[0]);
9118         btrfs_release_path(&path);
9119         ret = btrfs_commit_transaction(trans, root);
9120         if (!ret)
9121                 fprintf(stderr, "Repaired extent flags for %llu\n",
9122                                 (unsigned long long)rec->start);
9123
9124         return ret;
9125 }
9126
9127 /* right now we only prune from the extent allocation tree */
9128 static int prune_one_block(struct btrfs_trans_handle *trans,
9129                            struct btrfs_fs_info *info,
9130                            struct btrfs_corrupt_block *corrupt)
9131 {
9132         int ret;
9133         struct btrfs_path path;
9134         struct extent_buffer *eb;
9135         u64 found;
9136         int slot;
9137         int nritems;
9138         int level = corrupt->level + 1;
9139
9140         btrfs_init_path(&path);
9141 again:
9142         /* we want to stop at the parent to our busted block */
9143         path.lowest_level = level;
9144
9145         ret = btrfs_search_slot(trans, info->extent_root,
9146                                 &corrupt->key, &path, -1, 1);
9147
9148         if (ret < 0)
9149                 goto out;
9150
9151         eb = path.nodes[level];
9152         if (!eb) {
9153                 ret = -ENOENT;
9154                 goto out;
9155         }
9156
9157         /*
9158          * hopefully the search gave us the block we want to prune,
9159          * lets try that first
9160          */
9161         slot = path.slots[level];
9162         found =  btrfs_node_blockptr(eb, slot);
9163         if (found == corrupt->cache.start)
9164                 goto del_ptr;
9165
9166         nritems = btrfs_header_nritems(eb);
9167
9168         /* the search failed, lets scan this node and hope we find it */
9169         for (slot = 0; slot < nritems; slot++) {
9170                 found =  btrfs_node_blockptr(eb, slot);
9171                 if (found == corrupt->cache.start)
9172                         goto del_ptr;
9173         }
9174         /*
9175          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9176          * to this block
9177          */
9178         if (eb == info->extent_root->node) {
9179                 ret = -ENOENT;
9180                 goto out;
9181         } else {
9182                 level++;
9183                 btrfs_release_path(&path);
9184                 goto again;
9185         }
9186
9187 del_ptr:
9188         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9189         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9190
9191 out:
9192         btrfs_release_path(&path);
9193         return ret;
9194 }
9195
9196 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9197 {
9198         struct btrfs_trans_handle *trans = NULL;
9199         struct cache_extent *cache;
9200         struct btrfs_corrupt_block *corrupt;
9201
9202         while (1) {
9203                 cache = search_cache_extent(info->corrupt_blocks, 0);
9204                 if (!cache)
9205                         break;
9206                 if (!trans) {
9207                         trans = btrfs_start_transaction(info->extent_root, 1);
9208                         if (IS_ERR(trans))
9209                                 return PTR_ERR(trans);
9210                 }
9211                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9212                 prune_one_block(trans, info, corrupt);
9213                 remove_cache_extent(info->corrupt_blocks, cache);
9214         }
9215         if (trans)
9216                 return btrfs_commit_transaction(trans, info->extent_root);
9217         return 0;
9218 }
9219
9220 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9221 {
9222         struct btrfs_block_group_cache *cache;
9223         u64 start, end;
9224         int ret;
9225
9226         while (1) {
9227                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9228                                             &start, &end, EXTENT_DIRTY);
9229                 if (ret)
9230                         break;
9231                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9232         }
9233
9234         start = 0;
9235         while (1) {
9236                 cache = btrfs_lookup_first_block_group(fs_info, start);
9237                 if (!cache)
9238                         break;
9239                 if (cache->cached)
9240                         cache->cached = 0;
9241                 start = cache->key.objectid + cache->key.offset;
9242         }
9243 }
9244
9245 static int check_extent_refs(struct btrfs_root *root,
9246                              struct cache_tree *extent_cache)
9247 {
9248         struct extent_record *rec;
9249         struct cache_extent *cache;
9250         int ret = 0;
9251         int had_dups = 0;
9252
9253         if (repair) {
9254                 /*
9255                  * if we're doing a repair, we have to make sure
9256                  * we don't allocate from the problem extents.
9257                  * In the worst case, this will be all the
9258                  * extents in the FS
9259                  */
9260                 cache = search_cache_extent(extent_cache, 0);
9261                 while(cache) {
9262                         rec = container_of(cache, struct extent_record, cache);
9263                         set_extent_dirty(root->fs_info->excluded_extents,
9264                                          rec->start,
9265                                          rec->start + rec->max_size - 1);
9266                         cache = next_cache_extent(cache);
9267                 }
9268
9269                 /* pin down all the corrupted blocks too */
9270                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9271                 while(cache) {
9272                         set_extent_dirty(root->fs_info->excluded_extents,
9273                                          cache->start,
9274                                          cache->start + cache->size - 1);
9275                         cache = next_cache_extent(cache);
9276                 }
9277                 prune_corrupt_blocks(root->fs_info);
9278                 reset_cached_block_groups(root->fs_info);
9279         }
9280
9281         reset_cached_block_groups(root->fs_info);
9282
9283         /*
9284          * We need to delete any duplicate entries we find first otherwise we
9285          * could mess up the extent tree when we have backrefs that actually
9286          * belong to a different extent item and not the weird duplicate one.
9287          */
9288         while (repair && !list_empty(&duplicate_extents)) {
9289                 rec = to_extent_record(duplicate_extents.next);
9290                 list_del_init(&rec->list);
9291
9292                 /* Sometimes we can find a backref before we find an actual
9293                  * extent, so we need to process it a little bit to see if there
9294                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9295                  * if this is a backref screwup.  If we need to delete stuff
9296                  * process_duplicates() will return 0, otherwise it will return
9297                  * 1 and we
9298                  */
9299                 if (process_duplicates(extent_cache, rec))
9300                         continue;
9301                 ret = delete_duplicate_records(root, rec);
9302                 if (ret < 0)
9303                         return ret;
9304                 /*
9305                  * delete_duplicate_records will return the number of entries
9306                  * deleted, so if it's greater than 0 then we know we actually
9307                  * did something and we need to remove.
9308                  */
9309                 if (ret)
9310                         had_dups = 1;
9311         }
9312
9313         if (had_dups)
9314                 return -EAGAIN;
9315
9316         while(1) {
9317                 int cur_err = 0;
9318                 int fix = 0;
9319
9320                 cache = search_cache_extent(extent_cache, 0);
9321                 if (!cache)
9322                         break;
9323                 rec = container_of(cache, struct extent_record, cache);
9324                 if (rec->num_duplicates) {
9325                         fprintf(stderr, "extent item %llu has multiple extent "
9326                                 "items\n", (unsigned long long)rec->start);
9327                         cur_err = 1;
9328                 }
9329
9330                 if (rec->refs != rec->extent_item_refs) {
9331                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9332                                 (unsigned long long)rec->start,
9333                                 (unsigned long long)rec->nr);
9334                         fprintf(stderr, "extent item %llu, found %llu\n",
9335                                 (unsigned long long)rec->extent_item_refs,
9336                                 (unsigned long long)rec->refs);
9337                         ret = record_orphan_data_extents(root->fs_info, rec);
9338                         if (ret < 0)
9339                                 goto repair_abort;
9340                         fix = ret;
9341                         cur_err = 1;
9342                 }
9343                 if (all_backpointers_checked(rec, 1)) {
9344                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9345                                 (unsigned long long)rec->start,
9346                                 (unsigned long long)rec->nr);
9347                         fix = 1;
9348                         cur_err = 1;
9349                 }
9350                 if (!rec->owner_ref_checked) {
9351                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9352                                 (unsigned long long)rec->start,
9353                                 (unsigned long long)rec->nr);
9354                         fix = 1;
9355                         cur_err = 1;
9356                 }
9357
9358                 if (repair && fix) {
9359                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9360                         if (ret)
9361                                 goto repair_abort;
9362                 }
9363
9364
9365                 if (rec->bad_full_backref) {
9366                         fprintf(stderr, "bad full backref, on [%llu]\n",
9367                                 (unsigned long long)rec->start);
9368                         if (repair) {
9369                                 ret = fixup_extent_flags(root->fs_info, rec);
9370                                 if (ret)
9371                                         goto repair_abort;
9372                                 fix = 1;
9373                         }
9374                         cur_err = 1;
9375                 }
9376                 /*
9377                  * Although it's not a extent ref's problem, we reuse this
9378                  * routine for error reporting.
9379                  * No repair function yet.
9380                  */
9381                 if (rec->crossing_stripes) {
9382                         fprintf(stderr,
9383                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9384                                 rec->start, rec->start + rec->max_size);
9385                         cur_err = 1;
9386                 }
9387
9388                 if (rec->wrong_chunk_type) {
9389                         fprintf(stderr,
9390                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9391                                 rec->start, rec->start + rec->max_size);
9392                         cur_err = 1;
9393                 }
9394
9395                 remove_cache_extent(extent_cache, cache);
9396                 free_all_extent_backrefs(rec);
9397                 if (!init_extent_tree && repair && (!cur_err || fix))
9398                         clear_extent_dirty(root->fs_info->excluded_extents,
9399                                            rec->start,
9400                                            rec->start + rec->max_size - 1);
9401                 free(rec);
9402         }
9403 repair_abort:
9404         if (repair) {
9405                 if (ret && ret != -EAGAIN) {
9406                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9407                         exit(1);
9408                 } else if (!ret) {
9409                         struct btrfs_trans_handle *trans;
9410
9411                         root = root->fs_info->extent_root;
9412                         trans = btrfs_start_transaction(root, 1);
9413                         if (IS_ERR(trans)) {
9414                                 ret = PTR_ERR(trans);
9415                                 goto repair_abort;
9416                         }
9417
9418                         btrfs_fix_block_accounting(trans, root);
9419                         ret = btrfs_commit_transaction(trans, root);
9420                         if (ret)
9421                                 goto repair_abort;
9422                 }
9423                 return ret;
9424         }
9425         return 0;
9426 }
9427
9428 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9429 {
9430         u64 stripe_size;
9431
9432         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9433                 stripe_size = length;
9434                 stripe_size /= num_stripes;
9435         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9436                 stripe_size = length * 2;
9437                 stripe_size /= num_stripes;
9438         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9439                 stripe_size = length;
9440                 stripe_size /= (num_stripes - 1);
9441         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9442                 stripe_size = length;
9443                 stripe_size /= (num_stripes - 2);
9444         } else {
9445                 stripe_size = length;
9446         }
9447         return stripe_size;
9448 }
9449
9450 /*
9451  * Check the chunk with its block group/dev list ref:
9452  * Return 0 if all refs seems valid.
9453  * Return 1 if part of refs seems valid, need later check for rebuild ref
9454  * like missing block group and needs to search extent tree to rebuild them.
9455  * Return -1 if essential refs are missing and unable to rebuild.
9456  */
9457 static int check_chunk_refs(struct chunk_record *chunk_rec,
9458                             struct block_group_tree *block_group_cache,
9459                             struct device_extent_tree *dev_extent_cache,
9460                             int silent)
9461 {
9462         struct cache_extent *block_group_item;
9463         struct block_group_record *block_group_rec;
9464         struct cache_extent *dev_extent_item;
9465         struct device_extent_record *dev_extent_rec;
9466         u64 devid;
9467         u64 offset;
9468         u64 length;
9469         int metadump_v2 = 0;
9470         int i;
9471         int ret = 0;
9472
9473         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9474                                                chunk_rec->offset,
9475                                                chunk_rec->length);
9476         if (block_group_item) {
9477                 block_group_rec = container_of(block_group_item,
9478                                                struct block_group_record,
9479                                                cache);
9480                 if (chunk_rec->length != block_group_rec->offset ||
9481                     chunk_rec->offset != block_group_rec->objectid ||
9482                     (!metadump_v2 &&
9483                      chunk_rec->type_flags != block_group_rec->flags)) {
9484                         if (!silent)
9485                                 fprintf(stderr,
9486                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9487                                         chunk_rec->objectid,
9488                                         chunk_rec->type,
9489                                         chunk_rec->offset,
9490                                         chunk_rec->length,
9491                                         chunk_rec->offset,
9492                                         chunk_rec->type_flags,
9493                                         block_group_rec->objectid,
9494                                         block_group_rec->type,
9495                                         block_group_rec->offset,
9496                                         block_group_rec->offset,
9497                                         block_group_rec->objectid,
9498                                         block_group_rec->flags);
9499                         ret = -1;
9500                 } else {
9501                         list_del_init(&block_group_rec->list);
9502                         chunk_rec->bg_rec = block_group_rec;
9503                 }
9504         } else {
9505                 if (!silent)
9506                         fprintf(stderr,
9507                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9508                                 chunk_rec->objectid,
9509                                 chunk_rec->type,
9510                                 chunk_rec->offset,
9511                                 chunk_rec->length,
9512                                 chunk_rec->offset,
9513                                 chunk_rec->type_flags);
9514                 ret = 1;
9515         }
9516
9517         if (metadump_v2)
9518                 return ret;
9519
9520         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9521                                     chunk_rec->num_stripes);
9522         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9523                 devid = chunk_rec->stripes[i].devid;
9524                 offset = chunk_rec->stripes[i].offset;
9525                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9526                                                        devid, offset, length);
9527                 if (dev_extent_item) {
9528                         dev_extent_rec = container_of(dev_extent_item,
9529                                                 struct device_extent_record,
9530                                                 cache);
9531                         if (dev_extent_rec->objectid != devid ||
9532                             dev_extent_rec->offset != offset ||
9533                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9534                             dev_extent_rec->length != length) {
9535                                 if (!silent)
9536                                         fprintf(stderr,
9537                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9538                                                 chunk_rec->objectid,
9539                                                 chunk_rec->type,
9540                                                 chunk_rec->offset,
9541                                                 chunk_rec->stripes[i].devid,
9542                                                 chunk_rec->stripes[i].offset,
9543                                                 dev_extent_rec->objectid,
9544                                                 dev_extent_rec->offset,
9545                                                 dev_extent_rec->length);
9546                                 ret = -1;
9547                         } else {
9548                                 list_move(&dev_extent_rec->chunk_list,
9549                                           &chunk_rec->dextents);
9550                         }
9551                 } else {
9552                         if (!silent)
9553                                 fprintf(stderr,
9554                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9555                                         chunk_rec->objectid,
9556                                         chunk_rec->type,
9557                                         chunk_rec->offset,
9558                                         chunk_rec->stripes[i].devid,
9559                                         chunk_rec->stripes[i].offset);
9560                         ret = -1;
9561                 }
9562         }
9563         return ret;
9564 }
9565
9566 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9567 int check_chunks(struct cache_tree *chunk_cache,
9568                  struct block_group_tree *block_group_cache,
9569                  struct device_extent_tree *dev_extent_cache,
9570                  struct list_head *good, struct list_head *bad,
9571                  struct list_head *rebuild, int silent)
9572 {
9573         struct cache_extent *chunk_item;
9574         struct chunk_record *chunk_rec;
9575         struct block_group_record *bg_rec;
9576         struct device_extent_record *dext_rec;
9577         int err;
9578         int ret = 0;
9579
9580         chunk_item = first_cache_extent(chunk_cache);
9581         while (chunk_item) {
9582                 chunk_rec = container_of(chunk_item, struct chunk_record,
9583                                          cache);
9584                 err = check_chunk_refs(chunk_rec, block_group_cache,
9585                                        dev_extent_cache, silent);
9586                 if (err < 0)
9587                         ret = err;
9588                 if (err == 0 && good)
9589                         list_add_tail(&chunk_rec->list, good);
9590                 if (err > 0 && rebuild)
9591                         list_add_tail(&chunk_rec->list, rebuild);
9592                 if (err < 0 && bad)
9593                         list_add_tail(&chunk_rec->list, bad);
9594                 chunk_item = next_cache_extent(chunk_item);
9595         }
9596
9597         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9598                 if (!silent)
9599                         fprintf(stderr,
9600                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9601                                 bg_rec->objectid,
9602                                 bg_rec->offset,
9603                                 bg_rec->flags);
9604                 if (!ret)
9605                         ret = 1;
9606         }
9607
9608         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9609                             chunk_list) {
9610                 if (!silent)
9611                         fprintf(stderr,
9612                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9613                                 dext_rec->objectid,
9614                                 dext_rec->offset,
9615                                 dext_rec->length);
9616                 if (!ret)
9617                         ret = 1;
9618         }
9619         return ret;
9620 }
9621
9622
9623 static int check_device_used(struct device_record *dev_rec,
9624                              struct device_extent_tree *dext_cache)
9625 {
9626         struct cache_extent *cache;
9627         struct device_extent_record *dev_extent_rec;
9628         u64 total_byte = 0;
9629
9630         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9631         while (cache) {
9632                 dev_extent_rec = container_of(cache,
9633                                               struct device_extent_record,
9634                                               cache);
9635                 if (dev_extent_rec->objectid != dev_rec->devid)
9636                         break;
9637
9638                 list_del_init(&dev_extent_rec->device_list);
9639                 total_byte += dev_extent_rec->length;
9640                 cache = next_cache_extent(cache);
9641         }
9642
9643         if (total_byte != dev_rec->byte_used) {
9644                 fprintf(stderr,
9645                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9646                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9647                         dev_rec->type, dev_rec->offset);
9648                 return -1;
9649         } else {
9650                 return 0;
9651         }
9652 }
9653
9654 /* check btrfs_dev_item -> btrfs_dev_extent */
9655 static int check_devices(struct rb_root *dev_cache,
9656                          struct device_extent_tree *dev_extent_cache)
9657 {
9658         struct rb_node *dev_node;
9659         struct device_record *dev_rec;
9660         struct device_extent_record *dext_rec;
9661         int err;
9662         int ret = 0;
9663
9664         dev_node = rb_first(dev_cache);
9665         while (dev_node) {
9666                 dev_rec = container_of(dev_node, struct device_record, node);
9667                 err = check_device_used(dev_rec, dev_extent_cache);
9668                 if (err)
9669                         ret = err;
9670
9671                 dev_node = rb_next(dev_node);
9672         }
9673         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9674                             device_list) {
9675                 fprintf(stderr,
9676                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9677                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9678                 if (!ret)
9679                         ret = 1;
9680         }
9681         return ret;
9682 }
9683
9684 static int add_root_item_to_list(struct list_head *head,
9685                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9686                                   u8 level, u8 drop_level,
9687                                   int level_size, struct btrfs_key *drop_key)
9688 {
9689
9690         struct root_item_record *ri_rec;
9691         ri_rec = malloc(sizeof(*ri_rec));
9692         if (!ri_rec)
9693                 return -ENOMEM;
9694         ri_rec->bytenr = bytenr;
9695         ri_rec->objectid = objectid;
9696         ri_rec->level = level;
9697         ri_rec->level_size = level_size;
9698         ri_rec->drop_level = drop_level;
9699         ri_rec->last_snapshot = last_snapshot;
9700         if (drop_key)
9701                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9702         list_add_tail(&ri_rec->list, head);
9703
9704         return 0;
9705 }
9706
9707 static void free_root_item_list(struct list_head *list)
9708 {
9709         struct root_item_record *ri_rec;
9710
9711         while (!list_empty(list)) {
9712                 ri_rec = list_first_entry(list, struct root_item_record,
9713                                           list);
9714                 list_del_init(&ri_rec->list);
9715                 free(ri_rec);
9716         }
9717 }
9718
9719 static int deal_root_from_list(struct list_head *list,
9720                                struct btrfs_root *root,
9721                                struct block_info *bits,
9722                                int bits_nr,
9723                                struct cache_tree *pending,
9724                                struct cache_tree *seen,
9725                                struct cache_tree *reada,
9726                                struct cache_tree *nodes,
9727                                struct cache_tree *extent_cache,
9728                                struct cache_tree *chunk_cache,
9729                                struct rb_root *dev_cache,
9730                                struct block_group_tree *block_group_cache,
9731                                struct device_extent_tree *dev_extent_cache)
9732 {
9733         int ret = 0;
9734         u64 last;
9735
9736         while (!list_empty(list)) {
9737                 struct root_item_record *rec;
9738                 struct extent_buffer *buf;
9739                 rec = list_entry(list->next,
9740                                  struct root_item_record, list);
9741                 last = 0;
9742                 buf = read_tree_block(root->fs_info->tree_root,
9743                                       rec->bytenr, rec->level_size, 0);
9744                 if (!extent_buffer_uptodate(buf)) {
9745                         free_extent_buffer(buf);
9746                         ret = -EIO;
9747                         break;
9748                 }
9749                 ret = add_root_to_pending(buf, extent_cache, pending,
9750                                     seen, nodes, rec->objectid);
9751                 if (ret < 0)
9752                         break;
9753                 /*
9754                  * To rebuild extent tree, we need deal with snapshot
9755                  * one by one, otherwise we deal with node firstly which
9756                  * can maximize readahead.
9757                  */
9758                 while (1) {
9759                         ret = run_next_block(root, bits, bits_nr, &last,
9760                                              pending, seen, reada, nodes,
9761                                              extent_cache, chunk_cache,
9762                                              dev_cache, block_group_cache,
9763                                              dev_extent_cache, rec);
9764                         if (ret != 0)
9765                                 break;
9766                 }
9767                 free_extent_buffer(buf);
9768                 list_del(&rec->list);
9769                 free(rec);
9770                 if (ret < 0)
9771                         break;
9772         }
9773         while (ret >= 0) {
9774                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9775                                      reada, nodes, extent_cache, chunk_cache,
9776                                      dev_cache, block_group_cache,
9777                                      dev_extent_cache, NULL);
9778                 if (ret != 0) {
9779                         if (ret > 0)
9780                                 ret = 0;
9781                         break;
9782                 }
9783         }
9784         return ret;
9785 }
9786
9787 static int check_chunks_and_extents(struct btrfs_root *root)
9788 {
9789         struct rb_root dev_cache;
9790         struct cache_tree chunk_cache;
9791         struct block_group_tree block_group_cache;
9792         struct device_extent_tree dev_extent_cache;
9793         struct cache_tree extent_cache;
9794         struct cache_tree seen;
9795         struct cache_tree pending;
9796         struct cache_tree reada;
9797         struct cache_tree nodes;
9798         struct extent_io_tree excluded_extents;
9799         struct cache_tree corrupt_blocks;
9800         struct btrfs_path path;
9801         struct btrfs_key key;
9802         struct btrfs_key found_key;
9803         int ret, err = 0;
9804         struct block_info *bits;
9805         int bits_nr;
9806         struct extent_buffer *leaf;
9807         int slot;
9808         struct btrfs_root_item ri;
9809         struct list_head dropping_trees;
9810         struct list_head normal_trees;
9811         struct btrfs_root *root1;
9812         u64 objectid;
9813         u32 level_size;
9814         u8 level;
9815
9816         dev_cache = RB_ROOT;
9817         cache_tree_init(&chunk_cache);
9818         block_group_tree_init(&block_group_cache);
9819         device_extent_tree_init(&dev_extent_cache);
9820
9821         cache_tree_init(&extent_cache);
9822         cache_tree_init(&seen);
9823         cache_tree_init(&pending);
9824         cache_tree_init(&nodes);
9825         cache_tree_init(&reada);
9826         cache_tree_init(&corrupt_blocks);
9827         extent_io_tree_init(&excluded_extents);
9828         INIT_LIST_HEAD(&dropping_trees);
9829         INIT_LIST_HEAD(&normal_trees);
9830
9831         if (repair) {
9832                 root->fs_info->excluded_extents = &excluded_extents;
9833                 root->fs_info->fsck_extent_cache = &extent_cache;
9834                 root->fs_info->free_extent_hook = free_extent_hook;
9835                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9836         }
9837
9838         bits_nr = 1024;
9839         bits = malloc(bits_nr * sizeof(struct block_info));
9840         if (!bits) {
9841                 perror("malloc");
9842                 exit(1);
9843         }
9844
9845         if (ctx.progress_enabled) {
9846                 ctx.tp = TASK_EXTENTS;
9847                 task_start(ctx.info);
9848         }
9849
9850 again:
9851         root1 = root->fs_info->tree_root;
9852         level = btrfs_header_level(root1->node);
9853         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9854                                     root1->node->start, 0, level, 0,
9855                                     root1->nodesize, NULL);
9856         if (ret < 0)
9857                 goto out;
9858         root1 = root->fs_info->chunk_root;
9859         level = btrfs_header_level(root1->node);
9860         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9861                                     root1->node->start, 0, level, 0,
9862                                     root1->nodesize, NULL);
9863         if (ret < 0)
9864                 goto out;
9865         btrfs_init_path(&path);
9866         key.offset = 0;
9867         key.objectid = 0;
9868         key.type = BTRFS_ROOT_ITEM_KEY;
9869         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9870                                         &key, &path, 0, 0);
9871         if (ret < 0)
9872                 goto out;
9873         while(1) {
9874                 leaf = path.nodes[0];
9875                 slot = path.slots[0];
9876                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9877                         ret = btrfs_next_leaf(root, &path);
9878                         if (ret != 0)
9879                                 break;
9880                         leaf = path.nodes[0];
9881                         slot = path.slots[0];
9882                 }
9883                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9884                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9885                         unsigned long offset;
9886                         u64 last_snapshot;
9887
9888                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9889                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9890                         last_snapshot = btrfs_root_last_snapshot(&ri);
9891                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9892                                 level = btrfs_root_level(&ri);
9893                                 level_size = root->nodesize;
9894                                 ret = add_root_item_to_list(&normal_trees,
9895                                                 found_key.objectid,
9896                                                 btrfs_root_bytenr(&ri),
9897                                                 last_snapshot, level,
9898                                                 0, level_size, NULL);
9899                                 if (ret < 0)
9900                                         goto out;
9901                         } else {
9902                                 level = btrfs_root_level(&ri);
9903                                 level_size = root->nodesize;
9904                                 objectid = found_key.objectid;
9905                                 btrfs_disk_key_to_cpu(&found_key,
9906                                                       &ri.drop_progress);
9907                                 ret = add_root_item_to_list(&dropping_trees,
9908                                                 objectid,
9909                                                 btrfs_root_bytenr(&ri),
9910                                                 last_snapshot, level,
9911                                                 ri.drop_level,
9912                                                 level_size, &found_key);
9913                                 if (ret < 0)
9914                                         goto out;
9915                         }
9916                 }
9917                 path.slots[0]++;
9918         }
9919         btrfs_release_path(&path);
9920
9921         /*
9922          * check_block can return -EAGAIN if it fixes something, please keep
9923          * this in mind when dealing with return values from these functions, if
9924          * we get -EAGAIN we want to fall through and restart the loop.
9925          */
9926         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9927                                   &seen, &reada, &nodes, &extent_cache,
9928                                   &chunk_cache, &dev_cache, &block_group_cache,
9929                                   &dev_extent_cache);
9930         if (ret < 0) {
9931                 if (ret == -EAGAIN)
9932                         goto loop;
9933                 goto out;
9934         }
9935         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9936                                   &pending, &seen, &reada, &nodes,
9937                                   &extent_cache, &chunk_cache, &dev_cache,
9938                                   &block_group_cache, &dev_extent_cache);
9939         if (ret < 0) {
9940                 if (ret == -EAGAIN)
9941                         goto loop;
9942                 goto out;
9943         }
9944
9945         ret = check_chunks(&chunk_cache, &block_group_cache,
9946                            &dev_extent_cache, NULL, NULL, NULL, 0);
9947         if (ret) {
9948                 if (ret == -EAGAIN)
9949                         goto loop;
9950                 err = ret;
9951         }
9952
9953         ret = check_extent_refs(root, &extent_cache);
9954         if (ret < 0) {
9955                 if (ret == -EAGAIN)
9956                         goto loop;
9957                 goto out;
9958         }
9959
9960         ret = check_devices(&dev_cache, &dev_extent_cache);
9961         if (ret && err)
9962                 ret = err;
9963
9964 out:
9965         task_stop(ctx.info);
9966         if (repair) {
9967                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9968                 extent_io_tree_cleanup(&excluded_extents);
9969                 root->fs_info->fsck_extent_cache = NULL;
9970                 root->fs_info->free_extent_hook = NULL;
9971                 root->fs_info->corrupt_blocks = NULL;
9972                 root->fs_info->excluded_extents = NULL;
9973         }
9974         free(bits);
9975         free_chunk_cache_tree(&chunk_cache);
9976         free_device_cache_tree(&dev_cache);
9977         free_block_group_tree(&block_group_cache);
9978         free_device_extent_tree(&dev_extent_cache);
9979         free_extent_cache_tree(&seen);
9980         free_extent_cache_tree(&pending);
9981         free_extent_cache_tree(&reada);
9982         free_extent_cache_tree(&nodes);
9983         free_root_item_list(&normal_trees);
9984         free_root_item_list(&dropping_trees);
9985         return ret;
9986 loop:
9987         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9988         free_extent_cache_tree(&seen);
9989         free_extent_cache_tree(&pending);
9990         free_extent_cache_tree(&reada);
9991         free_extent_cache_tree(&nodes);
9992         free_chunk_cache_tree(&chunk_cache);
9993         free_block_group_tree(&block_group_cache);
9994         free_device_cache_tree(&dev_cache);
9995         free_device_extent_tree(&dev_extent_cache);
9996         free_extent_record_cache(&extent_cache);
9997         free_root_item_list(&normal_trees);
9998         free_root_item_list(&dropping_trees);
9999         extent_io_tree_cleanup(&excluded_extents);
10000         goto again;
10001 }
10002
10003 /*
10004  * Check backrefs of a tree block given by @bytenr or @eb.
10005  *
10006  * @root:       the root containing the @bytenr or @eb
10007  * @eb:         tree block extent buffer, can be NULL
10008  * @bytenr:     bytenr of the tree block to search
10009  * @level:      tree level of the tree block
10010  * @owner:      owner of the tree block
10011  *
10012  * Return >0 for any error found and output error message
10013  * Return 0 for no error found
10014  */
10015 static int check_tree_block_ref(struct btrfs_root *root,
10016                                 struct extent_buffer *eb, u64 bytenr,
10017                                 int level, u64 owner)
10018 {
10019         struct btrfs_key key;
10020         struct btrfs_root *extent_root = root->fs_info->extent_root;
10021         struct btrfs_path path;
10022         struct btrfs_extent_item *ei;
10023         struct btrfs_extent_inline_ref *iref;
10024         struct extent_buffer *leaf;
10025         unsigned long end;
10026         unsigned long ptr;
10027         int slot;
10028         int skinny_level;
10029         int type;
10030         u32 nodesize = root->nodesize;
10031         u32 item_size;
10032         u64 offset;
10033         int tree_reloc_root = 0;
10034         int found_ref = 0;
10035         int err = 0;
10036         int ret;
10037
10038         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10039             btrfs_header_bytenr(root->node) == bytenr)
10040                 tree_reloc_root = 1;
10041
10042         btrfs_init_path(&path);
10043         key.objectid = bytenr;
10044         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10045                 key.type = BTRFS_METADATA_ITEM_KEY;
10046         else
10047                 key.type = BTRFS_EXTENT_ITEM_KEY;
10048         key.offset = (u64)-1;
10049
10050         /* Search for the backref in extent tree */
10051         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10052         if (ret < 0) {
10053                 err |= BACKREF_MISSING;
10054                 goto out;
10055         }
10056         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10057         if (ret) {
10058                 err |= BACKREF_MISSING;
10059                 goto out;
10060         }
10061
10062         leaf = path.nodes[0];
10063         slot = path.slots[0];
10064         btrfs_item_key_to_cpu(leaf, &key, slot);
10065
10066         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10067
10068         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10069                 skinny_level = (int)key.offset;
10070                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10071         } else {
10072                 struct btrfs_tree_block_info *info;
10073
10074                 info = (struct btrfs_tree_block_info *)(ei + 1);
10075                 skinny_level = btrfs_tree_block_level(leaf, info);
10076                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10077         }
10078
10079         if (eb) {
10080                 u64 header_gen;
10081                 u64 extent_gen;
10082
10083                 if (!(btrfs_extent_flags(leaf, ei) &
10084                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10085                         error(
10086                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10087                                 key.objectid, nodesize,
10088                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10089                         err = BACKREF_MISMATCH;
10090                 }
10091                 header_gen = btrfs_header_generation(eb);
10092                 extent_gen = btrfs_extent_generation(leaf, ei);
10093                 if (header_gen != extent_gen) {
10094                         error(
10095         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10096                                 key.objectid, nodesize, header_gen,
10097                                 extent_gen);
10098                         err = BACKREF_MISMATCH;
10099                 }
10100                 if (level != skinny_level) {
10101                         error(
10102                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10103                                 key.objectid, nodesize, level, skinny_level);
10104                         err = BACKREF_MISMATCH;
10105                 }
10106                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10107                         error(
10108                         "extent[%llu %u] is referred by other roots than %llu",
10109                                 key.objectid, nodesize, root->objectid);
10110                         err = BACKREF_MISMATCH;
10111                 }
10112         }
10113
10114         /*
10115          * Iterate the extent/metadata item to find the exact backref
10116          */
10117         item_size = btrfs_item_size_nr(leaf, slot);
10118         ptr = (unsigned long)iref;
10119         end = (unsigned long)ei + item_size;
10120         while (ptr < end) {
10121                 iref = (struct btrfs_extent_inline_ref *)ptr;
10122                 type = btrfs_extent_inline_ref_type(leaf, iref);
10123                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10124
10125                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10126                         (offset == root->objectid || offset == owner)) {
10127                         found_ref = 1;
10128                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10129                         /*
10130                          * Backref of tree reloc root points to itself, no need
10131                          * to check backref any more.
10132                          */
10133                         if (tree_reloc_root)
10134                                 found_ref = 1;
10135                         else
10136                         /* Check if the backref points to valid referencer */
10137                                 found_ref = !check_tree_block_ref(root, NULL,
10138                                                 offset, level + 1, owner);
10139                 }
10140
10141                 if (found_ref)
10142                         break;
10143                 ptr += btrfs_extent_inline_ref_size(type);
10144         }
10145
10146         /*
10147          * Inlined extent item doesn't have what we need, check
10148          * TREE_BLOCK_REF_KEY
10149          */
10150         if (!found_ref) {
10151                 btrfs_release_path(&path);
10152                 key.objectid = bytenr;
10153                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10154                 key.offset = root->objectid;
10155
10156                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10157                 if (!ret)
10158                         found_ref = 1;
10159         }
10160         if (!found_ref)
10161                 err |= BACKREF_MISSING;
10162 out:
10163         btrfs_release_path(&path);
10164         if (eb && (err & BACKREF_MISSING))
10165                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10166                         bytenr, nodesize, owner, level);
10167         return err;
10168 }
10169
10170 /*
10171  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10172  *
10173  * Return >0 any error found and output error message
10174  * Return 0 for no error found
10175  */
10176 static int check_extent_data_item(struct btrfs_root *root,
10177                                   struct extent_buffer *eb, int slot)
10178 {
10179         struct btrfs_file_extent_item *fi;
10180         struct btrfs_path path;
10181         struct btrfs_root *extent_root = root->fs_info->extent_root;
10182         struct btrfs_key fi_key;
10183         struct btrfs_key dbref_key;
10184         struct extent_buffer *leaf;
10185         struct btrfs_extent_item *ei;
10186         struct btrfs_extent_inline_ref *iref;
10187         struct btrfs_extent_data_ref *dref;
10188         u64 owner;
10189         u64 disk_bytenr;
10190         u64 disk_num_bytes;
10191         u64 extent_num_bytes;
10192         u64 extent_flags;
10193         u32 item_size;
10194         unsigned long end;
10195         unsigned long ptr;
10196         int type;
10197         u64 ref_root;
10198         int found_dbackref = 0;
10199         int err = 0;
10200         int ret;
10201
10202         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10203         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10204
10205         /* Nothing to check for hole and inline data extents */
10206         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10207             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10208                 return 0;
10209
10210         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10211         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10212         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10213
10214         /* Check unaligned disk_num_bytes and num_bytes */
10215         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10216                 error(
10217 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10218                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10219                         root->sectorsize);
10220                 err |= BYTES_UNALIGNED;
10221         } else {
10222                 data_bytes_allocated += disk_num_bytes;
10223         }
10224         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10225                 error(
10226 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10227                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10228                         root->sectorsize);
10229                 err |= BYTES_UNALIGNED;
10230         } else {
10231                 data_bytes_referenced += extent_num_bytes;
10232         }
10233         owner = btrfs_header_owner(eb);
10234
10235         /* Check the extent item of the file extent in extent tree */
10236         btrfs_init_path(&path);
10237         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10238         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10239         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10240
10241         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10242         if (ret)
10243                 goto out;
10244
10245         leaf = path.nodes[0];
10246         slot = path.slots[0];
10247         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10248
10249         extent_flags = btrfs_extent_flags(leaf, ei);
10250
10251         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10252                 error(
10253                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10254                     disk_bytenr, disk_num_bytes,
10255                     BTRFS_EXTENT_FLAG_DATA);
10256                 err |= BACKREF_MISMATCH;
10257         }
10258
10259         /* Check data backref inside that extent item */
10260         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10261         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10262         ptr = (unsigned long)iref;
10263         end = (unsigned long)ei + item_size;
10264         while (ptr < end) {
10265                 iref = (struct btrfs_extent_inline_ref *)ptr;
10266                 type = btrfs_extent_inline_ref_type(leaf, iref);
10267                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10268
10269                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10270                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10271                         if (ref_root == owner || ref_root == root->objectid)
10272                                 found_dbackref = 1;
10273                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10274                         found_dbackref = !check_tree_block_ref(root, NULL,
10275                                 btrfs_extent_inline_ref_offset(leaf, iref),
10276                                 0, owner);
10277                 }
10278
10279                 if (found_dbackref)
10280                         break;
10281                 ptr += btrfs_extent_inline_ref_size(type);
10282         }
10283
10284         if (!found_dbackref) {
10285                 btrfs_release_path(&path);
10286
10287                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10288                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10289                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10290                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10291                                 fi_key.objectid, fi_key.offset);
10292
10293                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10294                                         &dbref_key, &path, 0, 0);
10295                 if (!ret) {
10296                         found_dbackref = 1;
10297                         goto out;
10298                 }
10299
10300                 btrfs_release_path(&path);
10301
10302                 /*
10303                  * Neither inlined nor EXTENT_DATA_REF found, try
10304                  * SHARED_DATA_REF as last chance.
10305                  */
10306                 dbref_key.objectid = disk_bytenr;
10307                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10308                 dbref_key.offset = eb->start;
10309
10310                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10311                                         &dbref_key, &path, 0, 0);
10312                 if (!ret) {
10313                         found_dbackref = 1;
10314                         goto out;
10315                 }
10316         }
10317
10318 out:
10319         if (!found_dbackref)
10320                 err |= BACKREF_MISSING;
10321         btrfs_release_path(&path);
10322         if (err & BACKREF_MISSING) {
10323                 error("data extent[%llu %llu] backref lost",
10324                       disk_bytenr, disk_num_bytes);
10325         }
10326         return err;
10327 }
10328
10329 /*
10330  * Get real tree block level for the case like shared block
10331  * Return >= 0 as tree level
10332  * Return <0 for error
10333  */
10334 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10335 {
10336         struct extent_buffer *eb;
10337         struct btrfs_path path;
10338         struct btrfs_key key;
10339         struct btrfs_extent_item *ei;
10340         u64 flags;
10341         u64 transid;
10342         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10343         u8 backref_level;
10344         u8 header_level;
10345         int ret;
10346
10347         /* Search extent tree for extent generation and level */
10348         key.objectid = bytenr;
10349         key.type = BTRFS_METADATA_ITEM_KEY;
10350         key.offset = (u64)-1;
10351
10352         btrfs_init_path(&path);
10353         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10354         if (ret < 0)
10355                 goto release_out;
10356         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10357         if (ret < 0)
10358                 goto release_out;
10359         if (ret > 0) {
10360                 ret = -ENOENT;
10361                 goto release_out;
10362         }
10363
10364         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10365         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10366                             struct btrfs_extent_item);
10367         flags = btrfs_extent_flags(path.nodes[0], ei);
10368         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10369                 ret = -ENOENT;
10370                 goto release_out;
10371         }
10372
10373         /* Get transid for later read_tree_block() check */
10374         transid = btrfs_extent_generation(path.nodes[0], ei);
10375
10376         /* Get backref level as one source */
10377         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10378                 backref_level = key.offset;
10379         } else {
10380                 struct btrfs_tree_block_info *info;
10381
10382                 info = (struct btrfs_tree_block_info *)(ei + 1);
10383                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10384         }
10385         btrfs_release_path(&path);
10386
10387         /* Get level from tree block as an alternative source */
10388         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10389         if (!extent_buffer_uptodate(eb)) {
10390                 free_extent_buffer(eb);
10391                 return -EIO;
10392         }
10393         header_level = btrfs_header_level(eb);
10394         free_extent_buffer(eb);
10395
10396         if (header_level != backref_level)
10397                 return -EIO;
10398         return header_level;
10399
10400 release_out:
10401         btrfs_release_path(&path);
10402         return ret;
10403 }
10404
10405 /*
10406  * Check if a tree block backref is valid (points to a valid tree block)
10407  * if level == -1, level will be resolved
10408  * Return >0 for any error found and print error message
10409  */
10410 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10411                                     u64 bytenr, int level)
10412 {
10413         struct btrfs_root *root;
10414         struct btrfs_key key;
10415         struct btrfs_path path;
10416         struct extent_buffer *eb;
10417         struct extent_buffer *node;
10418         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10419         int err = 0;
10420         int ret;
10421
10422         /* Query level for level == -1 special case */
10423         if (level == -1)
10424                 level = query_tree_block_level(fs_info, bytenr);
10425         if (level < 0) {
10426                 err |= REFERENCER_MISSING;
10427                 goto out;
10428         }
10429
10430         key.objectid = root_id;
10431         key.type = BTRFS_ROOT_ITEM_KEY;
10432         key.offset = (u64)-1;
10433
10434         root = btrfs_read_fs_root(fs_info, &key);
10435         if (IS_ERR(root)) {
10436                 err |= REFERENCER_MISSING;
10437                 goto out;
10438         }
10439
10440         /* Read out the tree block to get item/node key */
10441         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10442         if (!extent_buffer_uptodate(eb)) {
10443                 err |= REFERENCER_MISSING;
10444                 free_extent_buffer(eb);
10445                 goto out;
10446         }
10447
10448         /* Empty tree, no need to check key */
10449         if (!btrfs_header_nritems(eb) && !level) {
10450                 free_extent_buffer(eb);
10451                 goto out;
10452         }
10453
10454         if (level)
10455                 btrfs_node_key_to_cpu(eb, &key, 0);
10456         else
10457                 btrfs_item_key_to_cpu(eb, &key, 0);
10458
10459         free_extent_buffer(eb);
10460
10461         btrfs_init_path(&path);
10462         path.lowest_level = level;
10463         /* Search with the first key, to ensure we can reach it */
10464         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10465         if (ret < 0) {
10466                 err |= REFERENCER_MISSING;
10467                 goto release_out;
10468         }
10469
10470         node = path.nodes[level];
10471         if (btrfs_header_bytenr(node) != bytenr) {
10472                 error(
10473         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10474                         bytenr, nodesize, bytenr,
10475                         btrfs_header_bytenr(node));
10476                 err |= REFERENCER_MISMATCH;
10477         }
10478         if (btrfs_header_level(node) != level) {
10479                 error(
10480         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10481                         bytenr, nodesize, level,
10482                         btrfs_header_level(node));
10483                 err |= REFERENCER_MISMATCH;
10484         }
10485
10486 release_out:
10487         btrfs_release_path(&path);
10488 out:
10489         if (err & REFERENCER_MISSING) {
10490                 if (level < 0)
10491                         error("extent [%llu %d] lost referencer (owner: %llu)",
10492                                 bytenr, nodesize, root_id);
10493                 else
10494                         error(
10495                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10496                                 bytenr, nodesize, root_id, level);
10497         }
10498
10499         return err;
10500 }
10501
10502 /*
10503  * Check if tree block @eb is tree reloc root.
10504  * Return 0 if it's not or any problem happens
10505  * Return 1 if it's a tree reloc root
10506  */
10507 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10508                                  struct extent_buffer *eb)
10509 {
10510         struct btrfs_root *tree_reloc_root;
10511         struct btrfs_key key;
10512         u64 bytenr = btrfs_header_bytenr(eb);
10513         u64 owner = btrfs_header_owner(eb);
10514         int ret = 0;
10515
10516         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10517         key.offset = owner;
10518         key.type = BTRFS_ROOT_ITEM_KEY;
10519
10520         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10521         if (IS_ERR(tree_reloc_root))
10522                 return 0;
10523
10524         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10525                 ret = 1;
10526         btrfs_free_fs_root(tree_reloc_root);
10527         return ret;
10528 }
10529
10530 /*
10531  * Check referencer for shared block backref
10532  * If level == -1, this function will resolve the level.
10533  */
10534 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10535                                      u64 parent, u64 bytenr, int level)
10536 {
10537         struct extent_buffer *eb;
10538         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10539         u32 nr;
10540         int found_parent = 0;
10541         int i;
10542
10543         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10544         if (!extent_buffer_uptodate(eb))
10545                 goto out;
10546
10547         if (level == -1)
10548                 level = query_tree_block_level(fs_info, bytenr);
10549         if (level < 0)
10550                 goto out;
10551
10552         /* It's possible it's a tree reloc root */
10553         if (parent == bytenr) {
10554                 if (is_tree_reloc_root(fs_info, eb))
10555                         found_parent = 1;
10556                 goto out;
10557         }
10558
10559         if (level + 1 != btrfs_header_level(eb))
10560                 goto out;
10561
10562         nr = btrfs_header_nritems(eb);
10563         for (i = 0; i < nr; i++) {
10564                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10565                         found_parent = 1;
10566                         break;
10567                 }
10568         }
10569 out:
10570         free_extent_buffer(eb);
10571         if (!found_parent) {
10572                 error(
10573         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10574                         bytenr, nodesize, parent, level);
10575                 return REFERENCER_MISSING;
10576         }
10577         return 0;
10578 }
10579
10580 /*
10581  * Check referencer for normal (inlined) data ref
10582  * If len == 0, it will be resolved by searching in extent tree
10583  */
10584 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10585                                      u64 root_id, u64 objectid, u64 offset,
10586                                      u64 bytenr, u64 len, u32 count)
10587 {
10588         struct btrfs_root *root;
10589         struct btrfs_root *extent_root = fs_info->extent_root;
10590         struct btrfs_key key;
10591         struct btrfs_path path;
10592         struct extent_buffer *leaf;
10593         struct btrfs_file_extent_item *fi;
10594         u32 found_count = 0;
10595         int slot;
10596         int ret = 0;
10597
10598         if (!len) {
10599                 key.objectid = bytenr;
10600                 key.type = BTRFS_EXTENT_ITEM_KEY;
10601                 key.offset = (u64)-1;
10602
10603                 btrfs_init_path(&path);
10604                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10605                 if (ret < 0)
10606                         goto out;
10607                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10608                 if (ret)
10609                         goto out;
10610                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10611                 if (key.objectid != bytenr ||
10612                     key.type != BTRFS_EXTENT_ITEM_KEY)
10613                         goto out;
10614                 len = key.offset;
10615                 btrfs_release_path(&path);
10616         }
10617         key.objectid = root_id;
10618         key.type = BTRFS_ROOT_ITEM_KEY;
10619         key.offset = (u64)-1;
10620         btrfs_init_path(&path);
10621
10622         root = btrfs_read_fs_root(fs_info, &key);
10623         if (IS_ERR(root))
10624                 goto out;
10625
10626         key.objectid = objectid;
10627         key.type = BTRFS_EXTENT_DATA_KEY;
10628         /*
10629          * It can be nasty as data backref offset is
10630          * file offset - file extent offset, which is smaller or
10631          * equal to original backref offset.  The only special case is
10632          * overflow.  So we need to special check and do further search.
10633          */
10634         key.offset = offset & (1ULL << 63) ? 0 : offset;
10635
10636         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10637         if (ret < 0)
10638                 goto out;
10639
10640         /*
10641          * Search afterwards to get correct one
10642          * NOTE: As we must do a comprehensive check on the data backref to
10643          * make sure the dref count also matches, we must iterate all file
10644          * extents for that inode.
10645          */
10646         while (1) {
10647                 leaf = path.nodes[0];
10648                 slot = path.slots[0];
10649
10650                 if (slot >= btrfs_header_nritems(leaf))
10651                         goto next;
10652                 btrfs_item_key_to_cpu(leaf, &key, slot);
10653                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10654                         break;
10655                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10656                 /*
10657                  * Except normal disk bytenr and disk num bytes, we still
10658                  * need to do extra check on dbackref offset as
10659                  * dbackref offset = file_offset - file_extent_offset
10660                  */
10661                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10662                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10663                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10664                     offset)
10665                         found_count++;
10666
10667 next:
10668                 ret = btrfs_next_item(root, &path);
10669                 if (ret)
10670                         break;
10671         }
10672 out:
10673         btrfs_release_path(&path);
10674         if (found_count != count) {
10675                 error(
10676 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10677                         bytenr, len, root_id, objectid, offset, count, found_count);
10678                 return REFERENCER_MISSING;
10679         }
10680         return 0;
10681 }
10682
10683 /*
10684  * Check if the referencer of a shared data backref exists
10685  */
10686 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10687                                      u64 parent, u64 bytenr)
10688 {
10689         struct extent_buffer *eb;
10690         struct btrfs_key key;
10691         struct btrfs_file_extent_item *fi;
10692         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10693         u32 nr;
10694         int found_parent = 0;
10695         int i;
10696
10697         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10698         if (!extent_buffer_uptodate(eb))
10699                 goto out;
10700
10701         nr = btrfs_header_nritems(eb);
10702         for (i = 0; i < nr; i++) {
10703                 btrfs_item_key_to_cpu(eb, &key, i);
10704                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10705                         continue;
10706
10707                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10708                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10709                         continue;
10710
10711                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10712                         found_parent = 1;
10713                         break;
10714                 }
10715         }
10716
10717 out:
10718         free_extent_buffer(eb);
10719         if (!found_parent) {
10720                 error("shared extent %llu referencer lost (parent: %llu)",
10721                         bytenr, parent);
10722                 return REFERENCER_MISSING;
10723         }
10724         return 0;
10725 }
10726
10727 /*
10728  * This function will check a given extent item, including its backref and
10729  * itself (like crossing stripe boundary and type)
10730  *
10731  * Since we don't use extent_record anymore, introduce new error bit
10732  */
10733 static int check_extent_item(struct btrfs_fs_info *fs_info,
10734                              struct extent_buffer *eb, int slot)
10735 {
10736         struct btrfs_extent_item *ei;
10737         struct btrfs_extent_inline_ref *iref;
10738         struct btrfs_extent_data_ref *dref;
10739         unsigned long end;
10740         unsigned long ptr;
10741         int type;
10742         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10743         u32 item_size = btrfs_item_size_nr(eb, slot);
10744         u64 flags;
10745         u64 offset;
10746         int metadata = 0;
10747         int level;
10748         struct btrfs_key key;
10749         int ret;
10750         int err = 0;
10751
10752         btrfs_item_key_to_cpu(eb, &key, slot);
10753         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10754                 bytes_used += key.offset;
10755         else
10756                 bytes_used += nodesize;
10757
10758         if (item_size < sizeof(*ei)) {
10759                 /*
10760                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10761                  * old thing when on disk format is still un-determined.
10762                  * No need to care about it anymore
10763                  */
10764                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10765                 return -ENOTTY;
10766         }
10767
10768         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10769         flags = btrfs_extent_flags(eb, ei);
10770
10771         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10772                 metadata = 1;
10773         if (metadata && check_crossing_stripes(global_info, key.objectid,
10774                                                eb->len)) {
10775                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10776                       key.objectid, key.objectid + nodesize);
10777                 err |= CROSSING_STRIPE_BOUNDARY;
10778         }
10779
10780         ptr = (unsigned long)(ei + 1);
10781
10782         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10783                 /* Old EXTENT_ITEM metadata */
10784                 struct btrfs_tree_block_info *info;
10785
10786                 info = (struct btrfs_tree_block_info *)ptr;
10787                 level = btrfs_tree_block_level(eb, info);
10788                 ptr += sizeof(struct btrfs_tree_block_info);
10789         } else {
10790                 /* New METADATA_ITEM */
10791                 level = key.offset;
10792         }
10793         end = (unsigned long)ei + item_size;
10794
10795 next:
10796         /* Reached extent item end normally */
10797         if (ptr == end)
10798                 goto out;
10799
10800         /* Beyond extent item end, wrong item size */
10801         if (ptr > end) {
10802                 err |= ITEM_SIZE_MISMATCH;
10803                 error("extent item at bytenr %llu slot %d has wrong size",
10804                         eb->start, slot);
10805                 goto out;
10806         }
10807
10808         /* Now check every backref in this extent item */
10809         iref = (struct btrfs_extent_inline_ref *)ptr;
10810         type = btrfs_extent_inline_ref_type(eb, iref);
10811         offset = btrfs_extent_inline_ref_offset(eb, iref);
10812         switch (type) {
10813         case BTRFS_TREE_BLOCK_REF_KEY:
10814                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10815                                                level);
10816                 err |= ret;
10817                 break;
10818         case BTRFS_SHARED_BLOCK_REF_KEY:
10819                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10820                                                  level);
10821                 err |= ret;
10822                 break;
10823         case BTRFS_EXTENT_DATA_REF_KEY:
10824                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10825                 ret = check_extent_data_backref(fs_info,
10826                                 btrfs_extent_data_ref_root(eb, dref),
10827                                 btrfs_extent_data_ref_objectid(eb, dref),
10828                                 btrfs_extent_data_ref_offset(eb, dref),
10829                                 key.objectid, key.offset,
10830                                 btrfs_extent_data_ref_count(eb, dref));
10831                 err |= ret;
10832                 break;
10833         case BTRFS_SHARED_DATA_REF_KEY:
10834                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10835                 err |= ret;
10836                 break;
10837         default:
10838                 error("extent[%llu %d %llu] has unknown ref type: %d",
10839                         key.objectid, key.type, key.offset, type);
10840                 err |= UNKNOWN_TYPE;
10841                 goto out;
10842         }
10843
10844         ptr += btrfs_extent_inline_ref_size(type);
10845         goto next;
10846
10847 out:
10848         return err;
10849 }
10850
10851 /*
10852  * Check if a dev extent item is referred correctly by its chunk
10853  */
10854 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10855                                  struct extent_buffer *eb, int slot)
10856 {
10857         struct btrfs_root *chunk_root = fs_info->chunk_root;
10858         struct btrfs_dev_extent *ptr;
10859         struct btrfs_path path;
10860         struct btrfs_key chunk_key;
10861         struct btrfs_key devext_key;
10862         struct btrfs_chunk *chunk;
10863         struct extent_buffer *l;
10864         int num_stripes;
10865         u64 length;
10866         int i;
10867         int found_chunk = 0;
10868         int ret;
10869
10870         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10871         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10872         length = btrfs_dev_extent_length(eb, ptr);
10873
10874         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10875         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10876         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10877
10878         btrfs_init_path(&path);
10879         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10880         if (ret)
10881                 goto out;
10882
10883         l = path.nodes[0];
10884         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10885         if (btrfs_chunk_length(l, chunk) != length)
10886                 goto out;
10887
10888         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10889         for (i = 0; i < num_stripes; i++) {
10890                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10891                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10892
10893                 if (devid == devext_key.objectid &&
10894                     offset == devext_key.offset) {
10895                         found_chunk = 1;
10896                         break;
10897                 }
10898         }
10899 out:
10900         btrfs_release_path(&path);
10901         if (!found_chunk) {
10902                 error(
10903                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10904                         devext_key.objectid, devext_key.offset, length);
10905                 return REFERENCER_MISSING;
10906         }
10907         return 0;
10908 }
10909
10910 /*
10911  * Check if the used space is correct with the dev item
10912  */
10913 static int check_dev_item(struct btrfs_fs_info *fs_info,
10914                           struct extent_buffer *eb, int slot)
10915 {
10916         struct btrfs_root *dev_root = fs_info->dev_root;
10917         struct btrfs_dev_item *dev_item;
10918         struct btrfs_path path;
10919         struct btrfs_key key;
10920         struct btrfs_dev_extent *ptr;
10921         u64 dev_id;
10922         u64 used;
10923         u64 total = 0;
10924         int ret;
10925
10926         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10927         dev_id = btrfs_device_id(eb, dev_item);
10928         used = btrfs_device_bytes_used(eb, dev_item);
10929
10930         key.objectid = dev_id;
10931         key.type = BTRFS_DEV_EXTENT_KEY;
10932         key.offset = 0;
10933
10934         btrfs_init_path(&path);
10935         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10936         if (ret < 0) {
10937                 btrfs_item_key_to_cpu(eb, &key, slot);
10938                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10939                         key.objectid, key.type, key.offset);
10940                 btrfs_release_path(&path);
10941                 return REFERENCER_MISSING;
10942         }
10943
10944         /* Iterate dev_extents to calculate the used space of a device */
10945         while (1) {
10946                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10947                         goto next;
10948
10949                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10950                 if (key.objectid > dev_id)
10951                         break;
10952                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10953                         goto next;
10954
10955                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10956                                      struct btrfs_dev_extent);
10957                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10958 next:
10959                 ret = btrfs_next_item(dev_root, &path);
10960                 if (ret)
10961                         break;
10962         }
10963         btrfs_release_path(&path);
10964
10965         if (used != total) {
10966                 btrfs_item_key_to_cpu(eb, &key, slot);
10967                 error(
10968 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10969                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10970                         BTRFS_DEV_EXTENT_KEY, dev_id);
10971                 return ACCOUNTING_MISMATCH;
10972         }
10973         return 0;
10974 }
10975
10976 /*
10977  * Check a block group item with its referener (chunk) and its used space
10978  * with extent/metadata item
10979  */
10980 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10981                                   struct extent_buffer *eb, int slot)
10982 {
10983         struct btrfs_root *extent_root = fs_info->extent_root;
10984         struct btrfs_root *chunk_root = fs_info->chunk_root;
10985         struct btrfs_block_group_item *bi;
10986         struct btrfs_block_group_item bg_item;
10987         struct btrfs_path path;
10988         struct btrfs_key bg_key;
10989         struct btrfs_key chunk_key;
10990         struct btrfs_key extent_key;
10991         struct btrfs_chunk *chunk;
10992         struct extent_buffer *leaf;
10993         struct btrfs_extent_item *ei;
10994         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10995         u64 flags;
10996         u64 bg_flags;
10997         u64 used;
10998         u64 total = 0;
10999         int ret;
11000         int err = 0;
11001
11002         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11003         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11004         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11005         used = btrfs_block_group_used(&bg_item);
11006         bg_flags = btrfs_block_group_flags(&bg_item);
11007
11008         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11009         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11010         chunk_key.offset = bg_key.objectid;
11011
11012         btrfs_init_path(&path);
11013         /* Search for the referencer chunk */
11014         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11015         if (ret) {
11016                 error(
11017                 "block group[%llu %llu] did not find the related chunk item",
11018                         bg_key.objectid, bg_key.offset);
11019                 err |= REFERENCER_MISSING;
11020         } else {
11021                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11022                                         struct btrfs_chunk);
11023                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11024                                                 bg_key.offset) {
11025                         error(
11026         "block group[%llu %llu] related chunk item length does not match",
11027                                 bg_key.objectid, bg_key.offset);
11028                         err |= REFERENCER_MISMATCH;
11029                 }
11030         }
11031         btrfs_release_path(&path);
11032
11033         /* Search from the block group bytenr */
11034         extent_key.objectid = bg_key.objectid;
11035         extent_key.type = 0;
11036         extent_key.offset = 0;
11037
11038         btrfs_init_path(&path);
11039         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11040         if (ret < 0)
11041                 goto out;
11042
11043         /* Iterate extent tree to account used space */
11044         while (1) {
11045                 leaf = path.nodes[0];
11046
11047                 /* Search slot can point to the last item beyond leaf nritems */
11048                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11049                         goto next;
11050
11051                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11052                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11053                         break;
11054
11055                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11056                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11057                         goto next;
11058                 if (extent_key.objectid < bg_key.objectid)
11059                         goto next;
11060
11061                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11062                         total += nodesize;
11063                 else
11064                         total += extent_key.offset;
11065
11066                 ei = btrfs_item_ptr(leaf, path.slots[0],
11067                                     struct btrfs_extent_item);
11068                 flags = btrfs_extent_flags(leaf, ei);
11069                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11070                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11071                                 error(
11072                         "bad extent[%llu, %llu) type mismatch with chunk",
11073                                         extent_key.objectid,
11074                                         extent_key.objectid + extent_key.offset);
11075                                 err |= CHUNK_TYPE_MISMATCH;
11076                         }
11077                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11078                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11079                                     BTRFS_BLOCK_GROUP_METADATA))) {
11080                                 error(
11081                         "bad extent[%llu, %llu) type mismatch with chunk",
11082                                         extent_key.objectid,
11083                                         extent_key.objectid + nodesize);
11084                                 err |= CHUNK_TYPE_MISMATCH;
11085                         }
11086                 }
11087 next:
11088                 ret = btrfs_next_item(extent_root, &path);
11089                 if (ret)
11090                         break;
11091         }
11092
11093 out:
11094         btrfs_release_path(&path);
11095
11096         if (total != used) {
11097                 error(
11098                 "block group[%llu %llu] used %llu but extent items used %llu",
11099                         bg_key.objectid, bg_key.offset, used, total);
11100                 err |= ACCOUNTING_MISMATCH;
11101         }
11102         return err;
11103 }
11104
11105 /*
11106  * Check a chunk item.
11107  * Including checking all referred dev_extents and block group
11108  */
11109 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11110                             struct extent_buffer *eb, int slot)
11111 {
11112         struct btrfs_root *extent_root = fs_info->extent_root;
11113         struct btrfs_root *dev_root = fs_info->dev_root;
11114         struct btrfs_path path;
11115         struct btrfs_key chunk_key;
11116         struct btrfs_key bg_key;
11117         struct btrfs_key devext_key;
11118         struct btrfs_chunk *chunk;
11119         struct extent_buffer *leaf;
11120         struct btrfs_block_group_item *bi;
11121         struct btrfs_block_group_item bg_item;
11122         struct btrfs_dev_extent *ptr;
11123         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11124         u64 length;
11125         u64 chunk_end;
11126         u64 type;
11127         u64 profile;
11128         int num_stripes;
11129         u64 offset;
11130         u64 objectid;
11131         int i;
11132         int ret;
11133         int err = 0;
11134
11135         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11136         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11137         length = btrfs_chunk_length(eb, chunk);
11138         chunk_end = chunk_key.offset + length;
11139         if (!IS_ALIGNED(length, sectorsize)) {
11140                 error("chunk[%llu %llu) not aligned to %u",
11141                         chunk_key.offset, chunk_end, sectorsize);
11142                 err |= BYTES_UNALIGNED;
11143                 goto out;
11144         }
11145
11146         type = btrfs_chunk_type(eb, chunk);
11147         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11148         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11149                 error("chunk[%llu %llu) has no chunk type",
11150                         chunk_key.offset, chunk_end);
11151                 err |= UNKNOWN_TYPE;
11152         }
11153         if (profile && (profile & (profile - 1))) {
11154                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11155                         chunk_key.offset, chunk_end, profile);
11156                 err |= UNKNOWN_TYPE;
11157         }
11158
11159         bg_key.objectid = chunk_key.offset;
11160         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11161         bg_key.offset = length;
11162
11163         btrfs_init_path(&path);
11164         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11165         if (ret) {
11166                 error(
11167                 "chunk[%llu %llu) did not find the related block group item",
11168                         chunk_key.offset, chunk_end);
11169                 err |= REFERENCER_MISSING;
11170         } else{
11171                 leaf = path.nodes[0];
11172                 bi = btrfs_item_ptr(leaf, path.slots[0],
11173                                     struct btrfs_block_group_item);
11174                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11175                                    sizeof(bg_item));
11176                 if (btrfs_block_group_flags(&bg_item) != type) {
11177                         error(
11178 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11179                                 chunk_key.offset, chunk_end, type,
11180                                 btrfs_block_group_flags(&bg_item));
11181                         err |= REFERENCER_MISSING;
11182                 }
11183         }
11184
11185         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11186         for (i = 0; i < num_stripes; i++) {
11187                 btrfs_release_path(&path);
11188                 btrfs_init_path(&path);
11189                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11190                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11191                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11192
11193                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11194                                         0, 0);
11195                 if (ret)
11196                         goto not_match_dev;
11197
11198                 leaf = path.nodes[0];
11199                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11200                                      struct btrfs_dev_extent);
11201                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11202                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11203                 if (objectid != chunk_key.objectid ||
11204                     offset != chunk_key.offset ||
11205                     btrfs_dev_extent_length(leaf, ptr) != length)
11206                         goto not_match_dev;
11207                 continue;
11208 not_match_dev:
11209                 err |= BACKREF_MISSING;
11210                 error(
11211                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11212                         chunk_key.objectid, chunk_end, i);
11213                 continue;
11214         }
11215         btrfs_release_path(&path);
11216 out:
11217         return err;
11218 }
11219
11220 /*
11221  * Main entry function to check known items and update related accounting info
11222  */
11223 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11224 {
11225         struct btrfs_fs_info *fs_info = root->fs_info;
11226         struct btrfs_key key;
11227         int slot = 0;
11228         int type;
11229         struct btrfs_extent_data_ref *dref;
11230         int ret;
11231         int err = 0;
11232
11233 next:
11234         btrfs_item_key_to_cpu(eb, &key, slot);
11235         type = key.type;
11236
11237         switch (type) {
11238         case BTRFS_EXTENT_DATA_KEY:
11239                 ret = check_extent_data_item(root, eb, slot);
11240                 err |= ret;
11241                 break;
11242         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11243                 ret = check_block_group_item(fs_info, eb, slot);
11244                 err |= ret;
11245                 break;
11246         case BTRFS_DEV_ITEM_KEY:
11247                 ret = check_dev_item(fs_info, eb, slot);
11248                 err |= ret;
11249                 break;
11250         case BTRFS_CHUNK_ITEM_KEY:
11251                 ret = check_chunk_item(fs_info, eb, slot);
11252                 err |= ret;
11253                 break;
11254         case BTRFS_DEV_EXTENT_KEY:
11255                 ret = check_dev_extent_item(fs_info, eb, slot);
11256                 err |= ret;
11257                 break;
11258         case BTRFS_EXTENT_ITEM_KEY:
11259         case BTRFS_METADATA_ITEM_KEY:
11260                 ret = check_extent_item(fs_info, eb, slot);
11261                 err |= ret;
11262                 break;
11263         case BTRFS_EXTENT_CSUM_KEY:
11264                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11265                 break;
11266         case BTRFS_TREE_BLOCK_REF_KEY:
11267                 ret = check_tree_block_backref(fs_info, key.offset,
11268                                                key.objectid, -1);
11269                 err |= ret;
11270                 break;
11271         case BTRFS_EXTENT_DATA_REF_KEY:
11272                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11273                 ret = check_extent_data_backref(fs_info,
11274                                 btrfs_extent_data_ref_root(eb, dref),
11275                                 btrfs_extent_data_ref_objectid(eb, dref),
11276                                 btrfs_extent_data_ref_offset(eb, dref),
11277                                 key.objectid, 0,
11278                                 btrfs_extent_data_ref_count(eb, dref));
11279                 err |= ret;
11280                 break;
11281         case BTRFS_SHARED_BLOCK_REF_KEY:
11282                 ret = check_shared_block_backref(fs_info, key.offset,
11283                                                  key.objectid, -1);
11284                 err |= ret;
11285                 break;
11286         case BTRFS_SHARED_DATA_REF_KEY:
11287                 ret = check_shared_data_backref(fs_info, key.offset,
11288                                                 key.objectid);
11289                 err |= ret;
11290                 break;
11291         default:
11292                 break;
11293         }
11294
11295         if (++slot < btrfs_header_nritems(eb))
11296                 goto next;
11297
11298         return err;
11299 }
11300
11301 /*
11302  * Helper function for later fs/subvol tree check.  To determine if a tree
11303  * block should be checked.
11304  * This function will ensure only the direct referencer with lowest rootid to
11305  * check a fs/subvolume tree block.
11306  *
11307  * Backref check at extent tree would detect errors like missing subvolume
11308  * tree, so we can do aggressive check to reduce duplicated checks.
11309  */
11310 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11311 {
11312         struct btrfs_root *extent_root = root->fs_info->extent_root;
11313         struct btrfs_key key;
11314         struct btrfs_path path;
11315         struct extent_buffer *leaf;
11316         int slot;
11317         struct btrfs_extent_item *ei;
11318         unsigned long ptr;
11319         unsigned long end;
11320         int type;
11321         u32 item_size;
11322         u64 offset;
11323         struct btrfs_extent_inline_ref *iref;
11324         int ret;
11325
11326         btrfs_init_path(&path);
11327         key.objectid = btrfs_header_bytenr(eb);
11328         key.type = BTRFS_METADATA_ITEM_KEY;
11329         key.offset = (u64)-1;
11330
11331         /*
11332          * Any failure in backref resolving means we can't determine
11333          * whom the tree block belongs to.
11334          * So in that case, we need to check that tree block
11335          */
11336         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11337         if (ret < 0)
11338                 goto need_check;
11339
11340         ret = btrfs_previous_extent_item(extent_root, &path,
11341                                          btrfs_header_bytenr(eb));
11342         if (ret)
11343                 goto need_check;
11344
11345         leaf = path.nodes[0];
11346         slot = path.slots[0];
11347         btrfs_item_key_to_cpu(leaf, &key, slot);
11348         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11349
11350         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11351                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11352         } else {
11353                 struct btrfs_tree_block_info *info;
11354
11355                 info = (struct btrfs_tree_block_info *)(ei + 1);
11356                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11357         }
11358
11359         item_size = btrfs_item_size_nr(leaf, slot);
11360         ptr = (unsigned long)iref;
11361         end = (unsigned long)ei + item_size;
11362         while (ptr < end) {
11363                 iref = (struct btrfs_extent_inline_ref *)ptr;
11364                 type = btrfs_extent_inline_ref_type(leaf, iref);
11365                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11366
11367                 /*
11368                  * We only check the tree block if current root is
11369                  * the lowest referencer of it.
11370                  */
11371                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11372                     offset < root->objectid) {
11373                         btrfs_release_path(&path);
11374                         return 0;
11375                 }
11376
11377                 ptr += btrfs_extent_inline_ref_size(type);
11378         }
11379         /*
11380          * Normally we should also check keyed tree block ref, but that may be
11381          * very time consuming.  Inlined ref should already make us skip a lot
11382          * of refs now.  So skip search keyed tree block ref.
11383          */
11384
11385 need_check:
11386         btrfs_release_path(&path);
11387         return 1;
11388 }
11389
11390 /*
11391  * Traversal function for tree block. We will do:
11392  * 1) Skip shared fs/subvolume tree blocks
11393  * 2) Update related bytes accounting
11394  * 3) Pre-order traversal
11395  */
11396 static int traverse_tree_block(struct btrfs_root *root,
11397                                 struct extent_buffer *node)
11398 {
11399         struct extent_buffer *eb;
11400         struct btrfs_key key;
11401         struct btrfs_key drop_key;
11402         int level;
11403         u64 nr;
11404         int i;
11405         int err = 0;
11406         int ret;
11407
11408         /*
11409          * Skip shared fs/subvolume tree block, in that case they will
11410          * be checked by referencer with lowest rootid
11411          */
11412         if (is_fstree(root->objectid) && !should_check(root, node))
11413                 return 0;
11414
11415         /* Update bytes accounting */
11416         total_btree_bytes += node->len;
11417         if (fs_root_objectid(btrfs_header_owner(node)))
11418                 total_fs_tree_bytes += node->len;
11419         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11420                 total_extent_tree_bytes += node->len;
11421         if (!found_old_backref &&
11422             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11423             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11424             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11425                 found_old_backref = 1;
11426
11427         /* pre-order tranversal, check itself first */
11428         level = btrfs_header_level(node);
11429         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11430                                    btrfs_header_level(node),
11431                                    btrfs_header_owner(node));
11432         err |= ret;
11433         if (err)
11434                 error(
11435         "check %s failed root %llu bytenr %llu level %d, force continue check",
11436                         level ? "node":"leaf", root->objectid,
11437                         btrfs_header_bytenr(node), btrfs_header_level(node));
11438
11439         if (!level) {
11440                 btree_space_waste += btrfs_leaf_free_space(root, node);
11441                 ret = check_leaf_items(root, node);
11442                 err |= ret;
11443                 return err;
11444         }
11445
11446         nr = btrfs_header_nritems(node);
11447         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11448         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11449                 sizeof(struct btrfs_key_ptr);
11450
11451         /* Then check all its children */
11452         for (i = 0; i < nr; i++) {
11453                 u64 blocknr = btrfs_node_blockptr(node, i);
11454
11455                 btrfs_node_key_to_cpu(node, &key, i);
11456                 if (level == root->root_item.drop_level &&
11457                     is_dropped_key(&key, &drop_key))
11458                         continue;
11459
11460                 /*
11461                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11462                  * to call the function itself.
11463                  */
11464                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11465                 if (extent_buffer_uptodate(eb)) {
11466                         ret = traverse_tree_block(root, eb);
11467                         err |= ret;
11468                 }
11469                 free_extent_buffer(eb);
11470         }
11471
11472         return err;
11473 }
11474
11475 /*
11476  * Low memory usage version check_chunks_and_extents.
11477  */
11478 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11479 {
11480         struct btrfs_path path;
11481         struct btrfs_key key;
11482         struct btrfs_root *root1;
11483         struct btrfs_root *cur_root;
11484         int err = 0;
11485         int ret;
11486
11487         root1 = root->fs_info->chunk_root;
11488         ret = traverse_tree_block(root1, root1->node);
11489         err |= ret;
11490
11491         root1 = root->fs_info->tree_root;
11492         ret = traverse_tree_block(root1, root1->node);
11493         err |= ret;
11494
11495         btrfs_init_path(&path);
11496         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11497         key.offset = 0;
11498         key.type = BTRFS_ROOT_ITEM_KEY;
11499
11500         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11501         if (ret) {
11502                 error("cannot find extent treet in tree_root");
11503                 goto out;
11504         }
11505
11506         while (1) {
11507                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11508                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11509                         goto next;
11510                 key.offset = (u64)-1;
11511
11512                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11513                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11514                                         &key);
11515                 else
11516                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11517                 if (IS_ERR(cur_root) || !cur_root) {
11518                         error("failed to read tree: %lld", key.objectid);
11519                         goto next;
11520                 }
11521
11522                 ret = traverse_tree_block(cur_root, cur_root->node);
11523                 err |= ret;
11524
11525                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11526                         btrfs_free_fs_root(cur_root);
11527 next:
11528                 ret = btrfs_next_item(root1, &path);
11529                 if (ret)
11530                         goto out;
11531         }
11532
11533 out:
11534         btrfs_release_path(&path);
11535         return err;
11536 }
11537
11538 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11539                            struct btrfs_root *root, int overwrite)
11540 {
11541         struct extent_buffer *c;
11542         struct extent_buffer *old = root->node;
11543         int level;
11544         int ret;
11545         struct btrfs_disk_key disk_key = {0,0,0};
11546
11547         level = 0;
11548
11549         if (overwrite) {
11550                 c = old;
11551                 extent_buffer_get(c);
11552                 goto init;
11553         }
11554         c = btrfs_alloc_free_block(trans, root,
11555                                    root->nodesize,
11556                                    root->root_key.objectid,
11557                                    &disk_key, level, 0, 0);
11558         if (IS_ERR(c)) {
11559                 c = old;
11560                 extent_buffer_get(c);
11561                 overwrite = 1;
11562         }
11563 init:
11564         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11565         btrfs_set_header_level(c, level);
11566         btrfs_set_header_bytenr(c, c->start);
11567         btrfs_set_header_generation(c, trans->transid);
11568         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11569         btrfs_set_header_owner(c, root->root_key.objectid);
11570
11571         write_extent_buffer(c, root->fs_info->fsid,
11572                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11573
11574         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11575                             btrfs_header_chunk_tree_uuid(c),
11576                             BTRFS_UUID_SIZE);
11577
11578         btrfs_mark_buffer_dirty(c);
11579         /*
11580          * this case can happen in the following case:
11581          *
11582          * 1.overwrite previous root.
11583          *
11584          * 2.reinit reloc data root, this is because we skip pin
11585          * down reloc data tree before which means we can allocate
11586          * same block bytenr here.
11587          */
11588         if (old->start == c->start) {
11589                 btrfs_set_root_generation(&root->root_item,
11590                                           trans->transid);
11591                 root->root_item.level = btrfs_header_level(root->node);
11592                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11593                                         &root->root_key, &root->root_item);
11594                 if (ret) {
11595                         free_extent_buffer(c);
11596                         return ret;
11597                 }
11598         }
11599         free_extent_buffer(old);
11600         root->node = c;
11601         add_root_to_dirty_list(root);
11602         return 0;
11603 }
11604
11605 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11606                                 struct extent_buffer *eb, int tree_root)
11607 {
11608         struct extent_buffer *tmp;
11609         struct btrfs_root_item *ri;
11610         struct btrfs_key key;
11611         u64 bytenr;
11612         u32 nodesize;
11613         int level = btrfs_header_level(eb);
11614         int nritems;
11615         int ret;
11616         int i;
11617
11618         /*
11619          * If we have pinned this block before, don't pin it again.
11620          * This can not only avoid forever loop with broken filesystem
11621          * but also give us some speedups.
11622          */
11623         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11624                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11625                 return 0;
11626
11627         btrfs_pin_extent(fs_info, eb->start, eb->len);
11628
11629         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11630         nritems = btrfs_header_nritems(eb);
11631         for (i = 0; i < nritems; i++) {
11632                 if (level == 0) {
11633                         btrfs_item_key_to_cpu(eb, &key, i);
11634                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11635                                 continue;
11636                         /* Skip the extent root and reloc roots */
11637                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11638                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11639                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11640                                 continue;
11641                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11642                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11643
11644                         /*
11645                          * If at any point we start needing the real root we
11646                          * will have to build a stump root for the root we are
11647                          * in, but for now this doesn't actually use the root so
11648                          * just pass in extent_root.
11649                          */
11650                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11651                                               nodesize, 0);
11652                         if (!extent_buffer_uptodate(tmp)) {
11653                                 fprintf(stderr, "Error reading root block\n");
11654                                 return -EIO;
11655                         }
11656                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11657                         free_extent_buffer(tmp);
11658                         if (ret)
11659                                 return ret;
11660                 } else {
11661                         bytenr = btrfs_node_blockptr(eb, i);
11662
11663                         /* If we aren't the tree root don't read the block */
11664                         if (level == 1 && !tree_root) {
11665                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11666                                 continue;
11667                         }
11668
11669                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11670                                               nodesize, 0);
11671                         if (!extent_buffer_uptodate(tmp)) {
11672                                 fprintf(stderr, "Error reading tree block\n");
11673                                 return -EIO;
11674                         }
11675                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11676                         free_extent_buffer(tmp);
11677                         if (ret)
11678                                 return ret;
11679                 }
11680         }
11681
11682         return 0;
11683 }
11684
11685 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11686 {
11687         int ret;
11688
11689         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11690         if (ret)
11691                 return ret;
11692
11693         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11694 }
11695
11696 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11697 {
11698         struct btrfs_block_group_cache *cache;
11699         struct btrfs_path path;
11700         struct extent_buffer *leaf;
11701         struct btrfs_chunk *chunk;
11702         struct btrfs_key key;
11703         int ret;
11704         u64 start;
11705
11706         btrfs_init_path(&path);
11707         key.objectid = 0;
11708         key.type = BTRFS_CHUNK_ITEM_KEY;
11709         key.offset = 0;
11710         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11711         if (ret < 0) {
11712                 btrfs_release_path(&path);
11713                 return ret;
11714         }
11715
11716         /*
11717          * We do this in case the block groups were screwed up and had alloc
11718          * bits that aren't actually set on the chunks.  This happens with
11719          * restored images every time and could happen in real life I guess.
11720          */
11721         fs_info->avail_data_alloc_bits = 0;
11722         fs_info->avail_metadata_alloc_bits = 0;
11723         fs_info->avail_system_alloc_bits = 0;
11724
11725         /* First we need to create the in-memory block groups */
11726         while (1) {
11727                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11728                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11729                         if (ret < 0) {
11730                                 btrfs_release_path(&path);
11731                                 return ret;
11732                         }
11733                         if (ret) {
11734                                 ret = 0;
11735                                 break;
11736                         }
11737                 }
11738                 leaf = path.nodes[0];
11739                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11740                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11741                         path.slots[0]++;
11742                         continue;
11743                 }
11744
11745                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11746                 btrfs_add_block_group(fs_info, 0,
11747                                       btrfs_chunk_type(leaf, chunk),
11748                                       key.objectid, key.offset,
11749                                       btrfs_chunk_length(leaf, chunk));
11750                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11751                                  key.offset + btrfs_chunk_length(leaf, chunk));
11752                 path.slots[0]++;
11753         }
11754         start = 0;
11755         while (1) {
11756                 cache = btrfs_lookup_first_block_group(fs_info, start);
11757                 if (!cache)
11758                         break;
11759                 cache->cached = 1;
11760                 start = cache->key.objectid + cache->key.offset;
11761         }
11762
11763         btrfs_release_path(&path);
11764         return 0;
11765 }
11766
11767 static int reset_balance(struct btrfs_trans_handle *trans,
11768                          struct btrfs_fs_info *fs_info)
11769 {
11770         struct btrfs_root *root = fs_info->tree_root;
11771         struct btrfs_path path;
11772         struct extent_buffer *leaf;
11773         struct btrfs_key key;
11774         int del_slot, del_nr = 0;
11775         int ret;
11776         int found = 0;
11777
11778         btrfs_init_path(&path);
11779         key.objectid = BTRFS_BALANCE_OBJECTID;
11780         key.type = BTRFS_BALANCE_ITEM_KEY;
11781         key.offset = 0;
11782         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11783         if (ret) {
11784                 if (ret > 0)
11785                         ret = 0;
11786                 if (!ret)
11787                         goto reinit_data_reloc;
11788                 else
11789                         goto out;
11790         }
11791
11792         ret = btrfs_del_item(trans, root, &path);
11793         if (ret)
11794                 goto out;
11795         btrfs_release_path(&path);
11796
11797         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11798         key.type = BTRFS_ROOT_ITEM_KEY;
11799         key.offset = 0;
11800         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11801         if (ret < 0)
11802                 goto out;
11803         while (1) {
11804                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11805                         if (!found)
11806                                 break;
11807
11808                         if (del_nr) {
11809                                 ret = btrfs_del_items(trans, root, &path,
11810                                                       del_slot, del_nr);
11811                                 del_nr = 0;
11812                                 if (ret)
11813                                         goto out;
11814                         }
11815                         key.offset++;
11816                         btrfs_release_path(&path);
11817
11818                         found = 0;
11819                         ret = btrfs_search_slot(trans, root, &key, &path,
11820                                                 -1, 1);
11821                         if (ret < 0)
11822                                 goto out;
11823                         continue;
11824                 }
11825                 found = 1;
11826                 leaf = path.nodes[0];
11827                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11828                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11829                         break;
11830                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11831                         path.slots[0]++;
11832                         continue;
11833                 }
11834                 if (!del_nr) {
11835                         del_slot = path.slots[0];
11836                         del_nr = 1;
11837                 } else {
11838                         del_nr++;
11839                 }
11840                 path.slots[0]++;
11841         }
11842
11843         if (del_nr) {
11844                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11845                 if (ret)
11846                         goto out;
11847         }
11848         btrfs_release_path(&path);
11849
11850 reinit_data_reloc:
11851         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11852         key.type = BTRFS_ROOT_ITEM_KEY;
11853         key.offset = (u64)-1;
11854         root = btrfs_read_fs_root(fs_info, &key);
11855         if (IS_ERR(root)) {
11856                 fprintf(stderr, "Error reading data reloc tree\n");
11857                 ret = PTR_ERR(root);
11858                 goto out;
11859         }
11860         record_root_in_trans(trans, root);
11861         ret = btrfs_fsck_reinit_root(trans, root, 0);
11862         if (ret)
11863                 goto out;
11864         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11865 out:
11866         btrfs_release_path(&path);
11867         return ret;
11868 }
11869
11870 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11871                               struct btrfs_fs_info *fs_info)
11872 {
11873         u64 start = 0;
11874         int ret;
11875
11876         /*
11877          * The only reason we don't do this is because right now we're just
11878          * walking the trees we find and pinning down their bytes, we don't look
11879          * at any of the leaves.  In order to do mixed groups we'd have to check
11880          * the leaves of any fs roots and pin down the bytes for any file
11881          * extents we find.  Not hard but why do it if we don't have to?
11882          */
11883         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11884                 fprintf(stderr, "We don't support re-initing the extent tree "
11885                         "for mixed block groups yet, please notify a btrfs "
11886                         "developer you want to do this so they can add this "
11887                         "functionality.\n");
11888                 return -EINVAL;
11889         }
11890
11891         /*
11892          * first we need to walk all of the trees except the extent tree and pin
11893          * down the bytes that are in use so we don't overwrite any existing
11894          * metadata.
11895          */
11896         ret = pin_metadata_blocks(fs_info);
11897         if (ret) {
11898                 fprintf(stderr, "error pinning down used bytes\n");
11899                 return ret;
11900         }
11901
11902         /*
11903          * Need to drop all the block groups since we're going to recreate all
11904          * of them again.
11905          */
11906         btrfs_free_block_groups(fs_info);
11907         ret = reset_block_groups(fs_info);
11908         if (ret) {
11909                 fprintf(stderr, "error resetting the block groups\n");
11910                 return ret;
11911         }
11912
11913         /* Ok we can allocate now, reinit the extent root */
11914         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11915         if (ret) {
11916                 fprintf(stderr, "extent root initialization failed\n");
11917                 /*
11918                  * When the transaction code is updated we should end the
11919                  * transaction, but for now progs only knows about commit so
11920                  * just return an error.
11921                  */
11922                 return ret;
11923         }
11924
11925         /*
11926          * Now we have all the in-memory block groups setup so we can make
11927          * allocations properly, and the metadata we care about is safe since we
11928          * pinned all of it above.
11929          */
11930         while (1) {
11931                 struct btrfs_block_group_cache *cache;
11932
11933                 cache = btrfs_lookup_first_block_group(fs_info, start);
11934                 if (!cache)
11935                         break;
11936                 start = cache->key.objectid + cache->key.offset;
11937                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11938                                         &cache->key, &cache->item,
11939                                         sizeof(cache->item));
11940                 if (ret) {
11941                         fprintf(stderr, "Error adding block group\n");
11942                         return ret;
11943                 }
11944                 btrfs_extent_post_op(trans, fs_info->extent_root);
11945         }
11946
11947         ret = reset_balance(trans, fs_info);
11948         if (ret)
11949                 fprintf(stderr, "error resetting the pending balance\n");
11950
11951         return ret;
11952 }
11953
11954 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11955 {
11956         struct btrfs_path path;
11957         struct btrfs_trans_handle *trans;
11958         struct btrfs_key key;
11959         int ret;
11960
11961         printf("Recowing metadata block %llu\n", eb->start);
11962         key.objectid = btrfs_header_owner(eb);
11963         key.type = BTRFS_ROOT_ITEM_KEY;
11964         key.offset = (u64)-1;
11965
11966         root = btrfs_read_fs_root(root->fs_info, &key);
11967         if (IS_ERR(root)) {
11968                 fprintf(stderr, "Couldn't find owner root %llu\n",
11969                         key.objectid);
11970                 return PTR_ERR(root);
11971         }
11972
11973         trans = btrfs_start_transaction(root, 1);
11974         if (IS_ERR(trans))
11975                 return PTR_ERR(trans);
11976
11977         btrfs_init_path(&path);
11978         path.lowest_level = btrfs_header_level(eb);
11979         if (path.lowest_level)
11980                 btrfs_node_key_to_cpu(eb, &key, 0);
11981         else
11982                 btrfs_item_key_to_cpu(eb, &key, 0);
11983
11984         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11985         btrfs_commit_transaction(trans, root);
11986         btrfs_release_path(&path);
11987         return ret;
11988 }
11989
11990 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11991 {
11992         struct btrfs_path path;
11993         struct btrfs_trans_handle *trans;
11994         struct btrfs_key key;
11995         int ret;
11996
11997         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11998                bad->key.type, bad->key.offset);
11999         key.objectid = bad->root_id;
12000         key.type = BTRFS_ROOT_ITEM_KEY;
12001         key.offset = (u64)-1;
12002
12003         root = btrfs_read_fs_root(root->fs_info, &key);
12004         if (IS_ERR(root)) {
12005                 fprintf(stderr, "Couldn't find owner root %llu\n",
12006                         key.objectid);
12007                 return PTR_ERR(root);
12008         }
12009
12010         trans = btrfs_start_transaction(root, 1);
12011         if (IS_ERR(trans))
12012                 return PTR_ERR(trans);
12013
12014         btrfs_init_path(&path);
12015         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12016         if (ret) {
12017                 if (ret > 0)
12018                         ret = 0;
12019                 goto out;
12020         }
12021         ret = btrfs_del_item(trans, root, &path);
12022 out:
12023         btrfs_commit_transaction(trans, root);
12024         btrfs_release_path(&path);
12025         return ret;
12026 }
12027
12028 static int zero_log_tree(struct btrfs_root *root)
12029 {
12030         struct btrfs_trans_handle *trans;
12031         int ret;
12032
12033         trans = btrfs_start_transaction(root, 1);
12034         if (IS_ERR(trans)) {
12035                 ret = PTR_ERR(trans);
12036                 return ret;
12037         }
12038         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12039         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12040         ret = btrfs_commit_transaction(trans, root);
12041         return ret;
12042 }
12043
12044 static int populate_csum(struct btrfs_trans_handle *trans,
12045                          struct btrfs_root *csum_root, char *buf, u64 start,
12046                          u64 len)
12047 {
12048         u64 offset = 0;
12049         u64 sectorsize;
12050         int ret = 0;
12051
12052         while (offset < len) {
12053                 sectorsize = csum_root->sectorsize;
12054                 ret = read_extent_data(csum_root, buf, start + offset,
12055                                        &sectorsize, 0);
12056                 if (ret)
12057                         break;
12058                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12059                                             start + offset, buf, sectorsize);
12060                 if (ret)
12061                         break;
12062                 offset += sectorsize;
12063         }
12064         return ret;
12065 }
12066
12067 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12068                                       struct btrfs_root *csum_root,
12069                                       struct btrfs_root *cur_root)
12070 {
12071         struct btrfs_path path;
12072         struct btrfs_key key;
12073         struct extent_buffer *node;
12074         struct btrfs_file_extent_item *fi;
12075         char *buf = NULL;
12076         u64 start = 0;
12077         u64 len = 0;
12078         int slot = 0;
12079         int ret = 0;
12080
12081         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12082         if (!buf)
12083                 return -ENOMEM;
12084
12085         btrfs_init_path(&path);
12086         key.objectid = 0;
12087         key.offset = 0;
12088         key.type = 0;
12089         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12090         if (ret < 0)
12091                 goto out;
12092         /* Iterate all regular file extents and fill its csum */
12093         while (1) {
12094                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12095
12096                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12097                         goto next;
12098                 node = path.nodes[0];
12099                 slot = path.slots[0];
12100                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12101                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12102                         goto next;
12103                 start = btrfs_file_extent_disk_bytenr(node, fi);
12104                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12105
12106                 ret = populate_csum(trans, csum_root, buf, start, len);
12107                 if (ret == -EEXIST)
12108                         ret = 0;
12109                 if (ret < 0)
12110                         goto out;
12111 next:
12112                 /*
12113                  * TODO: if next leaf is corrupted, jump to nearest next valid
12114                  * leaf.
12115                  */
12116                 ret = btrfs_next_item(cur_root, &path);
12117                 if (ret < 0)
12118                         goto out;
12119                 if (ret > 0) {
12120                         ret = 0;
12121                         goto out;
12122                 }
12123         }
12124
12125 out:
12126         btrfs_release_path(&path);
12127         free(buf);
12128         return ret;
12129 }
12130
12131 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12132                                   struct btrfs_root *csum_root)
12133 {
12134         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12135         struct btrfs_path path;
12136         struct btrfs_root *tree_root = fs_info->tree_root;
12137         struct btrfs_root *cur_root;
12138         struct extent_buffer *node;
12139         struct btrfs_key key;
12140         int slot = 0;
12141         int ret = 0;
12142
12143         btrfs_init_path(&path);
12144         key.objectid = BTRFS_FS_TREE_OBJECTID;
12145         key.offset = 0;
12146         key.type = BTRFS_ROOT_ITEM_KEY;
12147         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12148         if (ret < 0)
12149                 goto out;
12150         if (ret > 0) {
12151                 ret = -ENOENT;
12152                 goto out;
12153         }
12154
12155         while (1) {
12156                 node = path.nodes[0];
12157                 slot = path.slots[0];
12158                 btrfs_item_key_to_cpu(node, &key, slot);
12159                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12160                         goto out;
12161                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12162                         goto next;
12163                 if (!is_fstree(key.objectid))
12164                         goto next;
12165                 key.offset = (u64)-1;
12166
12167                 cur_root = btrfs_read_fs_root(fs_info, &key);
12168                 if (IS_ERR(cur_root) || !cur_root) {
12169                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12170                                 key.objectid);
12171                         goto out;
12172                 }
12173                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12174                                 cur_root);
12175                 if (ret < 0)
12176                         goto out;
12177 next:
12178                 ret = btrfs_next_item(tree_root, &path);
12179                 if (ret > 0) {
12180                         ret = 0;
12181                         goto out;
12182                 }
12183                 if (ret < 0)
12184                         goto out;
12185         }
12186
12187 out:
12188         btrfs_release_path(&path);
12189         return ret;
12190 }
12191
12192 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12193                                       struct btrfs_root *csum_root)
12194 {
12195         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12196         struct btrfs_path path;
12197         struct btrfs_extent_item *ei;
12198         struct extent_buffer *leaf;
12199         char *buf;
12200         struct btrfs_key key;
12201         int ret;
12202
12203         btrfs_init_path(&path);
12204         key.objectid = 0;
12205         key.type = BTRFS_EXTENT_ITEM_KEY;
12206         key.offset = 0;
12207         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12208         if (ret < 0) {
12209                 btrfs_release_path(&path);
12210                 return ret;
12211         }
12212
12213         buf = malloc(csum_root->sectorsize);
12214         if (!buf) {
12215                 btrfs_release_path(&path);
12216                 return -ENOMEM;
12217         }
12218
12219         while (1) {
12220                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12221                         ret = btrfs_next_leaf(extent_root, &path);
12222                         if (ret < 0)
12223                                 break;
12224                         if (ret) {
12225                                 ret = 0;
12226                                 break;
12227                         }
12228                 }
12229                 leaf = path.nodes[0];
12230
12231                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12232                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12233                         path.slots[0]++;
12234                         continue;
12235                 }
12236
12237                 ei = btrfs_item_ptr(leaf, path.slots[0],
12238                                     struct btrfs_extent_item);
12239                 if (!(btrfs_extent_flags(leaf, ei) &
12240                       BTRFS_EXTENT_FLAG_DATA)) {
12241                         path.slots[0]++;
12242                         continue;
12243                 }
12244
12245                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12246                                     key.offset);
12247                 if (ret)
12248                         break;
12249                 path.slots[0]++;
12250         }
12251
12252         btrfs_release_path(&path);
12253         free(buf);
12254         return ret;
12255 }
12256
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent information, so in that case
 * the extent tree cannot be used as the source of data extents.  When
 * @search_fs_tree is non-zero the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12273
12274 static void free_roots_info_cache(void)
12275 {
12276         if (!roots_info_cache)
12277                 return;
12278
12279         while (!cache_tree_empty(roots_info_cache)) {
12280                 struct cache_extent *entry;
12281                 struct root_item_info *rii;
12282
12283                 entry = first_cache_extent(roots_info_cache);
12284                 if (!entry)
12285                         break;
12286                 remove_cache_extent(roots_info_cache, entry);
12287                 rii = container_of(entry, struct root_item_info, cache_extent);
12288                 free(rii);
12289         }
12290
12291         free(roots_info_cache);
12292         roots_info_cache = NULL;
12293 }
12294
12295 static int build_roots_info_cache(struct btrfs_fs_info *info)
12296 {
12297         int ret = 0;
12298         struct btrfs_key key;
12299         struct extent_buffer *leaf;
12300         struct btrfs_path path;
12301
12302         if (!roots_info_cache) {
12303                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12304                 if (!roots_info_cache)
12305                         return -ENOMEM;
12306                 cache_tree_init(roots_info_cache);
12307         }
12308
12309         btrfs_init_path(&path);
12310         key.objectid = 0;
12311         key.type = BTRFS_EXTENT_ITEM_KEY;
12312         key.offset = 0;
12313         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12314         if (ret < 0)
12315                 goto out;
12316         leaf = path.nodes[0];
12317
12318         while (1) {
12319                 struct btrfs_key found_key;
12320                 struct btrfs_extent_item *ei;
12321                 struct btrfs_extent_inline_ref *iref;
12322                 int slot = path.slots[0];
12323                 int type;
12324                 u64 flags;
12325                 u64 root_id;
12326                 u8 level;
12327                 struct cache_extent *entry;
12328                 struct root_item_info *rii;
12329
12330                 if (slot >= btrfs_header_nritems(leaf)) {
12331                         ret = btrfs_next_leaf(info->extent_root, &path);
12332                         if (ret < 0) {
12333                                 break;
12334                         } else if (ret) {
12335                                 ret = 0;
12336                                 break;
12337                         }
12338                         leaf = path.nodes[0];
12339                         slot = path.slots[0];
12340                 }
12341
12342                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12343
12344                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12345                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12346                         goto next;
12347
12348                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12349                 flags = btrfs_extent_flags(leaf, ei);
12350
12351                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12352                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12353                         goto next;
12354
12355                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12356                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12357                         level = found_key.offset;
12358                 } else {
12359                         struct btrfs_tree_block_info *binfo;
12360
12361                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12362                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12363                         level = btrfs_tree_block_level(leaf, binfo);
12364                 }
12365
12366                 /*
12367                  * For a root extent, it must be of the following type and the
12368                  * first (and only one) iref in the item.
12369                  */
12370                 type = btrfs_extent_inline_ref_type(leaf, iref);
12371                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12372                         goto next;
12373
12374                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12375                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12376                 if (!entry) {
12377                         rii = malloc(sizeof(struct root_item_info));
12378                         if (!rii) {
12379                                 ret = -ENOMEM;
12380                                 goto out;
12381                         }
12382                         rii->cache_extent.start = root_id;
12383                         rii->cache_extent.size = 1;
12384                         rii->level = (u8)-1;
12385                         entry = &rii->cache_extent;
12386                         ret = insert_cache_extent(roots_info_cache, entry);
12387                         ASSERT(ret == 0);
12388                 } else {
12389                         rii = container_of(entry, struct root_item_info,
12390                                            cache_extent);
12391                 }
12392
12393                 ASSERT(rii->cache_extent.start == root_id);
12394                 ASSERT(rii->cache_extent.size == 1);
12395
12396                 if (level > rii->level || rii->level == (u8)-1) {
12397                         rii->level = level;
12398                         rii->bytenr = found_key.objectid;
12399                         rii->gen = btrfs_extent_generation(leaf, ei);
12400                         rii->node_count = 1;
12401                 } else if (level == rii->level) {
12402                         rii->node_count++;
12403                 }
12404 next:
12405                 path.slots[0]++;
12406         }
12407
12408 out:
12409         btrfs_release_path(&path);
12410
12411         return ret;
12412 }
12413
12414 static int maybe_repair_root_item(struct btrfs_path *path,
12415                                   const struct btrfs_key *root_key,
12416                                   const int read_only_mode)
12417 {
12418         const u64 root_id = root_key->objectid;
12419         struct cache_extent *entry;
12420         struct root_item_info *rii;
12421         struct btrfs_root_item ri;
12422         unsigned long offset;
12423
12424         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12425         if (!entry) {
12426                 fprintf(stderr,
12427                         "Error: could not find extent items for root %llu\n",
12428                         root_key->objectid);
12429                 return -ENOENT;
12430         }
12431
12432         rii = container_of(entry, struct root_item_info, cache_extent);
12433         ASSERT(rii->cache_extent.start == root_id);
12434         ASSERT(rii->cache_extent.size == 1);
12435
12436         if (rii->node_count != 1) {
12437                 fprintf(stderr,
12438                         "Error: could not find btree root extent for root %llu\n",
12439                         root_id);
12440                 return -ENOENT;
12441         }
12442
12443         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12444         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12445
12446         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12447             btrfs_root_level(&ri) != rii->level ||
12448             btrfs_root_generation(&ri) != rii->gen) {
12449
12450                 /*
12451                  * If we're in repair mode but our caller told us to not update
12452                  * the root item, i.e. just check if it needs to be updated, don't
12453                  * print this message, since the caller will call us again shortly
12454                  * for the same root item without read only mode (the caller will
12455                  * open a transaction first).
12456                  */
12457                 if (!(read_only_mode && repair))
12458                         fprintf(stderr,
12459                                 "%sroot item for root %llu,"
12460                                 " current bytenr %llu, current gen %llu, current level %u,"
12461                                 " new bytenr %llu, new gen %llu, new level %u\n",
12462                                 (read_only_mode ? "" : "fixing "),
12463                                 root_id,
12464                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12465                                 btrfs_root_level(&ri),
12466                                 rii->bytenr, rii->gen, rii->level);
12467
12468                 if (btrfs_root_generation(&ri) > rii->gen) {
12469                         fprintf(stderr,
12470                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12471                                 root_id, btrfs_root_generation(&ri), rii->gen);
12472                         return -EINVAL;
12473                 }
12474
12475                 if (!read_only_mode) {
12476                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12477                         btrfs_set_root_level(&ri, rii->level);
12478                         btrfs_set_root_generation(&ri, rii->gen);
12479                         write_extent_buffer(path->nodes[0], &ri,
12480                                             offset, sizeof(ri));
12481                 }
12482
12483                 return 1;
12484         }
12485
12486         return 0;
12487 }
12488
12489 /*
12490  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12491  * caused read-only snapshots to be corrupted if they were created at a moment
12492  * when the source subvolume/snapshot had orphan items. The issue was that the
12493  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12494  * node instead of the post orphan cleanup root node.
12495  * So this function, and its callees, just detects and fixes those cases. Even
12496  * though the regression was for read-only snapshots, this function applies to
12497  * any snapshot/subvolume root.
12498  * This must be run before any other repair code - not doing it so, makes other
12499  * repair code delete or modify backrefs in the extent tree for example, which
12500  * will result in an inconsistent fs after repairing the root items.
12501  */
12502 static int repair_root_items(struct btrfs_fs_info *info)
12503 {
12504         struct btrfs_path path;
12505         struct btrfs_key key;
12506         struct extent_buffer *leaf;
12507         struct btrfs_trans_handle *trans = NULL;
12508         int ret = 0;
12509         int bad_roots = 0;
12510         int need_trans = 0;
12511
12512         btrfs_init_path(&path);
12513
12514         ret = build_roots_info_cache(info);
12515         if (ret)
12516                 goto out;
12517
12518         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12519         key.type = BTRFS_ROOT_ITEM_KEY;
12520         key.offset = 0;
12521
12522 again:
12523         /*
12524          * Avoid opening and committing transactions if a leaf doesn't have
12525          * any root items that need to be fixed, so that we avoid rotating
12526          * backup roots unnecessarily.
12527          */
12528         if (need_trans) {
12529                 trans = btrfs_start_transaction(info->tree_root, 1);
12530                 if (IS_ERR(trans)) {
12531                         ret = PTR_ERR(trans);
12532                         goto out;
12533                 }
12534         }
12535
12536         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12537                                 0, trans ? 1 : 0);
12538         if (ret < 0)
12539                 goto out;
12540         leaf = path.nodes[0];
12541
12542         while (1) {
12543                 struct btrfs_key found_key;
12544
12545                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12546                         int no_more_keys = find_next_key(&path, &key);
12547
12548                         btrfs_release_path(&path);
12549                         if (trans) {
12550                                 ret = btrfs_commit_transaction(trans,
12551                                                                info->tree_root);
12552                                 trans = NULL;
12553                                 if (ret < 0)
12554                                         goto out;
12555                         }
12556                         need_trans = 0;
12557                         if (no_more_keys)
12558                                 break;
12559                         goto again;
12560                 }
12561
12562                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12563
12564                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12565                         goto next;
12566                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12567                         goto next;
12568
12569                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12570                 if (ret < 0)
12571                         goto out;
12572                 if (ret) {
12573                         if (!trans && repair) {
12574                                 need_trans = 1;
12575                                 key = found_key;
12576                                 btrfs_release_path(&path);
12577                                 goto again;
12578                         }
12579                         bad_roots++;
12580                 }
12581 next:
12582                 path.slots[0]++;
12583         }
12584         ret = 0;
12585 out:
12586         free_roots_info_cache();
12587         btrfs_release_path(&path);
12588         if (trans)
12589                 btrfs_commit_transaction(trans, info->tree_root);
12590         if (ret < 0)
12591                 return ret;
12592
12593         return bad_roots;
12594 }
12595
12596 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12597 {
12598         struct btrfs_trans_handle *trans;
12599         struct btrfs_block_group_cache *bg_cache;
12600         u64 current = 0;
12601         int ret = 0;
12602
12603         /* Clear all free space cache inodes and its extent data */
12604         while (1) {
12605                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12606                 if (!bg_cache)
12607                         break;
12608                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12609                 if (ret < 0)
12610                         return ret;
12611                 current = bg_cache->key.objectid + bg_cache->key.offset;
12612         }
12613
12614         /* Don't forget to set cache_generation to -1 */
12615         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12616         if (IS_ERR(trans)) {
12617                 error("failed to update super block cache generation");
12618                 return PTR_ERR(trans);
12619         }
12620         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12621         btrfs_commit_transaction(trans, fs_info->tree_root);
12622
12623         return ret;
12624 }
12625
/*
 * Help text for "btrfs check": a NULL-terminated list of lines that
 * cmd_check() passes to usage().
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	/* Terminator */
	NULL
};
12656
12657 int cmd_check(int argc, char **argv)
12658 {
12659         struct cache_tree root_cache;
12660         struct btrfs_root *root;
12661         struct btrfs_fs_info *info;
12662         u64 bytenr = 0;
12663         u64 subvolid = 0;
12664         u64 tree_root_bytenr = 0;
12665         u64 chunk_root_bytenr = 0;
12666         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12667         int ret;
12668         int err = 0;
12669         u64 num;
12670         int init_csum_tree = 0;
12671         int readonly = 0;
12672         int clear_space_cache = 0;
12673         int qgroup_report = 0;
12674         int qgroups_repaired = 0;
12675         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12676
12677         while(1) {
12678                 int c;
12679                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12680                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12681                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12682                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12683                 static const struct option long_options[] = {
12684                         { "super", required_argument, NULL, 's' },
12685                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12686                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12687                         { "init-csum-tree", no_argument, NULL,
12688                                 GETOPT_VAL_INIT_CSUM },
12689                         { "init-extent-tree", no_argument, NULL,
12690                                 GETOPT_VAL_INIT_EXTENT },
12691                         { "check-data-csum", no_argument, NULL,
12692                                 GETOPT_VAL_CHECK_CSUM },
12693                         { "backup", no_argument, NULL, 'b' },
12694                         { "subvol-extents", required_argument, NULL, 'E' },
12695                         { "qgroup-report", no_argument, NULL, 'Q' },
12696                         { "tree-root", required_argument, NULL, 'r' },
12697                         { "chunk-root", required_argument, NULL,
12698                                 GETOPT_VAL_CHUNK_TREE },
12699                         { "progress", no_argument, NULL, 'p' },
12700                         { "mode", required_argument, NULL,
12701                                 GETOPT_VAL_MODE },
12702                         { "clear-space-cache", required_argument, NULL,
12703                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12704                         { NULL, 0, NULL, 0}
12705                 };
12706
12707                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12708                 if (c < 0)
12709                         break;
12710                 switch(c) {
12711                         case 'a': /* ignored */ break;
12712                         case 'b':
12713                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12714                                 break;
12715                         case 's':
12716                                 num = arg_strtou64(optarg);
12717                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12718                                         error(
12719                                         "super mirror should be less than %d",
12720                                                 BTRFS_SUPER_MIRROR_MAX);
12721                                         exit(1);
12722                                 }
12723                                 bytenr = btrfs_sb_offset(((int)num));
12724                                 printf("using SB copy %llu, bytenr %llu\n", num,
12725                                        (unsigned long long)bytenr);
12726                                 break;
12727                         case 'Q':
12728                                 qgroup_report = 1;
12729                                 break;
12730                         case 'E':
12731                                 subvolid = arg_strtou64(optarg);
12732                                 break;
12733                         case 'r':
12734                                 tree_root_bytenr = arg_strtou64(optarg);
12735                                 break;
12736                         case GETOPT_VAL_CHUNK_TREE:
12737                                 chunk_root_bytenr = arg_strtou64(optarg);
12738                                 break;
12739                         case 'p':
12740                                 ctx.progress_enabled = true;
12741                                 break;
12742                         case '?':
12743                         case 'h':
12744                                 usage(cmd_check_usage);
12745                         case GETOPT_VAL_REPAIR:
12746                                 printf("enabling repair mode\n");
12747                                 repair = 1;
12748                                 ctree_flags |= OPEN_CTREE_WRITES;
12749                                 break;
12750                         case GETOPT_VAL_READONLY:
12751                                 readonly = 1;
12752                                 break;
12753                         case GETOPT_VAL_INIT_CSUM:
12754                                 printf("Creating a new CRC tree\n");
12755                                 init_csum_tree = 1;
12756                                 repair = 1;
12757                                 ctree_flags |= OPEN_CTREE_WRITES;
12758                                 break;
12759                         case GETOPT_VAL_INIT_EXTENT:
12760                                 init_extent_tree = 1;
12761                                 ctree_flags |= (OPEN_CTREE_WRITES |
12762                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12763                                 repair = 1;
12764                                 break;
12765                         case GETOPT_VAL_CHECK_CSUM:
12766                                 check_data_csum = 1;
12767                                 break;
12768                         case GETOPT_VAL_MODE:
12769                                 check_mode = parse_check_mode(optarg);
12770                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12771                                         error("unknown mode: %s", optarg);
12772                                         exit(1);
12773                                 }
12774                                 break;
12775                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12776                                 if (strcmp(optarg, "v1") == 0) {
12777                                         clear_space_cache = 1;
12778                                 } else if (strcmp(optarg, "v2") == 0) {
12779                                         clear_space_cache = 2;
12780                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12781                                 } else {
12782                                         error(
12783                 "invalid argument to --clear-space-cache, must be v1 or v2");
12784                                         exit(1);
12785                                 }
12786                                 ctree_flags |= OPEN_CTREE_WRITES;
12787                                 break;
12788                 }
12789         }
12790
12791         if (check_argc_exact(argc - optind, 1))
12792                 usage(cmd_check_usage);
12793
12794         if (ctx.progress_enabled) {
12795                 ctx.tp = TASK_NOTHING;
12796                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12797         }
12798
12799         /* This check is the only reason for --readonly to exist */
12800         if (readonly && repair) {
12801                 error("repair options are not compatible with --readonly");
12802                 exit(1);
12803         }
12804
12805         /*
12806          * Not supported yet
12807          */
12808         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12809                 error("low memory mode doesn't support repair yet");
12810                 exit(1);
12811         }
12812
12813         radix_tree_init();
12814         cache_tree_init(&root_cache);
12815
12816         if((ret = check_mounted(argv[optind])) < 0) {
12817                 error("could not check mount status: %s", strerror(-ret));
12818                 err |= !!ret;
12819                 goto err_out;
12820         } else if(ret) {
12821                 error("%s is currently mounted, aborting", argv[optind]);
12822                 ret = -EBUSY;
12823                 err |= !!ret;
12824                 goto err_out;
12825         }
12826
12827         /* only allow partial opening under repair mode */
12828         if (repair)
12829                 ctree_flags |= OPEN_CTREE_PARTIAL;
12830
12831         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12832                                   chunk_root_bytenr, ctree_flags);
12833         if (!info) {
12834                 error("cannot open file system");
12835                 ret = -EIO;
12836                 err |= !!ret;
12837                 goto err_out;
12838         }
12839
12840         global_info = info;
12841         root = info->fs_root;
12842         if (clear_space_cache == 1) {
12843                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12844                         error(
12845                 "free space cache v2 detected, use --clear-space-cache v2");
12846                         ret = 1;
12847                         goto close_out;
12848                 }
12849                 printf("Clearing free space cache\n");
12850                 ret = clear_free_space_cache(info);
12851                 if (ret) {
12852                         error("failed to clear free space cache");
12853                         ret = 1;
12854                 } else {
12855                         printf("Free space cache cleared\n");
12856                 }
12857                 goto close_out;
12858         } else if (clear_space_cache == 2) {
12859                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12860                         printf("no free space cache v2 to clear\n");
12861                         ret = 0;
12862                         goto close_out;
12863                 }
12864                 printf("Clear free space cache v2\n");
12865                 ret = btrfs_clear_free_space_tree(info);
12866                 if (ret) {
12867                         error("failed to clear free space cache v2: %d", ret);
12868                         ret = 1;
12869                 } else {
12870                         printf("free space cache v2 cleared\n");
12871                 }
12872                 goto close_out;
12873         }
12874
12875         /*
12876          * repair mode will force us to commit transaction which
12877          * will make us fail to load log tree when mounting.
12878          */
12879         if (repair && btrfs_super_log_root(info->super_copy)) {
12880                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12881                 if (!ret) {
12882                         ret = 1;
12883                         err |= !!ret;
12884                         goto close_out;
12885                 }
12886                 ret = zero_log_tree(root);
12887                 err |= !!ret;
12888                 if (ret) {
12889                         error("failed to zero log tree: %d", ret);
12890                         goto close_out;
12891                 }
12892         }
12893
12894         uuid_unparse(info->super_copy->fsid, uuidbuf);
12895         if (qgroup_report) {
12896                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12897                        uuidbuf);
12898                 ret = qgroup_verify_all(info);
12899                 err |= !!ret;
12900                 if (ret == 0)
12901                         report_qgroups(1);
12902                 goto close_out;
12903         }
12904         if (subvolid) {
12905                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12906                        subvolid, argv[optind], uuidbuf);
12907                 ret = print_extent_state(info, subvolid);
12908                 err |= !!ret;
12909                 goto close_out;
12910         }
12911         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12912
12913         if (!extent_buffer_uptodate(info->tree_root->node) ||
12914             !extent_buffer_uptodate(info->dev_root->node) ||
12915             !extent_buffer_uptodate(info->chunk_root->node)) {
12916                 error("critical roots corrupted, unable to check the filesystem");
12917                 err |= !!ret;
12918                 ret = -EIO;
12919                 goto close_out;
12920         }
12921
12922         if (init_extent_tree || init_csum_tree) {
12923                 struct btrfs_trans_handle *trans;
12924
12925                 trans = btrfs_start_transaction(info->extent_root, 0);
12926                 if (IS_ERR(trans)) {
12927                         error("error starting transaction");
12928                         ret = PTR_ERR(trans);
12929                         err |= !!ret;
12930                         goto close_out;
12931                 }
12932
12933                 if (init_extent_tree) {
12934                         printf("Creating a new extent tree\n");
12935                         ret = reinit_extent_tree(trans, info);
12936                         err |= !!ret;
12937                         if (ret)
12938                                 goto close_out;
12939                 }
12940
12941                 if (init_csum_tree) {
12942                         printf("Reinitialize checksum tree\n");
12943                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12944                         if (ret) {
12945                                 error("checksum tree initialization failed: %d",
12946                                                 ret);
12947                                 ret = -EIO;
12948                                 err |= !!ret;
12949                                 goto close_out;
12950                         }
12951
12952                         ret = fill_csum_tree(trans, info->csum_root,
12953                                              init_extent_tree);
12954                         err |= !!ret;
12955                         if (ret) {
12956                                 error("checksum tree refilling failed: %d", ret);
12957                                 return -EIO;
12958                         }
12959                 }
12960                 /*
12961                  * Ok now we commit and run the normal fsck, which will add
12962                  * extent entries for all of the items it finds.
12963                  */
12964                 ret = btrfs_commit_transaction(trans, info->extent_root);
12965                 err |= !!ret;
12966                 if (ret)
12967                         goto close_out;
12968         }
12969         if (!extent_buffer_uptodate(info->extent_root->node)) {
12970                 error("critical: extent_root, unable to check the filesystem");
12971                 ret = -EIO;
12972                 err |= !!ret;
12973                 goto close_out;
12974         }
12975         if (!extent_buffer_uptodate(info->csum_root->node)) {
12976                 error("critical: csum_root, unable to check the filesystem");
12977                 ret = -EIO;
12978                 err |= !!ret;
12979                 goto close_out;
12980         }
12981
12982         if (!ctx.progress_enabled)
12983                 fprintf(stderr, "checking extents\n");
12984         if (check_mode == CHECK_MODE_LOWMEM)
12985                 ret = check_chunks_and_extents_v2(root);
12986         else
12987                 ret = check_chunks_and_extents(root);
12988         err |= !!ret;
12989         if (ret)
12990                 error(
12991                 "errors found in extent allocation tree or chunk allocation");
12992
12993         ret = repair_root_items(info);
12994         err |= !!ret;
12995         if (ret < 0) {
12996                 error("failed to repair root items: %s", strerror(-ret));
12997                 goto close_out;
12998         }
12999         if (repair) {
13000                 fprintf(stderr, "Fixed %d roots.\n", ret);
13001                 ret = 0;
13002         } else if (ret > 0) {
13003                 fprintf(stderr,
13004                        "Found %d roots with an outdated root item.\n",
13005                        ret);
13006                 fprintf(stderr,
13007                         "Please run a filesystem check with the option --repair to fix them.\n");
13008                 ret = 1;
13009                 err |= !!ret;
13010                 goto close_out;
13011         }
13012
13013         if (!ctx.progress_enabled) {
13014                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13015                         fprintf(stderr, "checking free space tree\n");
13016                 else
13017                         fprintf(stderr, "checking free space cache\n");
13018         }
13019         ret = check_space_cache(root);
13020         err |= !!ret;
13021         if (ret) {
13022                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13023                         error("errors found in free space tree");
13024                 else
13025                         error("errors found in free space cache");
13026                 goto out;
13027         }
13028
13029         /*
13030          * We used to have to have these hole extents in between our real
13031          * extents so if we don't have this flag set we need to make sure there
13032          * are no gaps in the file extents for inodes, otherwise we can just
13033          * ignore it when this happens.
13034          */
13035         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13036         if (!ctx.progress_enabled)
13037                 fprintf(stderr, "checking fs roots\n");
13038         if (check_mode == CHECK_MODE_LOWMEM)
13039                 ret = check_fs_roots_v2(root->fs_info);
13040         else
13041                 ret = check_fs_roots(root, &root_cache);
13042         err |= !!ret;
13043         if (ret) {
13044                 error("errors found in fs roots");
13045                 goto out;
13046         }
13047
13048         fprintf(stderr, "checking csums\n");
13049         ret = check_csums(root);
13050         err |= !!ret;
13051         if (ret) {
13052                 error("errors found in csum tree");
13053                 goto out;
13054         }
13055
13056         fprintf(stderr, "checking root refs\n");
13057         /* For low memory mode, check_fs_roots_v2 handles root refs */
13058         if (check_mode != CHECK_MODE_LOWMEM) {
13059                 ret = check_root_refs(root, &root_cache);
13060                 err |= !!ret;
13061                 if (ret) {
13062                         error("errors found in root refs");
13063                         goto out;
13064                 }
13065         }
13066
13067         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13068                 struct extent_buffer *eb;
13069
13070                 eb = list_first_entry(&root->fs_info->recow_ebs,
13071                                       struct extent_buffer, recow);
13072                 list_del_init(&eb->recow);
13073                 ret = recow_extent_buffer(root, eb);
13074                 err |= !!ret;
13075                 if (ret) {
13076                         error("fails to fix transid errors");
13077                         break;
13078                 }
13079         }
13080
13081         while (!list_empty(&delete_items)) {
13082                 struct bad_item *bad;
13083
13084                 bad = list_first_entry(&delete_items, struct bad_item, list);
13085                 list_del_init(&bad->list);
13086                 if (repair) {
13087                         ret = delete_bad_item(root, bad);
13088                         err |= !!ret;
13089                 }
13090                 free(bad);
13091         }
13092
13093         if (info->quota_enabled) {
13094                 fprintf(stderr, "checking quota groups\n");
13095                 ret = qgroup_verify_all(info);
13096                 err |= !!ret;
13097                 if (ret) {
13098                         error("failed to check quota groups");
13099                         goto out;
13100                 }
13101                 report_qgroups(0);
13102                 ret = repair_qgroups(info, &qgroups_repaired);
13103                 err |= !!ret;
13104                 if (err) {
13105                         error("failed to repair quota groups");
13106                         goto out;
13107                 }
13108                 ret = 0;
13109         }
13110
13111         if (!list_empty(&root->fs_info->recow_ebs)) {
13112                 error("transid errors in file system");
13113                 ret = 1;
13114                 err |= !!ret;
13115         }
13116 out:
13117         if (found_old_backref) { /*
13118                  * there was a disk format change when mixed
13119                  * backref was in testing tree. The old format
13120                  * existed about one week.
13121                  */
13122                 printf("\n * Found old mixed backref format. "
13123                        "The old format is not supported! *"
13124                        "\n * Please mount the FS in readonly mode, "
13125                        "backup data and re-format the FS. *\n\n");
13126                 err |= 1;
13127         }
13128         printf("found %llu bytes used, ",
13129                (unsigned long long)bytes_used);
13130         if (err)
13131                 printf("error(s) found\n");
13132         else
13133                 printf("no error found\n");
13134         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13135         printf("total tree bytes: %llu\n",
13136                (unsigned long long)total_btree_bytes);
13137         printf("total fs tree bytes: %llu\n",
13138                (unsigned long long)total_fs_tree_bytes);
13139         printf("total extent tree bytes: %llu\n",
13140                (unsigned long long)total_extent_tree_bytes);
13141         printf("btree space waste bytes: %llu\n",
13142                (unsigned long long)btree_space_waste);
13143         printf("file data blocks allocated: %llu\n referenced %llu\n",
13144                 (unsigned long long)data_bytes_allocated,
13145                 (unsigned long long)data_bytes_referenced);
13146
13147         free_qgroup_counts();
13148         free_root_recs_tree(&root_cache);
13149 close_out:
13150         close_ctree(root);
13151 err_out:
13152         if (ctx.progress_enabled)
13153                 task_deinit(ctx.info);
13154
13155         return err;
13156 }