btrfs-progs: check: Fix heap use after free
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress task.  print_status_check() uses the value
 * as an index into its message table, so every enumerator except
 * TASK_NOTHING needs a matching message there.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	/* Must remain the last enumerator. */
	TASK_NOTHING = 3,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL, not a separate mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	CHECK_MODE_UNKNOWN = 2,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; note that the numbering starts at bit 1, bit 0 is not used. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Reference error bits; print_ref_error() turns them into text. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Inode error bits stored in inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * for error classification.
 *
 * Every flag owns its own bit.  CROSSING_STRIPE_BOUNDARY previously shared
 * bit 4 with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once OR-ed into an error mask; it now uses the first
 * free bit so that all other values stay unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Emit a newline on stdout and flush it, so that following output starts on
 * a fresh line.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs { /* per-level cache, indexed by tree level; filled by update_nodes_refs() and walk_down_tree() */
1861         u64 bytenr[BTRFS_MAX_LEVEL]; /* start of the tree block last looked up at this level */
1862         u64 refs[BTRFS_MAX_LEVEL]; /* reference count btrfs_lookup_extent_info() reported for that block */
1863         int need_check[BTRFS_MAX_LEVEL]; /* set by update_nodes_refs(): non-zero if the block must be checked in the current root */
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 /*
1872  * Returns >0  Found error, not fatal, should continue
1873  * Returns <0  Fatal error, must exit the whole check
1874  * Returns 0   No errors found
1875  */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877                                struct node_refs *nrefs, int *level, int ext_ref)
1878 {
1879         struct extent_buffer *cur = path->nodes[0];
1880         struct btrfs_key key;
1881         u64 cur_bytenr;
1882         u32 nritems;
1883         u64 first_ino = 0;
1884         int root_level = btrfs_header_level(root->node);
1885         int i;
1886         int ret = 0; /* Final return value */
1887         int err = 0; /* Positive error bitmap */
1888
1889         cur_bytenr = cur->start;
1890
1891         /* skip to first inode item or the first inode number change */
1892         nritems = btrfs_header_nritems(cur);
1893         for (i = 0; i < nritems; i++) {
1894                 btrfs_item_key_to_cpu(cur, &key, i);
1895                 if (i == 0)
1896                         first_ino = key.objectid;
1897                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898                     (first_ino && first_ino != key.objectid))
1899                         break;
1900         }
1901         if (i == nritems) {
1902                 path->slots[0] = nritems;
1903                 return 0;
1904         }
1905         path->slots[0] = i;
1906
1907 again:
1908         err |= check_inode_item(root, path, ext_ref);
1909
1910         if (err & LAST_ITEM)
1911                 goto out;
1912
1913         /* still have inode items in thie leaf */
1914         if (cur->start == cur_bytenr)
1915                 goto again;
1916
1917         /*
1918          * we have switched to another leaf, above nodes may
1919          * have changed, here walk down the path, if a node
1920          * or leaf is shared, check whether we can skip this
1921          * node or leaf.
1922          */
1923         for (i = root_level; i >= 0; i--) {
1924                 if (path->nodes[i]->start == nrefs->bytenr[i])
1925                         continue;
1926
1927                 ret = update_nodes_refs(root,
1928                                 path->nodes[i]->start,
1929                                 nrefs, i);
1930                 if (ret)
1931                         goto out;
1932
1933                 if (!nrefs->need_check[i]) {
1934                         *level += 1;
1935                         break;
1936                 }
1937         }
1938
1939         for (i = 0; i < *level; i++) {
1940                 free_extent_buffer(path->nodes[i]);
1941                 path->nodes[i] = NULL;
1942         }
1943 out:
1944         err &= ~LAST_ITEM;
1945         if (err && !ret)
1946                 ret = err;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         free_extent_buffer(next);
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 /*
2217  * Returns >0  Found error, should continue
2218  * Returns <0  Fatal error, must exit the whole check
2219  * Returns 0   No errors found
2220  */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222                              int *level, struct node_refs *nrefs, int ext_ref)
2223 {
2224         enum btrfs_tree_block_status status;
2225         u64 bytenr;
2226         u64 ptr_gen;
2227         struct extent_buffer *next;
2228         struct extent_buffer *cur;
2229         u32 blocksize;
2230         int ret;
2231
2232         WARN_ON(*level < 0);
2233         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2234
2235         ret = update_nodes_refs(root, path->nodes[*level]->start,
2236                                 nrefs, *level);
2237         if (ret < 0)
2238                 return ret;
2239
2240         while (*level >= 0) {
2241                 WARN_ON(*level < 0);
2242                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243                 cur = path->nodes[*level];
2244
2245                 if (btrfs_header_level(cur) != *level)
2246                         WARN_ON(1);
2247
2248                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249                         break;
2250                 /* Don't forgot to check leaf/node validation */
2251                 if (*level == 0) {
2252                         ret = btrfs_check_leaf(root, NULL, cur);
2253                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2254                                 ret = -EIO;
2255                                 break;
2256                         }
2257                         ret = process_one_leaf_v2(root, path, nrefs,
2258                                                   level, ext_ref);
2259                         break;
2260                 } else {
2261                         ret = btrfs_check_node(root, NULL, cur);
2262                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2263                                 ret = -EIO;
2264                                 break;
2265                         }
2266                 }
2267                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269                 blocksize = root->nodesize;
2270
2271                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2272                 if (ret)
2273                         break;
2274                 if (!nrefs->need_check[*level - 1]) {
2275                         path->slots[*level]++;
2276                         continue;
2277                 }
2278
2279                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root, bytenr, blocksize,
2284                                                ptr_gen);
2285                         if (!extent_buffer_uptodate(next)) {
2286                                 struct btrfs_key node_key;
2287
2288                                 btrfs_node_key_to_cpu(path->nodes[*level],
2289                                                       &node_key,
2290                                                       path->slots[*level]);
2291                                 btrfs_add_corrupt_extent_record(root->fs_info,
2292                                                 &node_key,
2293                                                 path->nodes[*level]->start,
2294                                                 root->nodesize, *level);
2295                                 ret = -EIO;
2296                                 break;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret < 0) 
2302                         break;
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         ret = -EIO;
2311                         break;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319         return ret;
2320 }
2321
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323                         struct walk_control *wc, int *level)
2324 {
2325         int i;
2326         struct extent_buffer *leaf;
2327
2328         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329                 leaf = path->nodes[i];
2330                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2331                         path->slots[i]++;
2332                         *level = i;
2333                         return 0;
2334                 } else {
2335                         free_extent_buffer(path->nodes[*level]);
2336                         path->nodes[*level] = NULL;
2337                         BUG_ON(*level > wc->active_node);
2338                         if (*level == wc->active_node)
2339                                 leave_shared_node(root, wc, *level);
2340                         *level = i + 1;
2341                 }
2342         }
2343         return 1;
2344 }
2345
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2347                            int *level)
2348 {
2349         int i;
2350         struct extent_buffer *leaf;
2351
2352         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353                 leaf = path->nodes[i];
2354                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355                         path->slots[i]++;
2356                         *level = i;
2357                         return 0;
2358                 } else {
2359                         free_extent_buffer(path->nodes[*level]);
2360                         path->nodes[*level] = NULL;
2361                         *level = i + 1;
2362                 }
2363         }
2364         return 1;
2365 }
2366
2367 static int check_root_dir(struct inode_record *rec)
2368 {
2369         struct inode_backref *backref;
2370         int ret = -1;
2371
2372         if (!rec->found_inode_item || rec->errors)
2373                 goto out;
2374         if (rec->nlink != 1 || rec->found_link != 0)
2375                 goto out;
2376         if (list_empty(&rec->backrefs))
2377                 goto out;
2378         backref = to_inode_backref(rec->backrefs.next);
2379         if (!backref->found_inode_ref)
2380                 goto out;
2381         if (backref->index != 0 || backref->namelen != 2 ||
2382             memcmp(backref->name, "..", 2))
2383                 goto out;
2384         if (backref->found_dir_index || backref->found_dir_item)
2385                 goto out;
2386         ret = 0;
2387 out:
2388         return ret;
2389 }
2390
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392                               struct btrfs_root *root, struct btrfs_path *path,
2393                               struct inode_record *rec)
2394 {
2395         struct btrfs_inode_item *ei;
2396         struct btrfs_key key;
2397         int ret;
2398
2399         key.objectid = rec->ino;
2400         key.type = BTRFS_INODE_ITEM_KEY;
2401         key.offset = (u64)-1;
2402
2403         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2404         if (ret < 0)
2405                 goto out;
2406         if (ret) {
2407                 if (!path->slots[0]) {
2408                         ret = -ENOENT;
2409                         goto out;
2410                 }
2411                 path->slots[0]--;
2412                 ret = 0;
2413         }
2414         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415         if (key.objectid != rec->ino) {
2416                 ret = -ENOENT;
2417                 goto out;
2418         }
2419
2420         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421                             struct btrfs_inode_item);
2422         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423         btrfs_mark_buffer_dirty(path->nodes[0]);
2424         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2425         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2426                root->root_key.objectid);
2427 out:
2428         btrfs_release_path(path);
2429         return ret;
2430 }
2431
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433                                     struct btrfs_root *root,
2434                                     struct btrfs_path *path,
2435                                     struct inode_record *rec)
2436 {
2437         int ret;
2438
2439         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440         btrfs_release_path(path);
2441         if (!ret)
2442                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2443         return ret;
2444 }
2445
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447                                struct btrfs_root *root,
2448                                struct btrfs_path *path,
2449                                struct inode_record *rec)
2450 {
2451         struct btrfs_inode_item *ei;
2452         struct btrfs_key key;
2453         int ret = 0;
2454
2455         key.objectid = rec->ino;
2456         key.type = BTRFS_INODE_ITEM_KEY;
2457         key.offset = 0;
2458
2459         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2460         if (ret) {
2461                 if (ret > 0)
2462                         ret = -ENOENT;
2463                 goto out;
2464         }
2465
2466         /* Since ret == 0, no need to check anything */
2467         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468                             struct btrfs_inode_item);
2469         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470         btrfs_mark_buffer_dirty(path->nodes[0]);
2471         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472         printf("reset nbytes for ino %llu root %llu\n",
2473                rec->ino, root->root_key.objectid);
2474 out:
2475         btrfs_release_path(path);
2476         return ret;
2477 }
2478
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480                                  struct cache_tree *inode_cache,
2481                                  struct inode_record *rec,
2482                                  struct inode_backref *backref)
2483 {
2484         struct btrfs_path path;
2485         struct btrfs_trans_handle *trans;
2486         struct btrfs_dir_item *dir_item;
2487         struct extent_buffer *leaf;
2488         struct btrfs_key key;
2489         struct btrfs_disk_key disk_key;
2490         struct inode_record *dir_rec;
2491         unsigned long name_ptr;
2492         u32 data_size = sizeof(*dir_item) + backref->namelen;
2493         int ret;
2494
2495         trans = btrfs_start_transaction(root, 1);
2496         if (IS_ERR(trans))
2497                 return PTR_ERR(trans);
2498
2499         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500                 (unsigned long long)rec->ino);
2501
2502         btrfs_init_path(&path);
2503         key.objectid = backref->dir;
2504         key.type = BTRFS_DIR_INDEX_KEY;
2505         key.offset = backref->index;
2506         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2507         BUG_ON(ret);
2508
2509         leaf = path.nodes[0];
2510         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2511
2512         disk_key.objectid = cpu_to_le64(rec->ino);
2513         disk_key.type = BTRFS_INODE_ITEM_KEY;
2514         disk_key.offset = 0;
2515
2516         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518         btrfs_set_dir_data_len(leaf, dir_item, 0);
2519         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2520         name_ptr = (unsigned long)(dir_item + 1);
2521         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522         btrfs_mark_buffer_dirty(leaf);
2523         btrfs_release_path(&path);
2524         btrfs_commit_transaction(trans, root);
2525
2526         backref->found_dir_index = 1;
2527         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528         BUG_ON(IS_ERR(dir_rec));
2529         if (!dir_rec)
2530                 return 0;
2531         dir_rec->found_size += backref->namelen;
2532         if (dir_rec->found_size == dir_rec->isize &&
2533             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535         if (dir_rec->found_size != dir_rec->isize)
2536                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2537
2538         return 0;
2539 }
2540
2541 static int delete_dir_index(struct btrfs_root *root,
2542                             struct inode_backref *backref)
2543 {
2544         struct btrfs_trans_handle *trans;
2545         struct btrfs_dir_item *di;
2546         struct btrfs_path path;
2547         int ret = 0;
2548
2549         trans = btrfs_start_transaction(root, 1);
2550         if (IS_ERR(trans))
2551                 return PTR_ERR(trans);
2552
2553         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554                 (unsigned long long)backref->dir,
2555                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556                 (unsigned long long)root->objectid);
2557
2558         btrfs_init_path(&path);
2559         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560                                     backref->name, backref->namelen,
2561                                     backref->index, -1);
2562         if (IS_ERR(di)) {
2563                 ret = PTR_ERR(di);
2564                 btrfs_release_path(&path);
2565                 btrfs_commit_transaction(trans, root);
2566                 if (ret == -ENOENT)
2567                         return 0;
2568                 return ret;
2569         }
2570
2571         if (!di)
2572                 ret = btrfs_del_item(trans, root, &path);
2573         else
2574                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2575         BUG_ON(ret);
2576         btrfs_release_path(&path);
2577         btrfs_commit_transaction(trans, root);
2578         return ret;
2579 }
2580
2581 static int create_inode_item(struct btrfs_root *root,
2582                              struct inode_record *rec,
2583                              int root_dir)
2584 {
2585         struct btrfs_trans_handle *trans;
2586         struct btrfs_inode_item inode_item;
2587         time_t now = time(NULL);
2588         int ret;
2589
2590         trans = btrfs_start_transaction(root, 1);
2591         if (IS_ERR(trans)) {
2592                 ret = PTR_ERR(trans);
2593                 return ret;
2594         }
2595
2596         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597                 "be incomplete, please check permissions and content after "
2598                 "the fsck completes.\n", (unsigned long long)root->objectid,
2599                 (unsigned long long)rec->ino);
2600
2601         memset(&inode_item, 0, sizeof(inode_item));
2602         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2603         if (root_dir)
2604                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2605         else
2606                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608         if (rec->found_dir_item) {
2609                 if (rec->found_file_extent)
2610                         fprintf(stderr, "root %llu inode %llu has both a dir "
2611                                 "item and extents, unsure if it is a dir or a "
2612                                 "regular file so setting it as a directory\n",
2613                                 (unsigned long long)root->objectid,
2614                                 (unsigned long long)rec->ino);
2615                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2617         } else if (!rec->found_dir_item) {
2618                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2620         }
2621         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2629
2630         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2631         BUG_ON(ret);
2632         btrfs_commit_transaction(trans, root);
2633         return 0;
2634 }
2635
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637                                  struct inode_record *rec,
2638                                  struct cache_tree *inode_cache,
2639                                  int delete)
2640 {
2641         struct inode_backref *tmp, *backref;
2642         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2643         int ret = 0;
2644         int repaired = 0;
2645
2646         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647                 if (!delete && rec->ino == root_dirid) {
2648                         if (!rec->found_inode_item) {
2649                                 ret = create_inode_item(root, rec, 1);
2650                                 if (ret)
2651                                         break;
2652                                 repaired++;
2653                         }
2654                 }
2655
2656                 /* Index 0 for root dir's are special, don't mess with it */
2657                 if (rec->ino == root_dirid && backref->index == 0)
2658                         continue;
2659
2660                 if (delete &&
2661                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2662                      (backref->found_dir_index && backref->found_inode_ref &&
2663                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664                         ret = delete_dir_index(root, backref);
2665                         if (ret)
2666                                 break;
2667                         repaired++;
2668                         list_del(&backref->list);
2669                         free(backref);
2670                         continue;
2671                 }
2672
2673                 if (!delete && !backref->found_dir_index &&
2674                     backref->found_dir_item && backref->found_inode_ref) {
2675                         ret = add_missing_dir_index(root, inode_cache, rec,
2676                                                     backref);
2677                         if (ret)
2678                                 break;
2679                         repaired++;
2680                         if (backref->found_dir_item &&
2681                             backref->found_dir_index) {
2682                                 if (!backref->errors &&
2683                                     backref->found_inode_ref) {
2684                                         list_del(&backref->list);
2685                                         free(backref);
2686                                         continue;
2687                                 }
2688                         }
2689                 }
2690
2691                 if (!delete && (!backref->found_dir_index &&
2692                                 !backref->found_dir_item &&
2693                                 backref->found_inode_ref)) {
2694                         struct btrfs_trans_handle *trans;
2695                         struct btrfs_key location;
2696
2697                         ret = check_dir_conflict(root, backref->name,
2698                                                  backref->namelen,
2699                                                  backref->dir,
2700                                                  backref->index);
2701                         if (ret) {
2702                                 /*
2703                                  * let nlink fixing routine to handle it,
2704                                  * which can do it better.
2705                                  */
2706                                 ret = 0;
2707                                 break;
2708                         }
2709                         location.objectid = rec->ino;
2710                         location.type = BTRFS_INODE_ITEM_KEY;
2711                         location.offset = 0;
2712
2713                         trans = btrfs_start_transaction(root, 1);
2714                         if (IS_ERR(trans)) {
2715                                 ret = PTR_ERR(trans);
2716                                 break;
2717                         }
2718                         fprintf(stderr, "adding missing dir index/item pair "
2719                                 "for inode %llu\n",
2720                                 (unsigned long long)rec->ino);
2721                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2722                                                     backref->namelen,
2723                                                     backref->dir, &location,
2724                                                     imode_to_type(rec->imode),
2725                                                     backref->index);
2726                         BUG_ON(ret);
2727                         btrfs_commit_transaction(trans, root);
2728                         repaired++;
2729                 }
2730
2731                 if (!delete && (backref->found_inode_ref &&
2732                                 backref->found_dir_index &&
2733                                 backref->found_dir_item &&
2734                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2735                                 !rec->found_inode_item)) {
2736                         ret = create_inode_item(root, rec, 0);
2737                         if (ret)
2738                                 break;
2739                         repaired++;
2740                 }
2741
2742         }
2743         return ret ? ret : repaired;
2744 }
2745
2746 /*
2747  * To determine the file type for nlink/inode_item repair
2748  *
2749  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2750  * Return -ENOENT if file type is not found.
2751  */
2752 static int find_file_type(struct inode_record *rec, u8 *type)
2753 {
2754         struct inode_backref *backref;
2755
2756         /* For inode item recovered case */
2757         if (rec->found_inode_item) {
2758                 *type = imode_to_type(rec->imode);
2759                 return 0;
2760         }
2761
2762         list_for_each_entry(backref, &rec->backrefs, list) {
2763                 if (backref->found_dir_index || backref->found_dir_item) {
2764                         *type = backref->filetype;
2765                         return 0;
2766                 }
2767         }
2768         return -ENOENT;
2769 }
2770
2771 /*
2772  * To determine the file name for nlink repair
2773  *
2774  * Return 0 if file name is found, set name and namelen.
2775  * Return -ENOENT if file name is not found.
2776  */
2777 static int find_file_name(struct inode_record *rec,
2778                           char *name, int *namelen)
2779 {
2780         struct inode_backref *backref;
2781
2782         list_for_each_entry(backref, &rec->backrefs, list) {
2783                 if (backref->found_dir_index || backref->found_dir_item ||
2784                     backref->found_inode_ref) {
2785                         memcpy(name, backref->name, backref->namelen);
2786                         *namelen = backref->namelen;
2787                         return 0;
2788                 }
2789         }
2790         return -ENOENT;
2791 }
2792
2793 /* Reset the nlink of the inode to the correct one */
2794 static int reset_nlink(struct btrfs_trans_handle *trans,
2795                        struct btrfs_root *root,
2796                        struct btrfs_path *path,
2797                        struct inode_record *rec)
2798 {
2799         struct inode_backref *backref;
2800         struct inode_backref *tmp;
2801         struct btrfs_key key;
2802         struct btrfs_inode_item *inode_item;
2803         int ret = 0;
2804
2805         /* We don't believe this either, reset it and iterate backref */
2806         rec->found_link = 0;
2807
2808         /* Remove all backref including the valid ones */
2809         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2810                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2811                                    backref->index, backref->name,
2812                                    backref->namelen, 0);
2813                 if (ret < 0)
2814                         goto out;
2815
2816                 /* remove invalid backref, so it won't be added back */
2817                 if (!(backref->found_dir_index &&
2818                       backref->found_dir_item &&
2819                       backref->found_inode_ref)) {
2820                         list_del(&backref->list);
2821                         free(backref);
2822                 } else {
2823                         rec->found_link++;
2824                 }
2825         }
2826
2827         /* Set nlink to 0 */
2828         key.objectid = rec->ino;
2829         key.type = BTRFS_INODE_ITEM_KEY;
2830         key.offset = 0;
2831         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2832         if (ret < 0)
2833                 goto out;
2834         if (ret > 0) {
2835                 ret = -ENOENT;
2836                 goto out;
2837         }
2838         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2839                                     struct btrfs_inode_item);
2840         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2841         btrfs_mark_buffer_dirty(path->nodes[0]);
2842         btrfs_release_path(path);
2843
2844         /*
2845          * Add back valid inode_ref/dir_item/dir_index,
2846          * add_link() will handle the nlink inc, so new nlink must be correct
2847          */
2848         list_for_each_entry(backref, &rec->backrefs, list) {
2849                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2850                                      backref->name, backref->namelen,
2851                                      backref->filetype, &backref->index, 1);
2852                 if (ret < 0)
2853                         goto out;
2854         }
2855 out:
2856         btrfs_release_path(path);
2857         return ret;
2858 }
2859
2860 static int get_highest_inode(struct btrfs_trans_handle *trans,
2861                                 struct btrfs_root *root,
2862                                 struct btrfs_path *path,
2863                                 u64 *highest_ino)
2864 {
2865         struct btrfs_key key, found_key;
2866         int ret;
2867
2868         btrfs_init_path(path);
2869         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2870         key.offset = -1;
2871         key.type = BTRFS_INODE_ITEM_KEY;
2872         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2873         if (ret == 1) {
2874                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2875                                 path->slots[0] - 1);
2876                 *highest_ino = found_key.objectid;
2877                 ret = 0;
2878         }
2879         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2880                 ret = -EOVERFLOW;
2881         btrfs_release_path(path);
2882         return ret;
2883 }
2884
2885 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2886                                struct btrfs_root *root,
2887                                struct btrfs_path *path,
2888                                struct inode_record *rec)
2889 {
2890         char *dir_name = "lost+found";
2891         char namebuf[BTRFS_NAME_LEN] = {0};
2892         u64 lost_found_ino;
2893         u32 mode = 0700;
2894         u8 type = 0;
2895         int namelen = 0;
2896         int name_recovered = 0;
2897         int type_recovered = 0;
2898         int ret = 0;
2899
2900         /*
2901          * Get file name and type first before these invalid inode ref
2902          * are deleted by remove_all_invalid_backref()
2903          */
2904         name_recovered = !find_file_name(rec, namebuf, &namelen);
2905         type_recovered = !find_file_type(rec, &type);
2906
2907         if (!name_recovered) {
2908                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2909                        rec->ino, rec->ino);
2910                 namelen = count_digits(rec->ino);
2911                 sprintf(namebuf, "%llu", rec->ino);
2912                 name_recovered = 1;
2913         }
2914         if (!type_recovered) {
2915                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2916                        rec->ino);
2917                 type = BTRFS_FT_REG_FILE;
2918                 type_recovered = 1;
2919         }
2920
2921         ret = reset_nlink(trans, root, path, rec);
2922         if (ret < 0) {
2923                 fprintf(stderr,
2924                         "Failed to reset nlink for inode %llu: %s\n",
2925                         rec->ino, strerror(-ret));
2926                 goto out;
2927         }
2928
2929         if (rec->found_link == 0) {
2930                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2931                 if (ret < 0)
2932                         goto out;
2933                 lost_found_ino++;
2934                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2935                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2936                                   mode);
2937                 if (ret < 0) {
2938                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2939                                 dir_name, strerror(-ret));
2940                         goto out;
2941                 }
2942                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2943                                      namebuf, namelen, type, NULL, 1);
2944                 /*
2945                  * Add ".INO" suffix several times to handle case where
2946                  * "FILENAME.INO" is already taken by another file.
2947                  */
2948                 while (ret == -EEXIST) {
2949                         /*
2950                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2951                          */
2952                         if (namelen + count_digits(rec->ino) + 1 >
2953                             BTRFS_NAME_LEN) {
2954                                 ret = -EFBIG;
2955                                 goto out;
2956                         }
2957                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2958                                  ".%llu", rec->ino);
2959                         namelen += count_digits(rec->ino) + 1;
2960                         ret = btrfs_add_link(trans, root, rec->ino,
2961                                              lost_found_ino, namebuf,
2962                                              namelen, type, NULL, 1);
2963                 }
2964                 if (ret < 0) {
2965                         fprintf(stderr,
2966                                 "Failed to link the inode %llu to %s dir: %s\n",
2967                                 rec->ino, dir_name, strerror(-ret));
2968                         goto out;
2969                 }
2970                 /*
2971                  * Just increase the found_link, don't actually add the
2972                  * backref. This will make things easier and this inode
2973                  * record will be freed after the repair is done.
2974                  * So fsck will not report problem about this inode.
2975                  */
2976                 rec->found_link++;
2977                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2978                        namelen, namebuf, dir_name);
2979         }
2980         printf("Fixed the nlink of inode %llu\n", rec->ino);
2981 out:
2982         /*
2983          * Clear the flag anyway, or we will loop forever for the same inode
2984          * as it will not be removed from the bad inode list and the dead loop
2985          * happens.
2986          */
2987         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2988         btrfs_release_path(path);
2989         return ret;
2990 }
2991
2992 /*
2993  * Check if there is any normal(reg or prealloc) file extent for given
2994  * ino.
2995  * This is used to determine the file type when neither its dir_index/item or
2996  * inode_item exists.
2997  *
2998  * This will *NOT* report error, if any error happens, just consider it does
2999  * not have any normal file extent.
3000  */
3001 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3002 {
3003         struct btrfs_path path;
3004         struct btrfs_key key;
3005         struct btrfs_key found_key;
3006         struct btrfs_file_extent_item *fi;
3007         u8 type;
3008         int ret = 0;
3009
3010         btrfs_init_path(&path);
3011         key.objectid = ino;
3012         key.type = BTRFS_EXTENT_DATA_KEY;
3013         key.offset = 0;
3014
3015         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3016         if (ret < 0) {
3017                 ret = 0;
3018                 goto out;
3019         }
3020         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3021                 ret = btrfs_next_leaf(root, &path);
3022                 if (ret) {
3023                         ret = 0;
3024                         goto out;
3025                 }
3026         }
3027         while (1) {
3028                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3029                                       path.slots[0]);
3030                 if (found_key.objectid != ino ||
3031                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3032                         break;
3033                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3034                                     struct btrfs_file_extent_item);
3035                 type = btrfs_file_extent_type(path.nodes[0], fi);
3036                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3037                         ret = 1;
3038                         goto out;
3039                 }
3040         }
3041 out:
3042         btrfs_release_path(&path);
3043         return ret;
3044 }
3045
3046 static u32 btrfs_type_to_imode(u8 type)
3047 {
3048         static u32 imode_by_btrfs_type[] = {
3049                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3050                 [BTRFS_FT_DIR]          = S_IFDIR,
3051                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3052                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3053                 [BTRFS_FT_FIFO]         = S_IFIFO,
3054                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3055                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3056         };
3057
3058         return imode_by_btrfs_type[(type)];
3059 }
3060
3061 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3062                                 struct btrfs_root *root,
3063                                 struct btrfs_path *path,
3064                                 struct inode_record *rec)
3065 {
3066         u8 filetype;
3067         u32 mode = 0700;
3068         int type_recovered = 0;
3069         int ret = 0;
3070
3071         printf("Trying to rebuild inode:%llu\n", rec->ino);
3072
3073         type_recovered = !find_file_type(rec, &filetype);
3074
3075         /*
3076          * Try to determine inode type if type not found.
3077          *
3078          * For found regular file extent, it must be FILE.
3079          * For found dir_item/index, it must be DIR.
3080          *
3081          * For undetermined one, use FILE as fallback.
3082          *
3083          * TODO:
3084          * 1. If found backref(inode_index/item is already handled) to it,
3085          *    it must be DIR.
3086          *    Need new inode-inode ref structure to allow search for that.
3087          */
3088         if (!type_recovered) {
3089                 if (rec->found_file_extent &&
3090                     find_normal_file_extent(root, rec->ino)) {
3091                         type_recovered = 1;
3092                         filetype = BTRFS_FT_REG_FILE;
3093                 } else if (rec->found_dir_item) {
3094                         type_recovered = 1;
3095                         filetype = BTRFS_FT_DIR;
3096                 } else if (!list_empty(&rec->orphan_extents)) {
3097                         type_recovered = 1;
3098                         filetype = BTRFS_FT_REG_FILE;
3099                 } else{
3100                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3101                                rec->ino);
3102                         type_recovered = 1;
3103                         filetype = BTRFS_FT_REG_FILE;
3104                 }
3105         }
3106
3107         ret = btrfs_new_inode(trans, root, rec->ino,
3108                               mode | btrfs_type_to_imode(filetype));
3109         if (ret < 0)
3110                 goto out;
3111
3112         /*
3113          * Here inode rebuild is done, we only rebuild the inode item,
3114          * don't repair the nlink(like move to lost+found).
3115          * That is the job of nlink repair.
3116          *
3117          * We just fill the record and return
3118          */
3119         rec->found_dir_item = 1;
3120         rec->imode = mode | btrfs_type_to_imode(filetype);
3121         rec->nlink = 0;
3122         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3123         /* Ensure the inode_nlinks repair function will be called */
3124         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3125 out:
3126         return ret;
3127 }
3128
3129 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3130                                       struct btrfs_root *root,
3131                                       struct btrfs_path *path,
3132                                       struct inode_record *rec)
3133 {
3134         struct orphan_data_extent *orphan;
3135         struct orphan_data_extent *tmp;
3136         int ret = 0;
3137
3138         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3139                 /*
3140                  * Check for conflicting file extents
3141                  *
3142                  * Here we don't know whether the extents is compressed or not,
3143                  * so we can only assume it not compressed nor data offset,
3144                  * and use its disk_len as extent length.
3145                  */
3146                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3147                                        orphan->offset, orphan->disk_len, 0);
3148                 btrfs_release_path(path);
3149                 if (ret < 0)
3150                         goto out;
3151                 if (!ret) {
3152                         fprintf(stderr,
3153                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3154                                 orphan->disk_bytenr, orphan->disk_len);
3155                         ret = btrfs_free_extent(trans,
3156                                         root->fs_info->extent_root,
3157                                         orphan->disk_bytenr, orphan->disk_len,
3158                                         0, root->objectid, orphan->objectid,
3159                                         orphan->offset);
3160                         if (ret < 0)
3161                                 goto out;
3162                 }
3163                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3164                                 orphan->offset, orphan->disk_bytenr,
3165                                 orphan->disk_len, orphan->disk_len);
3166                 if (ret < 0)
3167                         goto out;
3168
3169                 /* Update file size info */
3170                 rec->found_size += orphan->disk_len;
3171                 if (rec->found_size == rec->nbytes)
3172                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3173
3174                 /* Update the file extent hole info too */
3175                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3176                                            orphan->disk_len);
3177                 if (ret < 0)
3178                         goto out;
3179                 if (RB_EMPTY_ROOT(&rec->holes))
3180                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3181
3182                 list_del(&orphan->list);
3183                 free(orphan);
3184         }
3185         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3186 out:
3187         return ret;
3188 }
3189
3190 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3191                                         struct btrfs_root *root,
3192                                         struct btrfs_path *path,
3193                                         struct inode_record *rec)
3194 {
3195         struct rb_node *node;
3196         struct file_extent_hole *hole;
3197         int found = 0;
3198         int ret = 0;
3199
3200         node = rb_first(&rec->holes);
3201
3202         while (node) {
3203                 found = 1;
3204                 hole = rb_entry(node, struct file_extent_hole, node);
3205                 ret = btrfs_punch_hole(trans, root, rec->ino,
3206                                        hole->start, hole->len);
3207                 if (ret < 0)
3208                         goto out;
3209                 ret = del_file_extent_hole(&rec->holes, hole->start,
3210                                            hole->len);
3211                 if (ret < 0)
3212                         goto out;
3213                 if (RB_EMPTY_ROOT(&rec->holes))
3214                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3215                 node = rb_first(&rec->holes);
3216         }
3217         /* special case for a file losing all its file extent */
3218         if (!found) {
3219                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3220                                        round_up(rec->isize, root->sectorsize));
3221                 if (ret < 0)
3222                         goto out;
3223         }
3224         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3225                rec->ino, root->objectid);
3226 out:
3227         return ret;
3228 }
3229
3230 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3231 {
3232         struct btrfs_trans_handle *trans;
3233         struct btrfs_path path;
3234         int ret = 0;
3235
3236         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3237                              I_ERR_NO_ORPHAN_ITEM |
3238                              I_ERR_LINK_COUNT_WRONG |
3239                              I_ERR_NO_INODE_ITEM |
3240                              I_ERR_FILE_EXTENT_ORPHAN |
3241                              I_ERR_FILE_EXTENT_DISCOUNT|
3242                              I_ERR_FILE_NBYTES_WRONG)))
3243                 return rec->errors;
3244
3245         /*
3246          * For nlink repair, it may create a dir and add link, so
3247          * 2 for parent(256)'s dir_index and dir_item
3248          * 2 for lost+found dir's inode_item and inode_ref
3249          * 1 for the new inode_ref of the file
3250          * 2 for lost+found dir's dir_index and dir_item for the file
3251          */
3252         trans = btrfs_start_transaction(root, 7);
3253         if (IS_ERR(trans))
3254                 return PTR_ERR(trans);
3255
3256         btrfs_init_path(&path);
3257         if (rec->errors & I_ERR_NO_INODE_ITEM)
3258                 ret = repair_inode_no_item(trans, root, &path, rec);
3259         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3260                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3261         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3262                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3263         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3264                 ret = repair_inode_isize(trans, root, &path, rec);
3265         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3266                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3267         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3268                 ret = repair_inode_nlinks(trans, root, &path, rec);
3269         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3270                 ret = repair_inode_nbytes(trans, root, &path, rec);
3271         btrfs_commit_transaction(trans, root);
3272         btrfs_release_path(&path);
3273         return ret;
3274 }
3275
3276 static int check_inode_recs(struct btrfs_root *root,
3277                             struct cache_tree *inode_cache)
3278 {
3279         struct cache_extent *cache;
3280         struct ptr_node *node;
3281         struct inode_record *rec;
3282         struct inode_backref *backref;
3283         int stage = 0;
3284         int ret = 0;
3285         int err = 0;
3286         u64 error = 0;
3287         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3288
3289         if (btrfs_root_refs(&root->root_item) == 0) {
3290                 if (!cache_tree_empty(inode_cache))
3291                         fprintf(stderr, "warning line %d\n", __LINE__);
3292                 return 0;
3293         }
3294
3295         /*
3296          * We need to repair backrefs first because we could change some of the
3297          * errors in the inode recs.
3298          *
3299          * We also need to go through and delete invalid backrefs first and then
3300          * add the correct ones second.  We do this because we may get EEXIST
3301          * when adding back the correct index because we hadn't yet deleted the
3302          * invalid index.
3303          *
3304          * For example, if we were missing a dir index then the directories
3305          * isize would be wrong, so if we fixed the isize to what we thought it
3306          * would be and then fixed the backref we'd still have a invalid fs, so
3307          * we need to add back the dir index and then check to see if the isize
3308          * is still wrong.
3309          */
3310         while (stage < 3) {
3311                 stage++;
3312                 if (stage == 3 && !err)
3313                         break;
3314
3315                 cache = search_cache_extent(inode_cache, 0);
3316                 while (repair && cache) {
3317                         node = container_of(cache, struct ptr_node, cache);
3318                         rec = node->data;
3319                         cache = next_cache_extent(cache);
3320
3321                         /* Need to free everything up and rescan */
3322                         if (stage == 3) {
3323                                 remove_cache_extent(inode_cache, &node->cache);
3324                                 free(node);
3325                                 free_inode_rec(rec);
3326                                 continue;
3327                         }
3328
3329                         if (list_empty(&rec->backrefs))
3330                                 continue;
3331
3332                         ret = repair_inode_backrefs(root, rec, inode_cache,
3333                                                     stage == 1);
3334                         if (ret < 0) {
3335                                 err = ret;
3336                                 stage = 2;
3337                                 break;
3338                         } if (ret > 0) {
3339                                 err = -EAGAIN;
3340                         }
3341                 }
3342         }
3343         if (err)
3344                 return err;
3345
3346         rec = get_inode_rec(inode_cache, root_dirid, 0);
3347         BUG_ON(IS_ERR(rec));
3348         if (rec) {
3349                 ret = check_root_dir(rec);
3350                 if (ret) {
3351                         fprintf(stderr, "root %llu root dir %llu error\n",
3352                                 (unsigned long long)root->root_key.objectid,
3353                                 (unsigned long long)root_dirid);
3354                         print_inode_error(root, rec);
3355                         error++;
3356                 }
3357         } else {
3358                 if (repair) {
3359                         struct btrfs_trans_handle *trans;
3360
3361                         trans = btrfs_start_transaction(root, 1);
3362                         if (IS_ERR(trans)) {
3363                                 err = PTR_ERR(trans);
3364                                 return err;
3365                         }
3366
3367                         fprintf(stderr,
3368                                 "root %llu missing its root dir, recreating\n",
3369                                 (unsigned long long)root->objectid);
3370
3371                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3372                         BUG_ON(ret);
3373
3374                         btrfs_commit_transaction(trans, root);
3375                         return -EAGAIN;
3376                 }
3377
3378                 fprintf(stderr, "root %llu root dir %llu not found\n",
3379                         (unsigned long long)root->root_key.objectid,
3380                         (unsigned long long)root_dirid);
3381         }
3382
3383         while (1) {
3384                 cache = search_cache_extent(inode_cache, 0);
3385                 if (!cache)
3386                         break;
3387                 node = container_of(cache, struct ptr_node, cache);
3388                 rec = node->data;
3389                 remove_cache_extent(inode_cache, &node->cache);
3390                 free(node);
3391                 if (rec->ino == root_dirid ||
3392                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3393                         free_inode_rec(rec);
3394                         continue;
3395                 }
3396
3397                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3398                         ret = check_orphan_item(root, rec->ino);
3399                         if (ret == 0)
3400                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3401                         if (can_free_inode_rec(rec)) {
3402                                 free_inode_rec(rec);
3403                                 continue;
3404                         }
3405                 }
3406
3407                 if (!rec->found_inode_item)
3408                         rec->errors |= I_ERR_NO_INODE_ITEM;
3409                 if (rec->found_link != rec->nlink)
3410                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3411                 if (repair) {
3412                         ret = try_repair_inode(root, rec);
3413                         if (ret == 0 && can_free_inode_rec(rec)) {
3414                                 free_inode_rec(rec);
3415                                 continue;
3416                         }
3417                         ret = 0;
3418                 }
3419
3420                 if (!(repair && ret == 0))
3421                         error++;
3422                 print_inode_error(root, rec);
3423                 list_for_each_entry(backref, &rec->backrefs, list) {
3424                         if (!backref->found_dir_item)
3425                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3426                         if (!backref->found_dir_index)
3427                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3428                         if (!backref->found_inode_ref)
3429                                 backref->errors |= REF_ERR_NO_INODE_REF;
3430                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3431                                 " namelen %u name %s filetype %d errors %x",
3432                                 (unsigned long long)backref->dir,
3433                                 (unsigned long long)backref->index,
3434                                 backref->namelen, backref->name,
3435                                 backref->filetype, backref->errors);
3436                         print_ref_error(backref->errors);
3437                 }
3438                 free_inode_rec(rec);
3439         }
3440         return (error > 0) ? -1 : 0;
3441 }
3442
3443 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3444                                         u64 objectid)
3445 {
3446         struct cache_extent *cache;
3447         struct root_record *rec = NULL;
3448         int ret;
3449
3450         cache = lookup_cache_extent(root_cache, objectid, 1);
3451         if (cache) {
3452                 rec = container_of(cache, struct root_record, cache);
3453         } else {
3454                 rec = calloc(1, sizeof(*rec));
3455                 if (!rec)
3456                         return ERR_PTR(-ENOMEM);
3457                 rec->objectid = objectid;
3458                 INIT_LIST_HEAD(&rec->backrefs);
3459                 rec->cache.start = objectid;
3460                 rec->cache.size = 1;
3461
3462                 ret = insert_cache_extent(root_cache, &rec->cache);
3463                 if (ret)
3464                         return ERR_PTR(-EEXIST);
3465         }
3466         return rec;
3467 }
3468
3469 static struct root_backref *get_root_backref(struct root_record *rec,
3470                                              u64 ref_root, u64 dir, u64 index,
3471                                              const char *name, int namelen)
3472 {
3473         struct root_backref *backref;
3474
3475         list_for_each_entry(backref, &rec->backrefs, list) {
3476                 if (backref->ref_root != ref_root || backref->dir != dir ||
3477                     backref->namelen != namelen)
3478                         continue;
3479                 if (memcmp(name, backref->name, namelen))
3480                         continue;
3481                 return backref;
3482         }
3483
3484         backref = calloc(1, sizeof(*backref) + namelen + 1);
3485         if (!backref)
3486                 return NULL;
3487         backref->ref_root = ref_root;
3488         backref->dir = dir;
3489         backref->index = index;
3490         backref->namelen = namelen;
3491         memcpy(backref->name, name, namelen);
3492         backref->name[namelen] = '\0';
3493         list_add_tail(&backref->list, &rec->backrefs);
3494         return backref;
3495 }
3496
3497 static void free_root_record(struct cache_extent *cache)
3498 {
3499         struct root_record *rec;
3500         struct root_backref *backref;
3501
3502         rec = container_of(cache, struct root_record, cache);
3503         while (!list_empty(&rec->backrefs)) {
3504                 backref = to_root_backref(rec->backrefs.next);
3505                 list_del(&backref->list);
3506                 free(backref);
3507         }
3508
3509         free(rec);
3510 }
3511
3512 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3513
3514 static int add_root_backref(struct cache_tree *root_cache,
3515                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3516                             const char *name, int namelen,
3517                             int item_type, int errors)
3518 {
3519         struct root_record *rec;
3520         struct root_backref *backref;
3521
3522         rec = get_root_rec(root_cache, root_id);
3523         BUG_ON(IS_ERR(rec));
3524         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3525         BUG_ON(!backref);
3526
3527         backref->errors |= errors;
3528
3529         if (item_type != BTRFS_DIR_ITEM_KEY) {
3530                 if (backref->found_dir_index || backref->found_back_ref ||
3531                     backref->found_forward_ref) {
3532                         if (backref->index != index)
3533                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3534                 } else {
3535                         backref->index = index;
3536                 }
3537         }
3538
3539         if (item_type == BTRFS_DIR_ITEM_KEY) {
3540                 if (backref->found_forward_ref)
3541                         rec->found_ref++;
3542                 backref->found_dir_item = 1;
3543         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3544                 backref->found_dir_index = 1;
3545         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3546                 if (backref->found_forward_ref)
3547                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3548                 else if (backref->found_dir_item)
3549                         rec->found_ref++;
3550                 backref->found_forward_ref = 1;
3551         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3552                 if (backref->found_back_ref)
3553                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3554                 backref->found_back_ref = 1;
3555         } else {
3556                 BUG_ON(1);
3557         }
3558
3559         if (backref->found_forward_ref && backref->found_dir_item)
3560                 backref->reachable = 1;
3561         return 0;
3562 }
3563
3564 static int merge_root_recs(struct btrfs_root *root,
3565                            struct cache_tree *src_cache,
3566                            struct cache_tree *dst_cache)
3567 {
3568         struct cache_extent *cache;
3569         struct ptr_node *node;
3570         struct inode_record *rec;
3571         struct inode_backref *backref;
3572         int ret = 0;
3573
3574         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3575                 free_inode_recs_tree(src_cache);
3576                 return 0;
3577         }
3578
3579         while (1) {
3580                 cache = search_cache_extent(src_cache, 0);
3581                 if (!cache)
3582                         break;
3583                 node = container_of(cache, struct ptr_node, cache);
3584                 rec = node->data;
3585                 remove_cache_extent(src_cache, &node->cache);
3586                 free(node);
3587
3588                 ret = is_child_root(root, root->objectid, rec->ino);
3589                 if (ret < 0)
3590                         break;
3591                 else if (ret == 0)
3592                         goto skip;
3593
3594                 list_for_each_entry(backref, &rec->backrefs, list) {
3595                         BUG_ON(backref->found_inode_ref);
3596                         if (backref->found_dir_item)
3597                                 add_root_backref(dst_cache, rec->ino,
3598                                         root->root_key.objectid, backref->dir,
3599                                         backref->index, backref->name,
3600                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3601                                         backref->errors);
3602                         if (backref->found_dir_index)
3603                                 add_root_backref(dst_cache, rec->ino,
3604                                         root->root_key.objectid, backref->dir,
3605                                         backref->index, backref->name,
3606                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3607                                         backref->errors);
3608                 }
3609 skip:
3610                 free_inode_rec(rec);
3611         }
3612         if (ret < 0)
3613                 return ret;
3614         return 0;
3615 }
3616
3617 static int check_root_refs(struct btrfs_root *root,
3618                            struct cache_tree *root_cache)
3619 {
3620         struct root_record *rec;
3621         struct root_record *ref_root;
3622         struct root_backref *backref;
3623         struct cache_extent *cache;
3624         int loop = 1;
3625         int ret;
3626         int error;
3627         int errors = 0;
3628
3629         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3630         BUG_ON(IS_ERR(rec));
3631         rec->found_ref = 1;
3632
3633         /* fixme: this can not detect circular references */
3634         while (loop) {
3635                 loop = 0;
3636                 cache = search_cache_extent(root_cache, 0);
3637                 while (1) {
3638                         if (!cache)
3639                                 break;
3640                         rec = container_of(cache, struct root_record, cache);
3641                         cache = next_cache_extent(cache);
3642
3643                         if (rec->found_ref == 0)
3644                                 continue;
3645
3646                         list_for_each_entry(backref, &rec->backrefs, list) {
3647                                 if (!backref->reachable)
3648                                         continue;
3649
3650                                 ref_root = get_root_rec(root_cache,
3651                                                         backref->ref_root);
3652                                 BUG_ON(IS_ERR(ref_root));
3653                                 if (ref_root->found_ref > 0)
3654                                         continue;
3655
3656                                 backref->reachable = 0;
3657                                 rec->found_ref--;
3658                                 if (rec->found_ref == 0)
3659                                         loop = 1;
3660                         }
3661                 }
3662         }
3663
3664         cache = search_cache_extent(root_cache, 0);
3665         while (1) {
3666                 if (!cache)
3667                         break;
3668                 rec = container_of(cache, struct root_record, cache);
3669                 cache = next_cache_extent(cache);
3670
3671                 if (rec->found_ref == 0 &&
3672                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3673                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3674                         ret = check_orphan_item(root->fs_info->tree_root,
3675                                                 rec->objectid);
3676                         if (ret == 0)
3677                                 continue;
3678
3679                         /*
3680                          * If we don't have a root item then we likely just have
3681                          * a dir item in a snapshot for this root but no actual
3682                          * ref key or anything so it's meaningless.
3683                          */
3684                         if (!rec->found_root_item)
3685                                 continue;
3686                         errors++;
3687                         fprintf(stderr, "fs tree %llu not referenced\n",
3688                                 (unsigned long long)rec->objectid);
3689                 }
3690
3691                 error = 0;
3692                 if (rec->found_ref > 0 && !rec->found_root_item)
3693                         error = 1;
3694                 list_for_each_entry(backref, &rec->backrefs, list) {
3695                         if (!backref->found_dir_item)
3696                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3697                         if (!backref->found_dir_index)
3698                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3699                         if (!backref->found_back_ref)
3700                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3701                         if (!backref->found_forward_ref)
3702                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3703                         if (backref->reachable && backref->errors)
3704                                 error = 1;
3705                 }
3706                 if (!error)
3707                         continue;
3708
3709                 errors++;
3710                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3711                         (unsigned long long)rec->objectid, rec->found_ref,
3712                          rec->found_root_item ? "" : "not found");
3713
3714                 list_for_each_entry(backref, &rec->backrefs, list) {
3715                         if (!backref->reachable)
3716                                 continue;
3717                         if (!backref->errors && rec->found_root_item)
3718                                 continue;
3719                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3720                                 " index %llu namelen %u name %s errors %x\n",
3721                                 (unsigned long long)backref->ref_root,
3722                                 (unsigned long long)backref->dir,
3723                                 (unsigned long long)backref->index,
3724                                 backref->namelen, backref->name,
3725                                 backref->errors);
3726                         print_ref_error(backref->errors);
3727                 }
3728         }
3729         return errors > 0 ? 1 : 0;
3730 }
3731
3732 static int process_root_ref(struct extent_buffer *eb, int slot,
3733                             struct btrfs_key *key,
3734                             struct cache_tree *root_cache)
3735 {
3736         u64 dirid;
3737         u64 index;
3738         u32 len;
3739         u32 name_len;
3740         struct btrfs_root_ref *ref;
3741         char namebuf[BTRFS_NAME_LEN];
3742         int error;
3743
3744         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3745
3746         dirid = btrfs_root_ref_dirid(eb, ref);
3747         index = btrfs_root_ref_sequence(eb, ref);
3748         name_len = btrfs_root_ref_name_len(eb, ref);
3749
3750         if (name_len <= BTRFS_NAME_LEN) {
3751                 len = name_len;
3752                 error = 0;
3753         } else {
3754                 len = BTRFS_NAME_LEN;
3755                 error = REF_ERR_NAME_TOO_LONG;
3756         }
3757         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3758
3759         if (key->type == BTRFS_ROOT_REF_KEY) {
3760                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3761                                  index, namebuf, len, key->type, error);
3762         } else {
3763                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3764                                  index, namebuf, len, key->type, error);
3765         }
3766         return 0;
3767 }
3768
3769 static void free_corrupt_block(struct cache_extent *cache)
3770 {
3771         struct btrfs_corrupt_block *corrupt;
3772
3773         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3774         free(corrupt);
3775 }
3776
3777 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3778
3779 /*
3780  * Repair the btree of the given root.
3781  *
3782  * The fix is to remove the node key in corrupt_blocks cache_tree.
3783  * and rebalance the tree.
3784  * After the fix, the btree should be writeable.
3785  */
3786 static int repair_btree(struct btrfs_root *root,
3787                         struct cache_tree *corrupt_blocks)
3788 {
3789         struct btrfs_trans_handle *trans;
3790         struct btrfs_path path;
3791         struct btrfs_corrupt_block *corrupt;
3792         struct cache_extent *cache;
3793         struct btrfs_key key;
3794         u64 offset;
3795         int level;
3796         int ret = 0;
3797
3798         if (cache_tree_empty(corrupt_blocks))
3799                 return 0;
3800
3801         trans = btrfs_start_transaction(root, 1);
3802         if (IS_ERR(trans)) {
3803                 ret = PTR_ERR(trans);
3804                 fprintf(stderr, "Error starting transaction: %s\n",
3805                         strerror(-ret));
3806                 return ret;
3807         }
3808         btrfs_init_path(&path);
3809         cache = first_cache_extent(corrupt_blocks);
3810         while (cache) {
3811                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3812                                        cache);
3813                 level = corrupt->level;
3814                 path.lowest_level = level;
3815                 key.objectid = corrupt->key.objectid;
3816                 key.type = corrupt->key.type;
3817                 key.offset = corrupt->key.offset;
3818
3819                 /*
3820                  * Here we don't want to do any tree balance, since it may
3821                  * cause a balance with corrupted brother leaf/node,
3822                  * so ins_len set to 0 here.
3823                  * Balance will be done after all corrupt node/leaf is deleted.
3824                  */
3825                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3826                 if (ret < 0)
3827                         goto out;
3828                 offset = btrfs_node_blockptr(path.nodes[level],
3829                                              path.slots[level]);
3830
3831                 /* Remove the ptr */
3832                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3833                 if (ret < 0)
3834                         goto out;
3835                 /*
3836                  * Remove the corresponding extent
3837                  * return value is not concerned.
3838                  */
3839                 btrfs_release_path(&path);
3840                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3841                                         0, root->root_key.objectid,
3842                                         level - 1, 0);
3843                 cache = next_cache_extent(cache);
3844         }
3845
3846         /* Balance the btree using btrfs_search_slot() */
3847         cache = first_cache_extent(corrupt_blocks);
3848         while (cache) {
3849                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3850                                        cache);
3851                 memcpy(&key, &corrupt->key, sizeof(key));
3852                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3853                 if (ret < 0)
3854                         goto out;
3855                 /* return will always >0 since it won't find the item */
3856                 ret = 0;
3857                 btrfs_release_path(&path);
3858                 cache = next_cache_extent(cache);
3859         }
3860 out:
3861         btrfs_commit_transaction(trans, root);
3862         btrfs_release_path(&path);
3863         return ret;
3864 }
3865
3866 static int check_fs_root(struct btrfs_root *root,
3867                          struct cache_tree *root_cache,
3868                          struct walk_control *wc)
3869 {
3870         int ret = 0;
3871         int err = 0;
3872         int wret;
3873         int level;
3874         struct btrfs_path path;
3875         struct shared_node root_node;
3876         struct root_record *rec;
3877         struct btrfs_root_item *root_item = &root->root_item;
3878         struct cache_tree corrupt_blocks;
3879         struct orphan_data_extent *orphan;
3880         struct orphan_data_extent *tmp;
3881         enum btrfs_tree_block_status status;
3882         struct node_refs nrefs;
3883
3884         /*
3885          * Reuse the corrupt_block cache tree to record corrupted tree block
3886          *
3887          * Unlike the usage in extent tree check, here we do it in a per
3888          * fs/subvol tree base.
3889          */
3890         cache_tree_init(&corrupt_blocks);
3891         root->fs_info->corrupt_blocks = &corrupt_blocks;
3892
3893         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3894                 rec = get_root_rec(root_cache, root->root_key.objectid);
3895                 BUG_ON(IS_ERR(rec));
3896                 if (btrfs_root_refs(root_item) > 0)
3897                         rec->found_root_item = 1;
3898         }
3899
3900         btrfs_init_path(&path);
3901         memset(&root_node, 0, sizeof(root_node));
3902         cache_tree_init(&root_node.root_cache);
3903         cache_tree_init(&root_node.inode_cache);
3904         memset(&nrefs, 0, sizeof(nrefs));
3905
3906         /* Move the orphan extent record to corresponding inode_record */
3907         list_for_each_entry_safe(orphan, tmp,
3908                                  &root->orphan_data_extents, list) {
3909                 struct inode_record *inode;
3910
3911                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3912                                       1);
3913                 BUG_ON(IS_ERR(inode));
3914                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3915                 list_move(&orphan->list, &inode->orphan_extents);
3916         }
3917
3918         level = btrfs_header_level(root->node);
3919         memset(wc->nodes, 0, sizeof(wc->nodes));
3920         wc->nodes[level] = &root_node;
3921         wc->active_node = level;
3922         wc->root_level = level;
3923
3924         /* We may not have checked the root block, lets do that now */
3925         if (btrfs_is_leaf(root->node))
3926                 status = btrfs_check_leaf(root, NULL, root->node);
3927         else
3928                 status = btrfs_check_node(root, NULL, root->node);
3929         if (status != BTRFS_TREE_BLOCK_CLEAN)
3930                 return -EIO;
3931
3932         if (btrfs_root_refs(root_item) > 0 ||
3933             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3934                 path.nodes[level] = root->node;
3935                 extent_buffer_get(root->node);
3936                 path.slots[level] = 0;
3937         } else {
3938                 struct btrfs_key key;
3939                 struct btrfs_disk_key found_key;
3940
3941                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3942                 level = root_item->drop_level;
3943                 path.lowest_level = level;
3944                 if (level > btrfs_header_level(root->node) ||
3945                     level >= BTRFS_MAX_LEVEL) {
3946                         error("ignoring invalid drop level: %u", level);
3947                         goto skip_walking;
3948                 }
3949                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3950                 if (wret < 0)
3951                         goto skip_walking;
3952                 btrfs_node_key(path.nodes[level], &found_key,
3953                                 path.slots[level]);
3954                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3955                                         sizeof(found_key)));
3956         }
3957
3958         while (1) {
3959                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3960                 if (wret < 0)
3961                         ret = wret;
3962                 if (wret != 0)
3963                         break;
3964
3965                 wret = walk_up_tree(root, &path, wc, &level);
3966                 if (wret < 0)
3967                         ret = wret;
3968                 if (wret != 0)
3969                         break;
3970         }
3971 skip_walking:
3972         btrfs_release_path(&path);
3973
3974         if (!cache_tree_empty(&corrupt_blocks)) {
3975                 struct cache_extent *cache;
3976                 struct btrfs_corrupt_block *corrupt;
3977
3978                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3979                        root->root_key.objectid);
3980                 cache = first_cache_extent(&corrupt_blocks);
3981                 while (cache) {
3982                         corrupt = container_of(cache,
3983                                                struct btrfs_corrupt_block,
3984                                                cache);
3985                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3986                                cache->start, corrupt->level,
3987                                corrupt->key.objectid, corrupt->key.type,
3988                                corrupt->key.offset);
3989                         cache = next_cache_extent(cache);
3990                 }
3991                 if (repair) {
3992                         printf("Try to repair the btree for root %llu\n",
3993                                root->root_key.objectid);
3994                         ret = repair_btree(root, &corrupt_blocks);
3995                         if (ret < 0)
3996                                 fprintf(stderr, "Failed to repair btree: %s\n",
3997                                         strerror(-ret));
3998                         if (!ret)
3999                                 printf("Btree for root %llu is fixed\n",
4000                                        root->root_key.objectid);
4001                 }
4002         }
4003
4004         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4005         if (err < 0)
4006                 ret = err;
4007
4008         if (root_node.current) {
4009                 root_node.current->checked = 1;
4010                 maybe_free_inode_rec(&root_node.inode_cache,
4011                                 root_node.current);
4012         }
4013
4014         err = check_inode_recs(root, &root_node.inode_cache);
4015         if (!ret)
4016                 ret = err;
4017
4018         free_corrupt_blocks_tree(&corrupt_blocks);
4019         root->fs_info->corrupt_blocks = NULL;
4020         free_orphan_data_extents(&root->orphan_data_extents);
4021         return ret;
4022 }
4023
4024 static int fs_root_objectid(u64 objectid)
4025 {
4026         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4027             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4028                 return 1;
4029         return is_fstree(objectid);
4030 }
4031
4032 static int check_fs_roots(struct btrfs_root *root,
4033                           struct cache_tree *root_cache)
4034 {
4035         struct btrfs_path path;
4036         struct btrfs_key key;
4037         struct walk_control wc;
4038         struct extent_buffer *leaf, *tree_node;
4039         struct btrfs_root *tmp_root;
4040         struct btrfs_root *tree_root = root->fs_info->tree_root;
4041         int ret;
4042         int err = 0;
4043
4044         if (ctx.progress_enabled) {
4045                 ctx.tp = TASK_FS_ROOTS;
4046                 task_start(ctx.info);
4047         }
4048
4049         /*
4050          * Just in case we made any changes to the extent tree that weren't
4051          * reflected into the free space cache yet.
4052          */
4053         if (repair)
4054                 reset_cached_block_groups(root->fs_info);
4055         memset(&wc, 0, sizeof(wc));
4056         cache_tree_init(&wc.shared);
4057         btrfs_init_path(&path);
4058
4059 again:
4060         key.offset = 0;
4061         key.objectid = 0;
4062         key.type = BTRFS_ROOT_ITEM_KEY;
4063         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4064         if (ret < 0) {
4065                 err = 1;
4066                 goto out;
4067         }
4068         tree_node = tree_root->node;
4069         while (1) {
4070                 if (tree_node != tree_root->node) {
4071                         free_root_recs_tree(root_cache);
4072                         btrfs_release_path(&path);
4073                         goto again;
4074                 }
4075                 leaf = path.nodes[0];
4076                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4077                         ret = btrfs_next_leaf(tree_root, &path);
4078                         if (ret) {
4079                                 if (ret < 0)
4080                                         err = 1;
4081                                 break;
4082                         }
4083                         leaf = path.nodes[0];
4084                 }
4085                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4086                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4087                     fs_root_objectid(key.objectid)) {
4088                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4089                                 tmp_root = btrfs_read_fs_root_no_cache(
4090                                                 root->fs_info, &key);
4091                         } else {
4092                                 key.offset = (u64)-1;
4093                                 tmp_root = btrfs_read_fs_root(
4094                                                 root->fs_info, &key);
4095                         }
4096                         if (IS_ERR(tmp_root)) {
4097                                 err = 1;
4098                                 goto next;
4099                         }
4100                         ret = check_fs_root(tmp_root, root_cache, &wc);
4101                         if (ret == -EAGAIN) {
4102                                 free_root_recs_tree(root_cache);
4103                                 btrfs_release_path(&path);
4104                                 goto again;
4105                         }
4106                         if (ret)
4107                                 err = 1;
4108                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4109                                 btrfs_free_fs_root(tmp_root);
4110                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4111                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4112                         process_root_ref(leaf, path.slots[0], &key,
4113                                          root_cache);
4114                 }
4115 next:
4116                 path.slots[0]++;
4117         }
4118 out:
4119         btrfs_release_path(&path);
4120         if (err)
4121                 free_extent_cache_tree(&wc.shared);
4122         if (!cache_tree_empty(&wc.shared))
4123                 fprintf(stderr, "warning line %d\n", __LINE__);
4124
4125         task_stop(ctx.info);
4126
4127         return err;
4128 }
4129
4130 /*
4131  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4132  * INODE_REF/INODE_EXTREF match.
4133  *
4134  * @root:       the root of the fs/file tree
4135  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4136  * @key:        the key of the DIR_ITEM/DIR_INDEX
4137  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4138  *              distinguish root_dir between normal dir/file
4139  * @name:       the name in the INODE_REF/INODE_EXTREF
4140  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4141  * @mode:       the st_mode of INODE_ITEM
4142  *
4143  * Return 0 if no error occurred.
4144  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4145  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4146  * dir/file.
4147  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4148  * not match for normal dir/file.
4149  */
4150 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4151                          struct btrfs_key *key, u64 index, char *name,
4152                          u32 namelen, u32 mode)
4153 {
4154         struct btrfs_path path;
4155         struct extent_buffer *node;
4156         struct btrfs_dir_item *di;
4157         struct btrfs_key location;
4158         char namebuf[BTRFS_NAME_LEN] = {0};
4159         u32 total;
4160         u32 cur = 0;
4161         u32 len;
4162         u32 name_len;
4163         u32 data_len;
4164         u8 filetype;
4165         int slot;
4166         int ret;
4167
4168         btrfs_init_path(&path);
4169         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4170         if (ret < 0) {
4171                 ret = DIR_ITEM_MISSING;
4172                 goto out;
4173         }
4174
4175         /* Process root dir and goto out*/
4176         if (index == 0) {
4177                 if (ret == 0) {
4178                         ret = ROOT_DIR_ERROR;
4179                         error(
4180                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4181                                 root->objectid,
4182                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4183                                         "REF" : "EXTREF",
4184                                 ref_key->objectid, ref_key->offset,
4185                                 key->type == BTRFS_DIR_ITEM_KEY ?
4186                                         "DIR_ITEM" : "DIR_INDEX");
4187                 } else {
4188                         ret = 0;
4189                 }
4190
4191                 goto out;
4192         }
4193
4194         /* Process normal file/dir */
4195         if (ret > 0) {
4196                 ret = DIR_ITEM_MISSING;
4197                 error(
4198                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4199                         root->objectid,
4200                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4201                         ref_key->objectid, ref_key->offset,
4202                         key->type == BTRFS_DIR_ITEM_KEY ?
4203                                 "DIR_ITEM" : "DIR_INDEX",
4204                         key->objectid, key->offset, namelen, name,
4205                         imode_to_type(mode));
4206                 goto out;
4207         }
4208
4209         /* Check whether inode_id/filetype/name match */
4210         node = path.nodes[0];
4211         slot = path.slots[0];
4212         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4213         total = btrfs_item_size_nr(node, slot);
4214         while (cur < total) {
4215                 ret = DIR_ITEM_MISMATCH;
4216                 name_len = btrfs_dir_name_len(node, di);
4217                 data_len = btrfs_dir_data_len(node, di);
4218
4219                 btrfs_dir_item_key_to_cpu(node, di, &location);
4220                 if (location.objectid != ref_key->objectid ||
4221                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4222                     location.offset != 0)
4223                         goto next;
4224
4225                 filetype = btrfs_dir_type(node, di);
4226                 if (imode_to_type(mode) != filetype)
4227                         goto next;
4228
4229                 if (name_len <= BTRFS_NAME_LEN) {
4230                         len = name_len;
4231                 } else {
4232                         len = BTRFS_NAME_LEN;
4233                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4234                         root->objectid,
4235                         key->type == BTRFS_DIR_ITEM_KEY ?
4236                         "DIR_ITEM" : "DIR_INDEX",
4237                         key->objectid, key->offset, name_len);
4238                 }
4239                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4240                 if (len != namelen || strncmp(namebuf, name, len))
4241                         goto next;
4242
4243                 ret = 0;
4244                 goto out;
4245 next:
4246                 len = sizeof(*di) + name_len + data_len;
4247                 di = (struct btrfs_dir_item *)((char *)di + len);
4248                 cur += len;
4249         }
4250         if (ret == DIR_ITEM_MISMATCH)
4251                 error(
4252                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4253                         root->objectid,
4254                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4255                         ref_key->objectid, ref_key->offset,
4256                         key->type == BTRFS_DIR_ITEM_KEY ?
4257                                 "DIR_ITEM" : "DIR_INDEX",
4258                         key->objectid, key->offset, namelen, name,
4259                         imode_to_type(mode));
4260 out:
4261         btrfs_release_path(&path);
4262         return ret;
4263 }
4264
4265 /*
4266  * Traverse the given INODE_REF and call find_dir_item() to find related
4267  * DIR_ITEM/DIR_INDEX.
4268  *
4269  * @root:       the root of the fs/file tree
4270  * @ref_key:    the key of the INODE_REF
4271  * @refs:       the count of INODE_REF
4272  * @mode:       the st_mode of INODE_ITEM
4273  *
4274  * Return 0 if no error occurred.
4275  */
4276 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4277                            struct extent_buffer *node, int slot, u64 *refs,
4278                            int mode)
4279 {
4280         struct btrfs_key key;
4281         struct btrfs_inode_ref *ref;
4282         char namebuf[BTRFS_NAME_LEN] = {0};
4283         u32 total;
4284         u32 cur = 0;
4285         u32 len;
4286         u32 name_len;
4287         u64 index;
4288         int ret, err = 0;
4289
4290         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4291         total = btrfs_item_size_nr(node, slot);
4292
4293 next:
4294         /* Update inode ref count */
4295         (*refs)++;
4296
4297         index = btrfs_inode_ref_index(node, ref);
4298         name_len = btrfs_inode_ref_name_len(node, ref);
4299         if (name_len <= BTRFS_NAME_LEN) {
4300                 len = name_len;
4301         } else {
4302                 len = BTRFS_NAME_LEN;
4303                 warning("root %llu INODE_REF[%llu %llu] name too long",
4304                         root->objectid, ref_key->objectid, ref_key->offset);
4305         }
4306
4307         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4308
4309         /* Check root dir ref name */
4310         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4311                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4312                       root->objectid, ref_key->objectid, ref_key->offset,
4313                       namebuf);
4314                 err |= ROOT_DIR_ERROR;
4315         }
4316
4317         /* Find related DIR_INDEX */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_INDEX_KEY;
4320         key.offset = index;
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         /* Find related dir_item */
4325         key.objectid = ref_key->offset;
4326         key.type = BTRFS_DIR_ITEM_KEY;
4327         key.offset = btrfs_name_hash(namebuf, len);
4328         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4329         err |= ret;
4330
4331         len = sizeof(*ref) + name_len;
4332         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4333         cur += len;
4334         if (cur < total)
4335                 goto next;
4336
4337         return err;
4338 }
4339
4340 /*
4341  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4342  * DIR_ITEM/DIR_INDEX.
4343  *
4344  * @root:       the root of the fs/file tree
4345  * @ref_key:    the key of the INODE_EXTREF
4346  * @refs:       the count of INODE_EXTREF
4347  * @mode:       the st_mode of INODE_ITEM
4348  *
4349  * Return 0 if no error occurred.
4350  */
4351 static int check_inode_extref(struct btrfs_root *root,
4352                               struct btrfs_key *ref_key,
4353                               struct extent_buffer *node, int slot, u64 *refs,
4354                               int mode)
4355 {
4356         struct btrfs_key key;
4357         struct btrfs_inode_extref *extref;
4358         char namebuf[BTRFS_NAME_LEN] = {0};
4359         u32 total;
4360         u32 cur = 0;
4361         u32 len;
4362         u32 name_len;
4363         u64 index;
4364         u64 parent;
4365         int ret;
4366         int err = 0;
4367
4368         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4369         total = btrfs_item_size_nr(node, slot);
4370
4371 next:
4372         /* update inode ref count */
4373         (*refs)++;
4374         name_len = btrfs_inode_extref_name_len(node, extref);
4375         index = btrfs_inode_extref_index(node, extref);
4376         parent = btrfs_inode_extref_parent(node, extref);
4377         if (name_len <= BTRFS_NAME_LEN) {
4378                 len = name_len;
4379         } else {
4380                 len = BTRFS_NAME_LEN;
4381                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4382                         root->objectid, ref_key->objectid, ref_key->offset);
4383         }
4384         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4385
4386         /* Check root dir ref name */
4387         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4388                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4389                       root->objectid, ref_key->objectid, ref_key->offset,
4390                       namebuf);
4391                 err |= ROOT_DIR_ERROR;
4392         }
4393
4394         /* find related dir_index */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_INDEX_KEY;
4397         key.offset = index;
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         /* find related dir_item */
4402         key.objectid = parent;
4403         key.type = BTRFS_DIR_ITEM_KEY;
4404         key.offset = btrfs_name_hash(namebuf, len);
4405         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4406         err |= ret;
4407
4408         len = sizeof(*extref) + name_len;
4409         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4410         cur += len;
4411
4412         if (cur < total)
4413                 goto next;
4414
4415         return err;
4416 }
4417
4418 /*
4419  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4420  * DIR_ITEM/DIR_INDEX match.
4421  *
4422  * @root:       the root of the fs/file tree
4423  * @key:        the key of the INODE_REF/INODE_EXTREF
4424  * @name:       the name in the INODE_REF/INODE_EXTREF
4425  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4426  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4427  * to (u64)-1
4428  * @ext_ref:    the EXTENDED_IREF feature
4429  *
4430  * Return 0 if no error occurred.
4431  * Return >0 for error bitmap
4432  */
4433 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4434                           char *name, int namelen, u64 index,
4435                           unsigned int ext_ref)
4436 {
4437         struct btrfs_path path;
4438         struct btrfs_inode_ref *ref;
4439         struct btrfs_inode_extref *extref;
4440         struct extent_buffer *node;
4441         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4442         u32 total;
4443         u32 cur = 0;
4444         u32 len;
4445         u32 ref_namelen;
4446         u64 ref_index;
4447         u64 parent;
4448         u64 dir_id;
4449         int slot;
4450         int ret;
4451
4452         btrfs_init_path(&path);
4453         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4454         if (ret) {
4455                 ret = INODE_REF_MISSING;
4456                 goto extref;
4457         }
4458
4459         node = path.nodes[0];
4460         slot = path.slots[0];
4461
4462         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4463         total = btrfs_item_size_nr(node, slot);
4464
4465         /* Iterate all entry of INODE_REF */
4466         while (cur < total) {
4467                 ret = INODE_REF_MISSING;
4468
4469                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4470                 ref_index = btrfs_inode_ref_index(node, ref);
4471                 if (index != (u64)-1 && index != ref_index)
4472                         goto next_ref;
4473
4474                 if (ref_namelen <= BTRFS_NAME_LEN) {
4475                         len = ref_namelen;
4476                 } else {
4477                         len = BTRFS_NAME_LEN;
4478                         warning("root %llu INODE %s[%llu %llu] name too long",
4479                                 root->objectid,
4480                                 key->type == BTRFS_INODE_REF_KEY ?
4481                                         "REF" : "EXTREF",
4482                                 key->objectid, key->offset);
4483                 }
4484                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4485                                    len);
4486
4487                 if (len != namelen || strncmp(ref_namebuf, name, len))
4488                         goto next_ref;
4489
4490                 ret = 0;
4491                 goto out;
4492 next_ref:
4493                 len = sizeof(*ref) + ref_namelen;
4494                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4495                 cur += len;
4496         }
4497
4498 extref:
4499         /* Skip if not support EXTENDED_IREF feature */
4500         if (!ext_ref)
4501                 goto out;
4502
4503         btrfs_release_path(&path);
4504         btrfs_init_path(&path);
4505
4506         dir_id = key->offset;
4507         key->type = BTRFS_INODE_EXTREF_KEY;
4508         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4509
4510         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4511         if (ret) {
4512                 ret = INODE_REF_MISSING;
4513                 goto out;
4514         }
4515
4516         node = path.nodes[0];
4517         slot = path.slots[0];
4518
4519         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4520         cur = 0;
4521         total = btrfs_item_size_nr(node, slot);
4522
4523         /* Iterate all entry of INODE_EXTREF */
4524         while (cur < total) {
4525                 ret = INODE_REF_MISSING;
4526
4527                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4528                 ref_index = btrfs_inode_extref_index(node, extref);
4529                 parent = btrfs_inode_extref_parent(node, extref);
4530                 if (index != (u64)-1 && index != ref_index)
4531                         goto next_extref;
4532
4533                 if (parent != dir_id)
4534                         goto next_extref;
4535
4536                 if (ref_namelen <= BTRFS_NAME_LEN) {
4537                         len = ref_namelen;
4538                 } else {
4539                         len = BTRFS_NAME_LEN;
4540                         warning("root %llu INODE %s[%llu %llu] name too long",
4541                                 root->objectid,
4542                                 key->type == BTRFS_INODE_REF_KEY ?
4543                                         "REF" : "EXTREF",
4544                                 key->objectid, key->offset);
4545                 }
4546                 read_extent_buffer(node, ref_namebuf,
4547                                    (unsigned long)(extref + 1), len);
4548
4549                 if (len != namelen || strncmp(ref_namebuf, name, len))
4550                         goto next_extref;
4551
4552                 ret = 0;
4553                 goto out;
4554
4555 next_extref:
4556                 len = sizeof(*extref) + ref_namelen;
4557                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4558                 cur += len;
4559
4560         }
4561 out:
4562         btrfs_release_path(&path);
4563         return ret;
4564 }
4565
4566 /*
4567  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4568  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4569  *
4570  * @root:       the root of the fs/file tree
4571  * @key:        the key of the DIR_ITEM/DIR_INDEX
4572  * @size:       the st_size of the INODE_ITEM
4573  * @ext_ref:    the EXTENDED_IREF feature
4574  *
4575  * Return 0 if no error occurred.
4576  */
4577 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4578                           struct extent_buffer *node, int slot, u64 *size,
4579                           unsigned int ext_ref)
4580 {
4581         struct btrfs_dir_item *di;
4582         struct btrfs_inode_item *ii;
4583         struct btrfs_path path;
4584         struct btrfs_key location;
4585         char namebuf[BTRFS_NAME_LEN] = {0};
4586         u32 total;
4587         u32 cur = 0;
4588         u32 len;
4589         u32 name_len;
4590         u32 data_len;
4591         u8 filetype;
4592         u32 mode;
4593         u64 index;
4594         int ret;
4595         int err = 0;
4596
4597         /*
4598          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4599          * ignore index check.
4600          */
4601         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4602
4603         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4604         total = btrfs_item_size_nr(node, slot);
4605
4606         while (cur < total) {
4607                 data_len = btrfs_dir_data_len(node, di);
4608                 if (data_len)
4609                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4610                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4611                               "DIR_ITEM" : "DIR_INDEX",
4612                               key->objectid, key->offset, data_len);
4613
4614                 name_len = btrfs_dir_name_len(node, di);
4615                 if (name_len <= BTRFS_NAME_LEN) {
4616                         len = name_len;
4617                 } else {
4618                         len = BTRFS_NAME_LEN;
4619                         warning("root %llu %s[%llu %llu] name too long",
4620                                 root->objectid,
4621                                 key->type == BTRFS_DIR_ITEM_KEY ?
4622                                 "DIR_ITEM" : "DIR_INDEX",
4623                                 key->objectid, key->offset);
4624                 }
4625                 (*size) += name_len;
4626
4627                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4628                 filetype = btrfs_dir_type(node, di);
4629
4630                 btrfs_init_path(&path);
4631                 btrfs_dir_item_key_to_cpu(node, di, &location);
4632
4633                 /* Ignore related ROOT_ITEM check */
4634                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4635                         goto next;
4636
4637                 /* Check relative INODE_ITEM(existence/filetype) */
4638                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4639                 if (ret) {
4640                         err |= INODE_ITEM_MISSING;
4641                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4642                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4644                               key->offset, location.objectid, name_len,
4645                               namebuf, filetype);
4646                         goto next;
4647                 }
4648
4649                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4650                                     struct btrfs_inode_item);
4651                 mode = btrfs_inode_mode(path.nodes[0], ii);
4652
4653                 if (imode_to_type(mode) != filetype) {
4654                         err |= INODE_ITEM_MISMATCH;
4655                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4656                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4657                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4658                               key->offset, name_len, namebuf, filetype);
4659                 }
4660
4661                 /* Check relative INODE_REF/INODE_EXTREF */
4662                 location.type = BTRFS_INODE_REF_KEY;
4663                 location.offset = key->objectid;
4664                 ret = find_inode_ref(root, &location, namebuf, len,
4665                                        index, ext_ref);
4666                 err |= ret;
4667                 if (ret & INODE_REF_MISSING)
4668                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4669                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4670                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4671                               key->offset, name_len, namebuf, filetype);
4672
4673 next:
4674                 btrfs_release_path(&path);
4675                 len = sizeof(*di) + name_len + data_len;
4676                 di = (struct btrfs_dir_item *)((char *)di + len);
4677                 cur += len;
4678
4679                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4680                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4681                               root->objectid, key->objectid, key->offset);
4682                         break;
4683                 }
4684         }
4685
4686         return err;
4687 }
4688
4689 /*
4690  * Check file extent datasum/hole, update the size of the file extents,
4691  * check and update the last offset of the file extent.
4692  *
4693  * @root:       the root of fs/file tree.
4694  * @fkey:       the key of the file extent.
4695  * @nodatasum:  INODE_NODATASUM feature.
4696  * @size:       the sum of all EXTENT_DATA items size for this inode.
4697  * @end:        the offset of the last extent.
4698  *
4699  * Return 0 if no error occurred.
4700  */
4701 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4702                              struct extent_buffer *node, int slot,
4703                              unsigned int nodatasum, u64 *size, u64 *end)
4704 {
4705         struct btrfs_file_extent_item *fi;
4706         u64 disk_bytenr;
4707         u64 disk_num_bytes;
4708         u64 extent_num_bytes;
4709         u64 extent_offset;
4710         u64 csum_found;         /* In byte size, sectorsize aligned */
4711         u64 search_start;       /* Logical range start we search for csum */
4712         u64 search_len;         /* Logical range len we search for csum */
4713         unsigned int extent_type;
4714         unsigned int is_hole;
4715         int compressed = 0;
4716         int ret;
4717         int err = 0;
4718
4719         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4720
4721         /* Check inline extent */
4722         extent_type = btrfs_file_extent_type(node, fi);
4723         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4724                 struct btrfs_item *e = btrfs_item_nr(slot);
4725                 u32 item_inline_len;
4726
4727                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4728                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4729                 compressed = btrfs_file_extent_compression(node, fi);
4730                 if (extent_num_bytes == 0) {
4731                         error(
4732                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4733                                 root->objectid, fkey->objectid, fkey->offset);
4734                         err |= FILE_EXTENT_ERROR;
4735                 }
4736                 if (!compressed && extent_num_bytes != item_inline_len) {
4737                         error(
4738                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4739                                 root->objectid, fkey->objectid, fkey->offset,
4740                                 extent_num_bytes, item_inline_len);
4741                         err |= FILE_EXTENT_ERROR;
4742                 }
4743                 *size += extent_num_bytes;
4744                 return err;
4745         }
4746
4747         /* Check extent type */
4748         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4749                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4750                 err |= FILE_EXTENT_ERROR;
4751                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4752                       root->objectid, fkey->objectid, fkey->offset);
4753                 return err;
4754         }
4755
4756         /* Check REG_EXTENT/PREALLOC_EXTENT */
4757         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4758         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4759         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4760         extent_offset = btrfs_file_extent_offset(node, fi);
4761         compressed = btrfs_file_extent_compression(node, fi);
4762         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4763
4764         /*
4765          * Check EXTENT_DATA csum
4766          *
4767          * For plain (uncompressed) extent, we should only check the range
4768          * we're referring to, as it's possible that part of prealloc extent
4769          * has been written, and has csum:
4770          *
4771          * |<--- Original large preallocated extent A ---->|
4772          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4773          *      No csum                         Has csum
4774          *
4775          * For compressed extent, we should check the whole range.
4776          */
4777         if (!compressed) {
4778                 search_start = disk_bytenr + extent_offset;
4779                 search_len = extent_num_bytes;
4780         } else {
4781                 search_start = disk_bytenr;
4782                 search_len = disk_num_bytes;
4783         }
4784         ret = count_csum_range(root, search_start, search_len, &csum_found);
4785         if (csum_found > 0 && nodatasum) {
4786                 err |= ODD_CSUM_ITEM;
4787                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4788                       root->objectid, fkey->objectid, fkey->offset);
4789         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4790                    !is_hole && (ret < 0 || csum_found < search_len)) {
4791                 err |= CSUM_ITEM_MISSING;
4792                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4793                       root->objectid, fkey->objectid, fkey->offset,
4794                       csum_found, search_len);
4795         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4796                 err |= ODD_CSUM_ITEM;
4797                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4798                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4799         }
4800
4801         /* Check EXTENT_DATA hole */
4802         if (no_holes && is_hole) {
4803                 err |= FILE_EXTENT_ERROR;
4804                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4805                       root->objectid, fkey->objectid, fkey->offset);
4806         } else if (!no_holes && *end != fkey->offset) {
4807                 err |= FILE_EXTENT_ERROR;
4808                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4809                       root->objectid, fkey->objectid, fkey->offset);
4810         }
4811
4812         *end += extent_num_bytes;
4813         if (!is_hole)
4814                 *size += extent_num_bytes;
4815
4816         return err;
4817 }
4818
4819 /*
4820  * Check INODE_ITEM and related ITEMs (the same inode number)
4821  * 1. check link count
4822  * 2. check inode ref/extref
4823  * 3. check dir item/index
4824  *
4825  * @ext_ref:    the EXTENDED_IREF feature
4826  *
4827  * Return 0 if no error occurred.
4828  * Return >0 for error or hit the traversal is done(by error bitmap)
4829  */
4830 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4831                             unsigned int ext_ref)
4832 {
4833         struct extent_buffer *node;
4834         struct btrfs_inode_item *ii;
4835         struct btrfs_key key;
4836         u64 inode_id;
4837         u32 mode;
4838         u64 nlink;
4839         u64 nbytes;
4840         u64 isize;
4841         u64 size = 0;
4842         u64 refs = 0;
4843         u64 extent_end = 0;
4844         u64 extent_size = 0;
4845         unsigned int dir;
4846         unsigned int nodatasum;
4847         int slot;
4848         int ret;
4849         int err = 0;
4850
4851         node = path->nodes[0];
4852         slot = path->slots[0];
4853
4854         btrfs_item_key_to_cpu(node, &key, slot);
4855         inode_id = key.objectid;
4856
4857         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4858                 ret = btrfs_next_item(root, path);
4859                 if (ret > 0)
4860                         err |= LAST_ITEM;
4861                 return err;
4862         }
4863
4864         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4865         isize = btrfs_inode_size(node, ii);
4866         nbytes = btrfs_inode_nbytes(node, ii);
4867         mode = btrfs_inode_mode(node, ii);
4868         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4869         nlink = btrfs_inode_nlink(node, ii);
4870         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4871
4872         while (1) {
4873                 ret = btrfs_next_item(root, path);
4874                 if (ret < 0) {
4875                         /* out will fill 'err' rusing current statistics */
4876                         goto out;
4877                 } else if (ret > 0) {
4878                         err |= LAST_ITEM;
4879                         goto out;
4880                 }
4881
4882                 node = path->nodes[0];
4883                 slot = path->slots[0];
4884                 btrfs_item_key_to_cpu(node, &key, slot);
4885                 if (key.objectid != inode_id)
4886                         goto out;
4887
4888                 switch (key.type) {
4889                 case BTRFS_INODE_REF_KEY:
4890                         ret = check_inode_ref(root, &key, node, slot, &refs,
4891                                               mode);
4892                         err |= ret;
4893                         break;
4894                 case BTRFS_INODE_EXTREF_KEY:
4895                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4896                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4897                                         root->objectid, key.objectid,
4898                                         key.offset);
4899                         ret = check_inode_extref(root, &key, node, slot, &refs,
4900                                                  mode);
4901                         err |= ret;
4902                         break;
4903                 case BTRFS_DIR_ITEM_KEY:
4904                 case BTRFS_DIR_INDEX_KEY:
4905                         if (!dir) {
4906                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4907                                         root->objectid, inode_id,
4908                                         imode_to_type(mode), key.objectid,
4909                                         key.offset);
4910                         }
4911                         ret = check_dir_item(root, &key, node, slot, &size,
4912                                              ext_ref);
4913                         err |= ret;
4914                         break;
4915                 case BTRFS_EXTENT_DATA_KEY:
4916                         if (dir) {
4917                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4918                                         root->objectid, inode_id, key.objectid,
4919                                         key.offset);
4920                         }
4921                         ret = check_file_extent(root, &key, node, slot,
4922                                                 nodatasum, &extent_size,
4923                                                 &extent_end);
4924                         err |= ret;
4925                         break;
4926                 case BTRFS_XATTR_ITEM_KEY:
4927                         break;
4928                 default:
4929                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4930                               key.objectid, key.type, key.offset);
4931                 }
4932         }
4933
4934 out:
4935         /* verify INODE_ITEM nlink/isize/nbytes */
4936         if (dir) {
4937                 if (nlink != 1) {
4938                         err |= LINK_COUNT_ERROR;
4939                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4940                               root->objectid, inode_id, nlink);
4941                 }
4942
4943                 /*
4944                  * Just a warning, as dir inode nbytes is just an
4945                  * instructive value.
4946                  */
4947                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4948                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4949                                 root->objectid, inode_id, root->nodesize);
4950                 }
4951
4952                 if (isize != size) {
4953                         err |= ISIZE_ERROR;
4954                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4955                               root->objectid, inode_id, isize, size);
4956                 }
4957         } else {
4958                 if (nlink != refs) {
4959                         err |= LINK_COUNT_ERROR;
4960                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4961                               root->objectid, inode_id, nlink, refs);
4962                 } else if (!nlink) {
4963                         err |= ORPHAN_ITEM;
4964                 }
4965
4966                 if (!nbytes && !no_holes && extent_end < isize) {
4967                         err |= NBYTES_ERROR;
4968                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4969                               root->objectid, inode_id, isize);
4970                 }
4971
4972                 if (nbytes != extent_size) {
4973                         err |= NBYTES_ERROR;
4974                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4975                               root->objectid, inode_id, nbytes, extent_size);
4976                 }
4977         }
4978
4979         return err;
4980 }
4981
4982 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4983 {
4984         struct btrfs_path path;
4985         struct btrfs_key key;
4986         int err = 0;
4987         int ret;
4988
4989         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4990         key.type = BTRFS_INODE_ITEM_KEY;
4991         key.offset = 0;
4992
4993         /* For root being dropped, we don't need to check first inode */
4994         if (btrfs_root_refs(&root->root_item) == 0 &&
4995             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4996             key.objectid)
4997                 return 0;
4998
4999         btrfs_init_path(&path);
5000
5001         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5002         if (ret < 0)
5003                 goto out;
5004         if (ret > 0) {
5005                 ret = 0;
5006                 err |= INODE_ITEM_MISSING;
5007                 error("first inode item of root %llu is missing",
5008                       root->objectid);
5009         }
5010
5011         err |= check_inode_item(root, &path, ext_ref);
5012         err &= ~LAST_ITEM;
5013         if (err && !ret)
5014                 ret = -EIO;
5015 out:
5016         btrfs_release_path(&path);
5017         return ret;
5018 }
5019
5020 /*
5021  * Iterate all item on the tree and call check_inode_item() to check.
5022  *
5023  * @root:       the root of the tree to be checked.
5024  * @ext_ref:    the EXTENDED_IREF feature
5025  *
5026  * Return 0 if no error found.
5027  * Return <0 for error.
5028  */
5029 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5030 {
5031         struct btrfs_path path;
5032         struct node_refs nrefs;
5033         struct btrfs_root_item *root_item = &root->root_item;
5034         int ret;
5035         int level;
5036         int err = 0;
5037
5038         /*
5039          * We need to manually check the first inode item(256)
5040          * As the following traversal function will only start from
5041          * the first inode item in the leaf, if inode item(256) is missing
5042          * we will just skip it forever.
5043          */
5044         ret = check_fs_first_inode(root, ext_ref);
5045         if (ret < 0)
5046                 return ret;
5047
5048         memset(&nrefs, 0, sizeof(nrefs));
5049         level = btrfs_header_level(root->node);
5050         btrfs_init_path(&path);
5051
5052         if (btrfs_root_refs(root_item) > 0 ||
5053             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5054                 path.nodes[level] = root->node;
5055                 path.slots[level] = 0;
5056                 extent_buffer_get(root->node);
5057         } else {
5058                 struct btrfs_key key;
5059
5060                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5061                 level = root_item->drop_level;
5062                 path.lowest_level = level;
5063                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5064                 if (ret < 0)
5065                         goto out;
5066                 ret = 0;
5067         }
5068
5069         while (1) {
5070                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5071                 err |= !!ret;
5072
5073                 /* if ret is negative, walk shall stop */
5074                 if (ret < 0) {
5075                         ret = err;
5076                         break;
5077                 }
5078
5079                 ret = walk_up_tree_v2(root, &path, &level);
5080                 if (ret != 0) {
5081                         /* Normal exit, reset ret to err */
5082                         ret = err;
5083                         break;
5084                 }
5085         }
5086
5087 out:
5088         btrfs_release_path(&path);
5089         return ret;
5090 }
5091
5092 /*
5093  * Find the relative ref for root_ref and root_backref.
5094  *
5095  * @root:       the root of the root tree.
5096  * @ref_key:    the key of the root ref.
5097  *
5098  * Return 0 if no error occurred.
5099  */
5100 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5101                           struct extent_buffer *node, int slot)
5102 {
5103         struct btrfs_path path;
5104         struct btrfs_key key;
5105         struct btrfs_root_ref *ref;
5106         struct btrfs_root_ref *backref;
5107         char ref_name[BTRFS_NAME_LEN] = {0};
5108         char backref_name[BTRFS_NAME_LEN] = {0};
5109         u64 ref_dirid;
5110         u64 ref_seq;
5111         u32 ref_namelen;
5112         u64 backref_dirid;
5113         u64 backref_seq;
5114         u32 backref_namelen;
5115         u32 len;
5116         int ret;
5117         int err = 0;
5118
5119         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5120         ref_dirid = btrfs_root_ref_dirid(node, ref);
5121         ref_seq = btrfs_root_ref_sequence(node, ref);
5122         ref_namelen = btrfs_root_ref_name_len(node, ref);
5123
5124         if (ref_namelen <= BTRFS_NAME_LEN) {
5125                 len = ref_namelen;
5126         } else {
5127                 len = BTRFS_NAME_LEN;
5128                 warning("%s[%llu %llu] ref_name too long",
5129                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5130                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5131                         ref_key->offset);
5132         }
5133         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5134
5135         /* Find relative root_ref */
5136         key.objectid = ref_key->offset;
5137         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5138         key.offset = ref_key->objectid;
5139
5140         btrfs_init_path(&path);
5141         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5142         if (ret) {
5143                 err |= ROOT_REF_MISSING;
5144                 error("%s[%llu %llu] couldn't find relative ref",
5145                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5146                       "ROOT_REF" : "ROOT_BACKREF",
5147                       ref_key->objectid, ref_key->offset);
5148                 goto out;
5149         }
5150
5151         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5152                                  struct btrfs_root_ref);
5153         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5154         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5155         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5156
5157         if (backref_namelen <= BTRFS_NAME_LEN) {
5158                 len = backref_namelen;
5159         } else {
5160                 len = BTRFS_NAME_LEN;
5161                 warning("%s[%llu %llu] ref_name too long",
5162                         key.type == BTRFS_ROOT_REF_KEY ?
5163                         "ROOT_REF" : "ROOT_BACKREF",
5164                         key.objectid, key.offset);
5165         }
5166         read_extent_buffer(path.nodes[0], backref_name,
5167                            (unsigned long)(backref + 1), len);
5168
5169         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5170             ref_namelen != backref_namelen ||
5171             strncmp(ref_name, backref_name, len)) {
5172                 err |= ROOT_REF_MISMATCH;
5173                 error("%s[%llu %llu] mismatch relative ref",
5174                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5175                       "ROOT_REF" : "ROOT_BACKREF",
5176                       ref_key->objectid, ref_key->offset);
5177         }
5178 out:
5179         btrfs_release_path(&path);
5180         return err;
5181 }
5182
5183 /*
5184  * Check all fs/file tree in low_memory mode.
5185  *
5186  * 1. for fs tree root item, call check_fs_root_v2()
5187  * 2. for fs tree root ref/backref, call check_root_ref()
5188  *
5189  * Return 0 if no error occurred.
5190  */
5191 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5192 {
5193         struct btrfs_root *tree_root = fs_info->tree_root;
5194         struct btrfs_root *cur_root = NULL;
5195         struct btrfs_path path;
5196         struct btrfs_key key;
5197         struct extent_buffer *node;
5198         unsigned int ext_ref;
5199         int slot;
5200         int ret;
5201         int err = 0;
5202
5203         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5204
5205         btrfs_init_path(&path);
5206         key.objectid = BTRFS_FS_TREE_OBJECTID;
5207         key.offset = 0;
5208         key.type = BTRFS_ROOT_ITEM_KEY;
5209
5210         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5211         if (ret < 0) {
5212                 err = ret;
5213                 goto out;
5214         } else if (ret > 0) {
5215                 err = -ENOENT;
5216                 goto out;
5217         }
5218
5219         while (1) {
5220                 node = path.nodes[0];
5221                 slot = path.slots[0];
5222                 btrfs_item_key_to_cpu(node, &key, slot);
5223                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5224                         goto out;
5225                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5226                     fs_root_objectid(key.objectid)) {
5227                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5228                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5229                                                                        &key);
5230                         } else {
5231                                 key.offset = (u64)-1;
5232                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5233                         }
5234
5235                         if (IS_ERR(cur_root)) {
5236                                 error("Fail to read fs/subvol tree: %lld",
5237                                       key.objectid);
5238                                 err = -EIO;
5239                                 goto next;
5240                         }
5241
5242                         ret = check_fs_root_v2(cur_root, ext_ref);
5243                         err |= ret;
5244
5245                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5246                                 btrfs_free_fs_root(cur_root);
5247                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5248                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5249                         ret = check_root_ref(tree_root, &key, node, slot);
5250                         err |= ret;
5251                 }
5252 next:
5253                 ret = btrfs_next_item(tree_root, &path);
5254                 if (ret > 0)
5255                         goto out;
5256                 if (ret < 0) {
5257                         err = ret;
5258                         goto out;
5259                 }
5260         }
5261
5262 out:
5263         btrfs_release_path(&path);
5264         return err;
5265 }
5266
5267 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5268 {
5269         struct list_head *cur = rec->backrefs.next;
5270         struct extent_backref *back;
5271         struct tree_backref *tback;
5272         struct data_backref *dback;
5273         u64 found = 0;
5274         int err = 0;
5275
5276         while(cur != &rec->backrefs) {
5277                 back = to_extent_backref(cur);
5278                 cur = cur->next;
5279                 if (!back->found_extent_tree) {
5280                         err = 1;
5281                         if (!print_errs)
5282                                 goto out;
5283                         if (back->is_data) {
5284                                 dback = to_data_backref(back);
5285                                 fprintf(stderr, "Backref %llu %s %llu"
5286                                         " owner %llu offset %llu num_refs %lu"
5287                                         " not found in extent tree\n",
5288                                         (unsigned long long)rec->start,
5289                                         back->full_backref ?
5290                                         "parent" : "root",
5291                                         back->full_backref ?
5292                                         (unsigned long long)dback->parent:
5293                                         (unsigned long long)dback->root,
5294                                         (unsigned long long)dback->owner,
5295                                         (unsigned long long)dback->offset,
5296                                         (unsigned long)dback->num_refs);
5297                         } else {
5298                                 tback = to_tree_backref(back);
5299                                 fprintf(stderr, "Backref %llu parent %llu"
5300                                         " root %llu not found in extent tree\n",
5301                                         (unsigned long long)rec->start,
5302                                         (unsigned long long)tback->parent,
5303                                         (unsigned long long)tback->root);
5304                         }
5305                 }
5306                 if (!back->is_data && !back->found_ref) {
5307                         err = 1;
5308                         if (!print_errs)
5309                                 goto out;
5310                         tback = to_tree_backref(back);
5311                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5312                                 (unsigned long long)rec->start,
5313                                 back->full_backref ? "parent" : "root",
5314                                 back->full_backref ?
5315                                 (unsigned long long)tback->parent :
5316                                 (unsigned long long)tback->root, back);
5317                 }
5318                 if (back->is_data) {
5319                         dback = to_data_backref(back);
5320                         if (dback->found_ref != dback->num_refs) {
5321                                 err = 1;
5322                                 if (!print_errs)
5323                                         goto out;
5324                                 fprintf(stderr, "Incorrect local backref count"
5325                                         " on %llu %s %llu owner %llu"
5326                                         " offset %llu found %u wanted %u back %p\n",
5327                                         (unsigned long long)rec->start,
5328                                         back->full_backref ?
5329                                         "parent" : "root",
5330                                         back->full_backref ?
5331                                         (unsigned long long)dback->parent:
5332                                         (unsigned long long)dback->root,
5333                                         (unsigned long long)dback->owner,
5334                                         (unsigned long long)dback->offset,
5335                                         dback->found_ref, dback->num_refs, back);
5336                         }
5337                         if (dback->disk_bytenr != rec->start) {
5338                                 err = 1;
5339                                 if (!print_errs)
5340                                         goto out;
5341                                 fprintf(stderr, "Backref disk bytenr does not"
5342                                         " match extent record, bytenr=%llu, "
5343                                         "ref bytenr=%llu\n",
5344                                         (unsigned long long)rec->start,
5345                                         (unsigned long long)dback->disk_bytenr);
5346                         }
5347
5348                         if (dback->bytes != rec->nr) {
5349                                 err = 1;
5350                                 if (!print_errs)
5351                                         goto out;
5352                                 fprintf(stderr, "Backref bytes do not match "
5353                                         "extent backref, bytenr=%llu, ref "
5354                                         "bytes=%llu, backref bytes=%llu\n",
5355                                         (unsigned long long)rec->start,
5356                                         (unsigned long long)rec->nr,
5357                                         (unsigned long long)dback->bytes);
5358                         }
5359                 }
5360                 if (!back->is_data) {
5361                         found += 1;
5362                 } else {
5363                         dback = to_data_backref(back);
5364                         found += dback->found_ref;
5365                 }
5366         }
5367         if (found != rec->refs) {
5368                 err = 1;
5369                 if (!print_errs)
5370                         goto out;
5371                 fprintf(stderr, "Incorrect global backref count "
5372                         "on %llu found %llu wanted %llu\n",
5373                         (unsigned long long)rec->start,
5374                         (unsigned long long)found,
5375                         (unsigned long long)rec->refs);
5376         }
5377 out:
5378         return err;
5379 }
5380
5381 static int free_all_extent_backrefs(struct extent_record *rec)
5382 {
5383         struct extent_backref *back;
5384         struct list_head *cur;
5385         while (!list_empty(&rec->backrefs)) {
5386                 cur = rec->backrefs.next;
5387                 back = to_extent_backref(cur);
5388                 list_del(cur);
5389                 free(back);
5390         }
5391         return 0;
5392 }
5393
5394 static void free_extent_record_cache(struct cache_tree *extent_cache)
5395 {
5396         struct cache_extent *cache;
5397         struct extent_record *rec;
5398
5399         while (1) {
5400                 cache = first_cache_extent(extent_cache);
5401                 if (!cache)
5402                         break;
5403                 rec = container_of(cache, struct extent_record, cache);
5404                 remove_cache_extent(extent_cache, cache);
5405                 free_all_extent_backrefs(rec);
5406                 free(rec);
5407         }
5408 }
5409
5410 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5411                                  struct extent_record *rec)
5412 {
5413         if (rec->content_checked && rec->owner_ref_checked &&
5414             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5415             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5416             !rec->bad_full_backref && !rec->crossing_stripes &&
5417             !rec->wrong_chunk_type) {
5418                 remove_cache_extent(extent_cache, &rec->cache);
5419                 free_all_extent_backrefs(rec);
5420                 list_del_init(&rec->list);
5421                 free(rec);
5422         }
5423         return 0;
5424 }
5425
5426 static int check_owner_ref(struct btrfs_root *root,
5427                             struct extent_record *rec,
5428                             struct extent_buffer *buf)
5429 {
5430         struct extent_backref *node;
5431         struct tree_backref *back;
5432         struct btrfs_root *ref_root;
5433         struct btrfs_key key;
5434         struct btrfs_path path;
5435         struct extent_buffer *parent;
5436         int level;
5437         int found = 0;
5438         int ret;
5439
5440         list_for_each_entry(node, &rec->backrefs, list) {
5441                 if (node->is_data)
5442                         continue;
5443                 if (!node->found_ref)
5444                         continue;
5445                 if (node->full_backref)
5446                         continue;
5447                 back = to_tree_backref(node);
5448                 if (btrfs_header_owner(buf) == back->root)
5449                         return 0;
5450         }
5451         BUG_ON(rec->is_root);
5452
5453         /* try to find the block by search corresponding fs tree */
5454         key.objectid = btrfs_header_owner(buf);
5455         key.type = BTRFS_ROOT_ITEM_KEY;
5456         key.offset = (u64)-1;
5457
5458         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5459         if (IS_ERR(ref_root))
5460                 return 1;
5461
5462         level = btrfs_header_level(buf);
5463         if (level == 0)
5464                 btrfs_item_key_to_cpu(buf, &key, 0);
5465         else
5466                 btrfs_node_key_to_cpu(buf, &key, 0);
5467
5468         btrfs_init_path(&path);
5469         path.lowest_level = level + 1;
5470         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5471         if (ret < 0)
5472                 return 0;
5473
5474         parent = path.nodes[level + 1];
5475         if (parent && buf->start == btrfs_node_blockptr(parent,
5476                                                         path.slots[level + 1]))
5477                 found = 1;
5478
5479         btrfs_release_path(&path);
5480         return found ? 0 : 1;
5481 }
5482
5483 static int is_extent_tree_record(struct extent_record *rec)
5484 {
5485         struct list_head *cur = rec->backrefs.next;
5486         struct extent_backref *node;
5487         struct tree_backref *back;
5488         int is_extent = 0;
5489
5490         while(cur != &rec->backrefs) {
5491                 node = to_extent_backref(cur);
5492                 cur = cur->next;
5493                 if (node->is_data)
5494                         return 0;
5495                 back = to_tree_backref(node);
5496                 if (node->full_backref)
5497                         return 0;
5498                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5499                         is_extent = 1;
5500         }
5501         return is_extent;
5502 }
5503
5504
5505 static int record_bad_block_io(struct btrfs_fs_info *info,
5506                                struct cache_tree *extent_cache,
5507                                u64 start, u64 len)
5508 {
5509         struct extent_record *rec;
5510         struct cache_extent *cache;
5511         struct btrfs_key key;
5512
5513         cache = lookup_cache_extent(extent_cache, start, len);
5514         if (!cache)
5515                 return 0;
5516
5517         rec = container_of(cache, struct extent_record, cache);
5518         if (!is_extent_tree_record(rec))
5519                 return 0;
5520
5521         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5522         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5523 }
5524
5525 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5526                        struct extent_buffer *buf, int slot)
5527 {
5528         if (btrfs_header_level(buf)) {
5529                 struct btrfs_key_ptr ptr1, ptr2;
5530
5531                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5532                                    sizeof(struct btrfs_key_ptr));
5533                 read_extent_buffer(buf, &ptr2,
5534                                    btrfs_node_key_ptr_offset(slot + 1),
5535                                    sizeof(struct btrfs_key_ptr));
5536                 write_extent_buffer(buf, &ptr1,
5537                                     btrfs_node_key_ptr_offset(slot + 1),
5538                                     sizeof(struct btrfs_key_ptr));
5539                 write_extent_buffer(buf, &ptr2,
5540                                     btrfs_node_key_ptr_offset(slot),
5541                                     sizeof(struct btrfs_key_ptr));
5542                 if (slot == 0) {
5543                         struct btrfs_disk_key key;
5544                         btrfs_node_key(buf, &key, 0);
5545                         btrfs_fixup_low_keys(root, path, &key,
5546                                              btrfs_header_level(buf) + 1);
5547                 }
5548         } else {
5549                 struct btrfs_item *item1, *item2;
5550                 struct btrfs_key k1, k2;
5551                 char *item1_data, *item2_data;
5552                 u32 item1_offset, item2_offset, item1_size, item2_size;
5553
5554                 item1 = btrfs_item_nr(slot);
5555                 item2 = btrfs_item_nr(slot + 1);
5556                 btrfs_item_key_to_cpu(buf, &k1, slot);
5557                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5558                 item1_offset = btrfs_item_offset(buf, item1);
5559                 item2_offset = btrfs_item_offset(buf, item2);
5560                 item1_size = btrfs_item_size(buf, item1);
5561                 item2_size = btrfs_item_size(buf, item2);
5562
5563                 item1_data = malloc(item1_size);
5564                 if (!item1_data)
5565                         return -ENOMEM;
5566                 item2_data = malloc(item2_size);
5567                 if (!item2_data) {
5568                         free(item1_data);
5569                         return -ENOMEM;
5570                 }
5571
5572                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5573                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5574
5575                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5576                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5577                 free(item1_data);
5578                 free(item2_data);
5579
5580                 btrfs_set_item_offset(buf, item1, item2_offset);
5581                 btrfs_set_item_offset(buf, item2, item1_offset);
5582                 btrfs_set_item_size(buf, item1, item2_size);
5583                 btrfs_set_item_size(buf, item2, item1_size);
5584
5585                 path->slots[0] = slot;
5586                 btrfs_set_item_key_unsafe(root, path, &k2);
5587                 path->slots[0] = slot + 1;
5588                 btrfs_set_item_key_unsafe(root, path, &k1);
5589         }
5590         return 0;
5591 }
5592
5593 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5594 {
5595         struct extent_buffer *buf;
5596         struct btrfs_key k1, k2;
5597         int i;
5598         int level = path->lowest_level;
5599         int ret = -EIO;
5600
5601         buf = path->nodes[level];
5602         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5603                 if (level) {
5604                         btrfs_node_key_to_cpu(buf, &k1, i);
5605                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5606                 } else {
5607                         btrfs_item_key_to_cpu(buf, &k1, i);
5608                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5609                 }
5610                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5611                         continue;
5612                 ret = swap_values(root, path, buf, i);
5613                 if (ret)
5614                         break;
5615                 btrfs_mark_buffer_dirty(buf);
5616                 i = 0;
5617         }
5618         return ret;
5619 }
5620
5621 static int delete_bogus_item(struct btrfs_root *root,
5622                              struct btrfs_path *path,
5623                              struct extent_buffer *buf, int slot)
5624 {
5625         struct btrfs_key key;
5626         int nritems = btrfs_header_nritems(buf);
5627
5628         btrfs_item_key_to_cpu(buf, &key, slot);
5629
5630         /* These are all the keys we can deal with missing. */
5631         if (key.type != BTRFS_DIR_INDEX_KEY &&
5632             key.type != BTRFS_EXTENT_ITEM_KEY &&
5633             key.type != BTRFS_METADATA_ITEM_KEY &&
5634             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5635             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5636                 return -1;
5637
5638         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5639                (unsigned long long)key.objectid, key.type,
5640                (unsigned long long)key.offset, slot, buf->start);
5641         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5642                               btrfs_item_nr_offset(slot + 1),
5643                               sizeof(struct btrfs_item) *
5644                               (nritems - slot - 1));
5645         btrfs_set_header_nritems(buf, nritems - 1);
5646         if (slot == 0) {
5647                 struct btrfs_disk_key disk_key;
5648
5649                 btrfs_item_key(buf, &disk_key, 0);
5650                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5651         }
5652         btrfs_mark_buffer_dirty(buf);
5653         return 0;
5654 }
5655
5656 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5657 {
5658         struct extent_buffer *buf;
5659         int i;
5660         int ret = 0;
5661
5662         /* We should only get this for leaves */
5663         BUG_ON(path->lowest_level);
5664         buf = path->nodes[0];
5665 again:
5666         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5667                 unsigned int shift = 0, offset;
5668
5669                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5670                     BTRFS_LEAF_DATA_SIZE(root)) {
5671                         if (btrfs_item_end_nr(buf, i) >
5672                             BTRFS_LEAF_DATA_SIZE(root)) {
5673                                 ret = delete_bogus_item(root, path, buf, i);
5674                                 if (!ret)
5675                                         goto again;
5676                                 fprintf(stderr, "item is off the end of the "
5677                                         "leaf, can't fix\n");
5678                                 ret = -EIO;
5679                                 break;
5680                         }
5681                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5682                                 btrfs_item_end_nr(buf, i);
5683                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5684                            btrfs_item_offset_nr(buf, i - 1)) {
5685                         if (btrfs_item_end_nr(buf, i) >
5686                             btrfs_item_offset_nr(buf, i - 1)) {
5687                                 ret = delete_bogus_item(root, path, buf, i);
5688                                 if (!ret)
5689                                         goto again;
5690                                 fprintf(stderr, "items overlap, can't fix\n");
5691                                 ret = -EIO;
5692                                 break;
5693                         }
5694                         shift = btrfs_item_offset_nr(buf, i - 1) -
5695                                 btrfs_item_end_nr(buf, i);
5696                 }
5697                 if (!shift)
5698                         continue;
5699
5700                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5701                        i, shift, (unsigned long long)buf->start);
5702                 offset = btrfs_item_offset_nr(buf, i);
5703                 memmove_extent_buffer(buf,
5704                                       btrfs_leaf_data(buf) + offset + shift,
5705                                       btrfs_leaf_data(buf) + offset,
5706                                       btrfs_item_size_nr(buf, i));
5707                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5708                                       offset + shift);
5709                 btrfs_mark_buffer_dirty(buf);
5710         }
5711
5712         /*
5713          * We may have moved things, in which case we want to exit so we don't
5714          * write those changes out.  Once we have proper abort functionality in
5715          * progs this can be changed to something nicer.
5716          */
5717         BUG_ON(ret);
5718         return ret;
5719 }
5720
5721 /*
5722  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5723  * then just return -EIO.
5724  */
5725 static int try_to_fix_bad_block(struct btrfs_root *root,
5726                                 struct extent_buffer *buf,
5727                                 enum btrfs_tree_block_status status)
5728 {
5729         struct btrfs_trans_handle *trans;
5730         struct ulist *roots;
5731         struct ulist_node *node;
5732         struct btrfs_root *search_root;
5733         struct btrfs_path path;
5734         struct ulist_iterator iter;
5735         struct btrfs_key root_key, key;
5736         int ret;
5737
5738         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5739             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5740                 return -EIO;
5741
5742         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5743         if (ret)
5744                 return -EIO;
5745
5746         btrfs_init_path(&path);
5747         ULIST_ITER_INIT(&iter);
5748         while ((node = ulist_next(roots, &iter))) {
5749                 root_key.objectid = node->val;
5750                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5751                 root_key.offset = (u64)-1;
5752
5753                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5754                 if (IS_ERR(root)) {
5755                         ret = -EIO;
5756                         break;
5757                 }
5758
5759
5760                 trans = btrfs_start_transaction(search_root, 0);
5761                 if (IS_ERR(trans)) {
5762                         ret = PTR_ERR(trans);
5763                         break;
5764                 }
5765
5766                 path.lowest_level = btrfs_header_level(buf);
5767                 path.skip_check_block = 1;
5768                 if (path.lowest_level)
5769                         btrfs_node_key_to_cpu(buf, &key, 0);
5770                 else
5771                         btrfs_item_key_to_cpu(buf, &key, 0);
5772                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5773                 if (ret) {
5774                         ret = -EIO;
5775                         btrfs_commit_transaction(trans, search_root);
5776                         break;
5777                 }
5778                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5779                         ret = fix_key_order(search_root, &path);
5780                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5781                         ret = fix_item_offset(search_root, &path);
5782                 if (ret) {
5783                         btrfs_commit_transaction(trans, search_root);
5784                         break;
5785                 }
5786                 btrfs_release_path(&path);
5787                 btrfs_commit_transaction(trans, search_root);
5788         }
5789         ulist_free(roots);
5790         btrfs_release_path(&path);
5791         return ret;
5792 }
5793
5794 static int check_block(struct btrfs_root *root,
5795                        struct cache_tree *extent_cache,
5796                        struct extent_buffer *buf, u64 flags)
5797 {
5798         struct extent_record *rec;
5799         struct cache_extent *cache;
5800         struct btrfs_key key;
5801         enum btrfs_tree_block_status status;
5802         int ret = 0;
5803         int level;
5804
5805         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5806         if (!cache)
5807                 return 1;
5808         rec = container_of(cache, struct extent_record, cache);
5809         rec->generation = btrfs_header_generation(buf);
5810
5811         level = btrfs_header_level(buf);
5812         if (btrfs_header_nritems(buf) > 0) {
5813
5814                 if (level == 0)
5815                         btrfs_item_key_to_cpu(buf, &key, 0);
5816                 else
5817                         btrfs_node_key_to_cpu(buf, &key, 0);
5818
5819                 rec->info_objectid = key.objectid;
5820         }
5821         rec->info_level = level;
5822
5823         if (btrfs_is_leaf(buf))
5824                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5825         else
5826                 status = btrfs_check_node(root, &rec->parent_key, buf);
5827
5828         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5829                 if (repair)
5830                         status = try_to_fix_bad_block(root, buf, status);
5831                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5832                         ret = -EIO;
5833                         fprintf(stderr, "bad block %llu\n",
5834                                 (unsigned long long)buf->start);
5835                 } else {
5836                         /*
5837                          * Signal to callers we need to start the scan over
5838                          * again since we'll have cowed blocks.
5839                          */
5840                         ret = -EAGAIN;
5841                 }
5842         } else {
5843                 rec->content_checked = 1;
5844                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5845                         rec->owner_ref_checked = 1;
5846                 else {
5847                         ret = check_owner_ref(root, rec, buf);
5848                         if (!ret)
5849                                 rec->owner_ref_checked = 1;
5850                 }
5851         }
5852         if (!ret)
5853                 maybe_free_extent_rec(extent_cache, rec);
5854         return ret;
5855 }
5856
5857 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5858                                                 u64 parent, u64 root)
5859 {
5860         struct list_head *cur = rec->backrefs.next;
5861         struct extent_backref *node;
5862         struct tree_backref *back;
5863
5864         while(cur != &rec->backrefs) {
5865                 node = to_extent_backref(cur);
5866                 cur = cur->next;
5867                 if (node->is_data)
5868                         continue;
5869                 back = to_tree_backref(node);
5870                 if (parent > 0) {
5871                         if (!node->full_backref)
5872                                 continue;
5873                         if (parent == back->parent)
5874                                 return back;
5875                 } else {
5876                         if (node->full_backref)
5877                                 continue;
5878                         if (back->root == root)
5879                                 return back;
5880                 }
5881         }
5882         return NULL;
5883 }
5884
5885 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5886                                                 u64 parent, u64 root)
5887 {
5888         struct tree_backref *ref = malloc(sizeof(*ref));
5889
5890         if (!ref)
5891                 return NULL;
5892         memset(&ref->node, 0, sizeof(ref->node));
5893         if (parent > 0) {
5894                 ref->parent = parent;
5895                 ref->node.full_backref = 1;
5896         } else {
5897                 ref->root = root;
5898                 ref->node.full_backref = 0;
5899         }
5900         list_add_tail(&ref->node.list, &rec->backrefs);
5901
5902         return ref;
5903 }
5904
5905 static struct data_backref *find_data_backref(struct extent_record *rec,
5906                                                 u64 parent, u64 root,
5907                                                 u64 owner, u64 offset,
5908                                                 int found_ref,
5909                                                 u64 disk_bytenr, u64 bytes)
5910 {
5911         struct list_head *cur = rec->backrefs.next;
5912         struct extent_backref *node;
5913         struct data_backref *back;
5914
5915         while(cur != &rec->backrefs) {
5916                 node = to_extent_backref(cur);
5917                 cur = cur->next;
5918                 if (!node->is_data)
5919                         continue;
5920                 back = to_data_backref(node);
5921                 if (parent > 0) {
5922                         if (!node->full_backref)
5923                                 continue;
5924                         if (parent == back->parent)
5925                                 return back;
5926                 } else {
5927                         if (node->full_backref)
5928                                 continue;
5929                         if (back->root == root && back->owner == owner &&
5930                             back->offset == offset) {
5931                                 if (found_ref && node->found_ref &&
5932                                     (back->bytes != bytes ||
5933                                     back->disk_bytenr != disk_bytenr))
5934                                         continue;
5935                                 return back;
5936                         }
5937                 }
5938         }
5939         return NULL;
5940 }
5941
5942 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5943                                                 u64 parent, u64 root,
5944                                                 u64 owner, u64 offset,
5945                                                 u64 max_size)
5946 {
5947         struct data_backref *ref = malloc(sizeof(*ref));
5948
5949         if (!ref)
5950                 return NULL;
5951         memset(&ref->node, 0, sizeof(ref->node));
5952         ref->node.is_data = 1;
5953
5954         if (parent > 0) {
5955                 ref->parent = parent;
5956                 ref->owner = 0;
5957                 ref->offset = 0;
5958                 ref->node.full_backref = 1;
5959         } else {
5960                 ref->root = root;
5961                 ref->owner = owner;
5962                 ref->offset = offset;
5963                 ref->node.full_backref = 0;
5964         }
5965         ref->bytes = max_size;
5966         ref->found_ref = 0;
5967         ref->num_refs = 0;
5968         list_add_tail(&ref->node.list, &rec->backrefs);
5969         if (max_size > rec->max_size)
5970                 rec->max_size = max_size;
5971         return ref;
5972 }
5973
5974 /* Check if the type of extent matches with its chunk */
5975 static void check_extent_type(struct extent_record *rec)
5976 {
5977         struct btrfs_block_group_cache *bg_cache;
5978
5979         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5980         if (!bg_cache)
5981                 return;
5982
5983         /* data extent, check chunk directly*/
5984         if (!rec->metadata) {
5985                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5986                         rec->wrong_chunk_type = 1;
5987                 return;
5988         }
5989
5990         /* metadata extent, check the obvious case first */
5991         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5992                                  BTRFS_BLOCK_GROUP_METADATA))) {
5993                 rec->wrong_chunk_type = 1;
5994                 return;
5995         }
5996
5997         /*
5998          * Check SYSTEM extent, as it's also marked as metadata, we can only
5999          * make sure it's a SYSTEM extent by its backref
6000          */
6001         if (!list_empty(&rec->backrefs)) {
6002                 struct extent_backref *node;
6003                 struct tree_backref *tback;
6004                 u64 bg_type;
6005
6006                 node = to_extent_backref(rec->backrefs.next);
6007                 if (node->is_data) {
6008                         /* tree block shouldn't have data backref */
6009                         rec->wrong_chunk_type = 1;
6010                         return;
6011                 }
6012                 tback = container_of(node, struct tree_backref, node);
6013
6014                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6015                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6016                 else
6017                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6018                 if (!(bg_cache->flags & bg_type))
6019                         rec->wrong_chunk_type = 1;
6020         }
6021 }
6022
6023 /*
6024  * Allocate a new extent record, fill default values from @tmpl and insert int
6025  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6026  * the cache, otherwise it fails.
6027  */
6028 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6029                 struct extent_record *tmpl)
6030 {
6031         struct extent_record *rec;
6032         int ret = 0;
6033
6034         BUG_ON(tmpl->max_size == 0);
6035         rec = malloc(sizeof(*rec));
6036         if (!rec)
6037                 return -ENOMEM;
6038         rec->start = tmpl->start;
6039         rec->max_size = tmpl->max_size;
6040         rec->nr = max(tmpl->nr, tmpl->max_size);
6041         rec->found_rec = tmpl->found_rec;
6042         rec->content_checked = tmpl->content_checked;
6043         rec->owner_ref_checked = tmpl->owner_ref_checked;
6044         rec->num_duplicates = 0;
6045         rec->metadata = tmpl->metadata;
6046         rec->flag_block_full_backref = FLAG_UNSET;
6047         rec->bad_full_backref = 0;
6048         rec->crossing_stripes = 0;
6049         rec->wrong_chunk_type = 0;
6050         rec->is_root = tmpl->is_root;
6051         rec->refs = tmpl->refs;
6052         rec->extent_item_refs = tmpl->extent_item_refs;
6053         rec->parent_generation = tmpl->parent_generation;
6054         INIT_LIST_HEAD(&rec->backrefs);
6055         INIT_LIST_HEAD(&rec->dups);
6056         INIT_LIST_HEAD(&rec->list);
6057         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6058         rec->cache.start = tmpl->start;
6059         rec->cache.size = tmpl->nr;
6060         ret = insert_cache_extent(extent_cache, &rec->cache);
6061         if (ret) {
6062                 free(rec);
6063                 return ret;
6064         }
6065         bytes_used += rec->nr;
6066
6067         if (tmpl->metadata)
6068                 rec->crossing_stripes = check_crossing_stripes(global_info,
6069                                 rec->start, global_info->tree_root->nodesize);
6070         check_extent_type(rec);
6071         return ret;
6072 }
6073
6074 /*
6075  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6076  * some are hints:
6077  * - refs              - if found, increase refs
6078  * - is_root           - if found, set
6079  * - content_checked   - if found, set
6080  * - owner_ref_checked - if found, set
6081  *
6082  * If not found, create a new one, initialize and insert.
6083  */
6084 static int add_extent_rec(struct cache_tree *extent_cache,
6085                 struct extent_record *tmpl)
6086 {
6087         struct extent_record *rec;
6088         struct cache_extent *cache;
6089         int ret = 0;
6090         int dup = 0;
6091
6092         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6093         if (cache) {
6094                 rec = container_of(cache, struct extent_record, cache);
6095                 if (tmpl->refs)
6096                         rec->refs++;
6097                 if (rec->nr == 1)
6098                         rec->nr = max(tmpl->nr, tmpl->max_size);
6099
6100                 /*
6101                  * We need to make sure to reset nr to whatever the extent
6102                  * record says was the real size, this way we can compare it to
6103                  * the backrefs.
6104                  */
6105                 if (tmpl->found_rec) {
6106                         if (tmpl->start != rec->start || rec->found_rec) {
6107                                 struct extent_record *tmp;
6108
6109                                 dup = 1;
6110                                 if (list_empty(&rec->list))
6111                                         list_add_tail(&rec->list,
6112                                                       &duplicate_extents);
6113
6114                                 /*
6115                                  * We have to do this song and dance in case we
6116                                  * find an extent record that falls inside of
6117                                  * our current extent record but does not have
6118                                  * the same objectid.
6119                                  */
6120                                 tmp = malloc(sizeof(*tmp));
6121                                 if (!tmp)
6122                                         return -ENOMEM;
6123                                 tmp->start = tmpl->start;
6124                                 tmp->max_size = tmpl->max_size;
6125                                 tmp->nr = tmpl->nr;
6126                                 tmp->found_rec = 1;
6127                                 tmp->metadata = tmpl->metadata;
6128                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6129                                 INIT_LIST_HEAD(&tmp->list);
6130                                 list_add_tail(&tmp->list, &rec->dups);
6131                                 rec->num_duplicates++;
6132                         } else {
6133                                 rec->nr = tmpl->nr;
6134                                 rec->found_rec = 1;
6135                         }
6136                 }
6137
6138                 if (tmpl->extent_item_refs && !dup) {
6139                         if (rec->extent_item_refs) {
6140                                 fprintf(stderr, "block %llu rec "
6141                                         "extent_item_refs %llu, passed %llu\n",
6142                                         (unsigned long long)tmpl->start,
6143                                         (unsigned long long)
6144                                                         rec->extent_item_refs,
6145                                         (unsigned long long)tmpl->extent_item_refs);
6146                         }
6147                         rec->extent_item_refs = tmpl->extent_item_refs;
6148                 }
6149                 if (tmpl->is_root)
6150                         rec->is_root = 1;
6151                 if (tmpl->content_checked)
6152                         rec->content_checked = 1;
6153                 if (tmpl->owner_ref_checked)
6154                         rec->owner_ref_checked = 1;
6155                 memcpy(&rec->parent_key, &tmpl->parent_key,
6156                                 sizeof(tmpl->parent_key));
6157                 if (tmpl->parent_generation)
6158                         rec->parent_generation = tmpl->parent_generation;
6159                 if (rec->max_size < tmpl->max_size)
6160                         rec->max_size = tmpl->max_size;
6161
6162                 /*
6163                  * A metadata extent can't cross stripe_len boundary, otherwise
6164                  * kernel scrub won't be able to handle it.
6165                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6166                  * it.
6167                  */
6168                 if (tmpl->metadata)
6169                         rec->crossing_stripes = check_crossing_stripes(
6170                                         global_info, rec->start,
6171                                         global_info->tree_root->nodesize);
6172                 check_extent_type(rec);
6173                 maybe_free_extent_rec(extent_cache, rec);
6174                 return ret;
6175         }
6176
6177         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6178
6179         return ret;
6180 }
6181
6182 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6183                             u64 parent, u64 root, int found_ref)
6184 {
6185         struct extent_record *rec;
6186         struct tree_backref *back;
6187         struct cache_extent *cache;
6188         int ret;
6189
6190         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6191         if (!cache) {
6192                 struct extent_record tmpl;
6193
6194                 memset(&tmpl, 0, sizeof(tmpl));
6195                 tmpl.start = bytenr;
6196                 tmpl.nr = 1;
6197                 tmpl.metadata = 1;
6198                 tmpl.max_size = 1;
6199
6200                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6201                 if (ret)
6202                         return ret;
6203
6204                 /* really a bug in cache_extent implement now */
6205                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6206                 if (!cache)
6207                         return -ENOENT;
6208         }
6209
6210         rec = container_of(cache, struct extent_record, cache);
6211         if (rec->start != bytenr) {
6212                 /*
6213                  * Several cause, from unaligned bytenr to over lapping extents
6214                  */
6215                 return -EEXIST;
6216         }
6217
6218         back = find_tree_backref(rec, parent, root);
6219         if (!back) {
6220                 back = alloc_tree_backref(rec, parent, root);
6221                 if (!back)
6222                         return -ENOMEM;
6223         }
6224
6225         if (found_ref) {
6226                 if (back->node.found_ref) {
6227                         fprintf(stderr, "Extent back ref already exists "
6228                                 "for %llu parent %llu root %llu \n",
6229                                 (unsigned long long)bytenr,
6230                                 (unsigned long long)parent,
6231                                 (unsigned long long)root);
6232                 }
6233                 back->node.found_ref = 1;
6234         } else {
6235                 if (back->node.found_extent_tree) {
6236                         fprintf(stderr, "Extent back ref already exists "
6237                                 "for %llu parent %llu root %llu \n",
6238                                 (unsigned long long)bytenr,
6239                                 (unsigned long long)parent,
6240                                 (unsigned long long)root);
6241                 }
6242                 back->node.found_extent_tree = 1;
6243         }
6244         check_extent_type(rec);
6245         maybe_free_extent_rec(extent_cache, rec);
6246         return 0;
6247 }
6248
6249 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6250                             u64 parent, u64 root, u64 owner, u64 offset,
6251                             u32 num_refs, int found_ref, u64 max_size)
6252 {
6253         struct extent_record *rec;
6254         struct data_backref *back;
6255         struct cache_extent *cache;
6256         int ret;
6257
6258         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6259         if (!cache) {
6260                 struct extent_record tmpl;
6261
6262                 memset(&tmpl, 0, sizeof(tmpl));
6263                 tmpl.start = bytenr;
6264                 tmpl.nr = 1;
6265                 tmpl.max_size = max_size;
6266
6267                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6268                 if (ret)
6269                         return ret;
6270
6271                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6272                 if (!cache)
6273                         abort();
6274         }
6275
6276         rec = container_of(cache, struct extent_record, cache);
6277         if (rec->max_size < max_size)
6278                 rec->max_size = max_size;
6279
6280         /*
6281          * If found_ref is set then max_size is the real size and must match the
6282          * existing refs.  So if we have already found a ref then we need to
6283          * make sure that this ref matches the existing one, otherwise we need
6284          * to add a new backref so we can notice that the backrefs don't match
6285          * and we need to figure out who is telling the truth.  This is to
6286          * account for that awful fsync bug I introduced where we'd end up with
6287          * a btrfs_file_extent_item that would have its length include multiple
6288          * prealloc extents or point inside of a prealloc extent.
6289          */
6290         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6291                                  bytenr, max_size);
6292         if (!back) {
6293                 back = alloc_data_backref(rec, parent, root, owner, offset,
6294                                           max_size);
6295                 BUG_ON(!back);
6296         }
6297
6298         if (found_ref) {
6299                 BUG_ON(num_refs != 1);
6300                 if (back->node.found_ref)
6301                         BUG_ON(back->bytes != max_size);
6302                 back->node.found_ref = 1;
6303                 back->found_ref += 1;
6304                 back->bytes = max_size;
6305                 back->disk_bytenr = bytenr;
6306                 rec->refs += 1;
6307                 rec->content_checked = 1;
6308                 rec->owner_ref_checked = 1;
6309         } else {
6310                 if (back->node.found_extent_tree) {
6311                         fprintf(stderr, "Extent back ref already exists "
6312                                 "for %llu parent %llu root %llu "
6313                                 "owner %llu offset %llu num_refs %lu\n",
6314                                 (unsigned long long)bytenr,
6315                                 (unsigned long long)parent,
6316                                 (unsigned long long)root,
6317                                 (unsigned long long)owner,
6318                                 (unsigned long long)offset,
6319                                 (unsigned long)num_refs);
6320                 }
6321                 back->num_refs = num_refs;
6322                 back->node.found_extent_tree = 1;
6323         }
6324         maybe_free_extent_rec(extent_cache, rec);
6325         return 0;
6326 }
6327
6328 static int add_pending(struct cache_tree *pending,
6329                        struct cache_tree *seen, u64 bytenr, u32 size)
6330 {
6331         int ret;
6332         ret = add_cache_extent(seen, bytenr, size);
6333         if (ret)
6334                 return ret;
6335         add_cache_extent(pending, bytenr, size);
6336         return 0;
6337 }
6338
6339 static int pick_next_pending(struct cache_tree *pending,
6340                         struct cache_tree *reada,
6341                         struct cache_tree *nodes,
6342                         u64 last, struct block_info *bits, int bits_nr,
6343                         int *reada_bits)
6344 {
6345         unsigned long node_start = last;
6346         struct cache_extent *cache;
6347         int ret;
6348
6349         cache = search_cache_extent(reada, 0);
6350         if (cache) {
6351                 bits[0].start = cache->start;
6352                 bits[0].size = cache->size;
6353                 *reada_bits = 1;
6354                 return 1;
6355         }
6356         *reada_bits = 0;
6357         if (node_start > 32768)
6358                 node_start -= 32768;
6359
6360         cache = search_cache_extent(nodes, node_start);
6361         if (!cache)
6362                 cache = search_cache_extent(nodes, 0);
6363
6364         if (!cache) {
6365                  cache = search_cache_extent(pending, 0);
6366                  if (!cache)
6367                          return 0;
6368                  ret = 0;
6369                  do {
6370                          bits[ret].start = cache->start;
6371                          bits[ret].size = cache->size;
6372                          cache = next_cache_extent(cache);
6373                          ret++;
6374                  } while (cache && ret < bits_nr);
6375                  return ret;
6376         }
6377
6378         ret = 0;
6379         do {
6380                 bits[ret].start = cache->start;
6381                 bits[ret].size = cache->size;
6382                 cache = next_cache_extent(cache);
6383                 ret++;
6384         } while (cache && ret < bits_nr);
6385
6386         if (bits_nr - ret > 8) {
6387                 u64 lookup = bits[0].start + bits[0].size;
6388                 struct cache_extent *next;
6389                 next = search_cache_extent(pending, lookup);
6390                 while(next) {
6391                         if (next->start - lookup > 32768)
6392                                 break;
6393                         bits[ret].start = next->start;
6394                         bits[ret].size = next->size;
6395                         lookup = next->start + next->size;
6396                         ret++;
6397                         if (ret == bits_nr)
6398                                 break;
6399                         next = next_cache_extent(next);
6400                         if (!next)
6401                                 break;
6402                 }
6403         }
6404         return ret;
6405 }
6406
6407 static void free_chunk_record(struct cache_extent *cache)
6408 {
6409         struct chunk_record *rec;
6410
6411         rec = container_of(cache, struct chunk_record, cache);
6412         list_del_init(&rec->list);
6413         list_del_init(&rec->dextents);
6414         free(rec);
6415 }
6416
6417 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6418 {
6419         cache_tree_free_extents(chunk_cache, free_chunk_record);
6420 }
6421
6422 static void free_device_record(struct rb_node *node)
6423 {
6424         struct device_record *rec;
6425
6426         rec = container_of(node, struct device_record, node);
6427         free(rec);
6428 }
6429
6430 FREE_RB_BASED_TREE(device_cache, free_device_record);
6431
6432 int insert_block_group_record(struct block_group_tree *tree,
6433                               struct block_group_record *bg_rec)
6434 {
6435         int ret;
6436
6437         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6438         if (ret)
6439                 return ret;
6440
6441         list_add_tail(&bg_rec->list, &tree->block_groups);
6442         return 0;
6443 }
6444
6445 static void free_block_group_record(struct cache_extent *cache)
6446 {
6447         struct block_group_record *rec;
6448
6449         rec = container_of(cache, struct block_group_record, cache);
6450         list_del_init(&rec->list);
6451         free(rec);
6452 }
6453
6454 void free_block_group_tree(struct block_group_tree *tree)
6455 {
6456         cache_tree_free_extents(&tree->tree, free_block_group_record);
6457 }
6458
6459 int insert_device_extent_record(struct device_extent_tree *tree,
6460                                 struct device_extent_record *de_rec)
6461 {
6462         int ret;
6463
6464         /*
6465          * Device extent is a bit different from the other extents, because
6466          * the extents which belong to the different devices may have the
6467          * same start and size, so we need use the special extent cache
6468          * search/insert functions.
6469          */
6470         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6471         if (ret)
6472                 return ret;
6473
6474         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6475         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6476         return 0;
6477 }
6478
6479 static void free_device_extent_record(struct cache_extent *cache)
6480 {
6481         struct device_extent_record *rec;
6482
6483         rec = container_of(cache, struct device_extent_record, cache);
6484         if (!list_empty(&rec->chunk_list))
6485                 list_del_init(&rec->chunk_list);
6486         if (!list_empty(&rec->device_list))
6487                 list_del_init(&rec->device_list);
6488         free(rec);
6489 }
6490
6491 void free_device_extent_tree(struct device_extent_tree *tree)
6492 {
6493         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6494 }
6495
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref: a tree
 * backref when the ref's objectid is below BTRFS_FIRST_FREE_OBJECTID, a data
 * backref (carrying the v0 ref count) otherwise.  key.objectid is used as
 * the extent bytenr and key.offset as the parent.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_key key;
	struct btrfs_extent_ref_v0 *ref0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6516
6517 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6518                                             struct btrfs_key *key,
6519                                             int slot)
6520 {
6521         struct btrfs_chunk *ptr;
6522         struct chunk_record *rec;
6523         int num_stripes, i;
6524
6525         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6526         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6527
6528         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6529         if (!rec) {
6530                 fprintf(stderr, "memory allocation failed\n");
6531                 exit(-1);
6532         }
6533
6534         INIT_LIST_HEAD(&rec->list);
6535         INIT_LIST_HEAD(&rec->dextents);
6536         rec->bg_rec = NULL;
6537
6538         rec->cache.start = key->offset;
6539         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6540
6541         rec->generation = btrfs_header_generation(leaf);
6542
6543         rec->objectid = key->objectid;
6544         rec->type = key->type;
6545         rec->offset = key->offset;
6546
6547         rec->length = rec->cache.size;
6548         rec->owner = btrfs_chunk_owner(leaf, ptr);
6549         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6550         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6551         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6552         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6553         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6554         rec->num_stripes = num_stripes;
6555         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6556
6557         for (i = 0; i < rec->num_stripes; ++i) {
6558                 rec->stripes[i].devid =
6559                         btrfs_stripe_devid_nr(leaf, ptr, i);
6560                 rec->stripes[i].offset =
6561                         btrfs_stripe_offset_nr(leaf, ptr, i);
6562                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6563                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6564                                 BTRFS_UUID_SIZE);
6565         }
6566
6567         return rec;
6568 }
6569
6570 static int process_chunk_item(struct cache_tree *chunk_cache,
6571                               struct btrfs_key *key, struct extent_buffer *eb,
6572                               int slot)
6573 {
6574         struct chunk_record *rec;
6575         struct btrfs_chunk *chunk;
6576         int ret = 0;
6577
6578         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6579         /*
6580          * Do extra check for this chunk item,
6581          *
6582          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6583          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6584          * and owner<->key_type check.
6585          */
6586         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6587                                       key->offset);
6588         if (ret < 0) {
6589                 error("chunk(%llu, %llu) is not valid, ignore it",
6590                       key->offset, btrfs_chunk_length(eb, chunk));
6591                 return 0;
6592         }
6593         rec = btrfs_new_chunk_record(eb, key, slot);
6594         ret = insert_cache_extent(chunk_cache, &rec->cache);
6595         if (ret) {
6596                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6597                         rec->offset, rec->length);
6598                 free(rec);
6599         }
6600
6601         return ret;
6602 }
6603
6604 static int process_device_item(struct rb_root *dev_cache,
6605                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6606 {
6607         struct btrfs_dev_item *ptr;
6608         struct device_record *rec;
6609         int ret = 0;
6610
6611         ptr = btrfs_item_ptr(eb,
6612                 slot, struct btrfs_dev_item);
6613
6614         rec = malloc(sizeof(*rec));
6615         if (!rec) {
6616                 fprintf(stderr, "memory allocation failed\n");
6617                 return -ENOMEM;
6618         }
6619
6620         rec->devid = key->offset;
6621         rec->generation = btrfs_header_generation(eb);
6622
6623         rec->objectid = key->objectid;
6624         rec->type = key->type;
6625         rec->offset = key->offset;
6626
6627         rec->devid = btrfs_device_id(eb, ptr);
6628         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6629         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6630
6631         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6632         if (ret) {
6633                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6634                 free(rec);
6635         }
6636
6637         return ret;
6638 }
6639
6640 struct block_group_record *
6641 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6642                              int slot)
6643 {
6644         struct btrfs_block_group_item *ptr;
6645         struct block_group_record *rec;
6646
6647         rec = calloc(1, sizeof(*rec));
6648         if (!rec) {
6649                 fprintf(stderr, "memory allocation failed\n");
6650                 exit(-1);
6651         }
6652
6653         rec->cache.start = key->objectid;
6654         rec->cache.size = key->offset;
6655
6656         rec->generation = btrfs_header_generation(leaf);
6657
6658         rec->objectid = key->objectid;
6659         rec->type = key->type;
6660         rec->offset = key->offset;
6661
6662         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6663         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6664
6665         INIT_LIST_HEAD(&rec->list);
6666
6667         return rec;
6668 }
6669
6670 static int process_block_group_item(struct block_group_tree *block_group_cache,
6671                                     struct btrfs_key *key,
6672                                     struct extent_buffer *eb, int slot)
6673 {
6674         struct block_group_record *rec;
6675         int ret = 0;
6676
6677         rec = btrfs_new_block_group_record(eb, key, slot);
6678         ret = insert_block_group_record(block_group_cache, rec);
6679         if (ret) {
6680                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6681                         rec->objectid, rec->offset);
6682                 free(rec);
6683         }
6684
6685         return ret;
6686 }
6687
6688 struct device_extent_record *
6689 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6690                                struct btrfs_key *key, int slot)
6691 {
6692         struct device_extent_record *rec;
6693         struct btrfs_dev_extent *ptr;
6694
6695         rec = calloc(1, sizeof(*rec));
6696         if (!rec) {
6697                 fprintf(stderr, "memory allocation failed\n");
6698                 exit(-1);
6699         }
6700
6701         rec->cache.objectid = key->objectid;
6702         rec->cache.start = key->offset;
6703
6704         rec->generation = btrfs_header_generation(leaf);
6705
6706         rec->objectid = key->objectid;
6707         rec->type = key->type;
6708         rec->offset = key->offset;
6709
6710         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6711         rec->chunk_objecteid =
6712                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6713         rec->chunk_offset =
6714                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6715         rec->length = btrfs_dev_extent_length(leaf, ptr);
6716         rec->cache.size = rec->length;
6717
6718         INIT_LIST_HEAD(&rec->chunk_list);
6719         INIT_LIST_HEAD(&rec->device_list);
6720
6721         return rec;
6722 }
6723
6724 static int
6725 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6726                            struct btrfs_key *key, struct extent_buffer *eb,
6727                            int slot)
6728 {
6729         struct device_extent_record *rec;
6730         int ret;
6731
6732         rec = btrfs_new_device_extent_record(eb, key, slot);
6733         ret = insert_device_extent_record(dev_extent_cache, rec);
6734         if (ret) {
6735                 fprintf(stderr,
6736                         "Device extent[%llu, %llu, %llu] existed.\n",
6737                         rec->objectid, rec->offset, rec->length);
6738                 free(rec);
6739         }
6740
6741         return ret;
6742 }
6743
6744 static int process_extent_item(struct btrfs_root *root,
6745                                struct cache_tree *extent_cache,
6746                                struct extent_buffer *eb, int slot)
6747 {
6748         struct btrfs_extent_item *ei;
6749         struct btrfs_extent_inline_ref *iref;
6750         struct btrfs_extent_data_ref *dref;
6751         struct btrfs_shared_data_ref *sref;
6752         struct btrfs_key key;
6753         struct extent_record tmpl;
6754         unsigned long end;
6755         unsigned long ptr;
6756         int ret;
6757         int type;
6758         u32 item_size = btrfs_item_size_nr(eb, slot);
6759         u64 refs = 0;
6760         u64 offset;
6761         u64 num_bytes;
6762         int metadata = 0;
6763
6764         btrfs_item_key_to_cpu(eb, &key, slot);
6765
6766         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6767                 metadata = 1;
6768                 num_bytes = root->nodesize;
6769         } else {
6770                 num_bytes = key.offset;
6771         }
6772
6773         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6774                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6775                       key.objectid, root->sectorsize);
6776                 return -EIO;
6777         }
6778         if (item_size < sizeof(*ei)) {
6779 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6780                 struct btrfs_extent_item_v0 *ei0;
6781                 BUG_ON(item_size != sizeof(*ei0));
6782                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6783                 refs = btrfs_extent_refs_v0(eb, ei0);
6784 #else
6785                 BUG();
6786 #endif
6787                 memset(&tmpl, 0, sizeof(tmpl));
6788                 tmpl.start = key.objectid;
6789                 tmpl.nr = num_bytes;
6790                 tmpl.extent_item_refs = refs;
6791                 tmpl.metadata = metadata;
6792                 tmpl.found_rec = 1;
6793                 tmpl.max_size = num_bytes;
6794
6795                 return add_extent_rec(extent_cache, &tmpl);
6796         }
6797
6798         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6799         refs = btrfs_extent_refs(eb, ei);
6800         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6801                 metadata = 1;
6802         else
6803                 metadata = 0;
6804         if (metadata && num_bytes != root->nodesize) {
6805                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6806                       num_bytes, root->nodesize);
6807                 return -EIO;
6808         }
6809         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6810                 error("ignore invalid data extent, length %llu is not aligned to %u",
6811                       num_bytes, root->sectorsize);
6812                 return -EIO;
6813         }
6814
6815         memset(&tmpl, 0, sizeof(tmpl));
6816         tmpl.start = key.objectid;
6817         tmpl.nr = num_bytes;
6818         tmpl.extent_item_refs = refs;
6819         tmpl.metadata = metadata;
6820         tmpl.found_rec = 1;
6821         tmpl.max_size = num_bytes;
6822         add_extent_rec(extent_cache, &tmpl);
6823
6824         ptr = (unsigned long)(ei + 1);
6825         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6826             key.type == BTRFS_EXTENT_ITEM_KEY)
6827                 ptr += sizeof(struct btrfs_tree_block_info);
6828
6829         end = (unsigned long)ei + item_size;
6830         while (ptr < end) {
6831                 iref = (struct btrfs_extent_inline_ref *)ptr;
6832                 type = btrfs_extent_inline_ref_type(eb, iref);
6833                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6834                 switch (type) {
6835                 case BTRFS_TREE_BLOCK_REF_KEY:
6836                         ret = add_tree_backref(extent_cache, key.objectid,
6837                                         0, offset, 0);
6838                         if (ret < 0)
6839                                 error(
6840                         "add_tree_backref failed (extent items tree block): %s",
6841                                       strerror(-ret));
6842                         break;
6843                 case BTRFS_SHARED_BLOCK_REF_KEY:
6844                         ret = add_tree_backref(extent_cache, key.objectid,
6845                                         offset, 0, 0);
6846                         if (ret < 0)
6847                                 error(
6848                         "add_tree_backref failed (extent items shared block): %s",
6849                                       strerror(-ret));
6850                         break;
6851                 case BTRFS_EXTENT_DATA_REF_KEY:
6852                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6853                         add_data_backref(extent_cache, key.objectid, 0,
6854                                         btrfs_extent_data_ref_root(eb, dref),
6855                                         btrfs_extent_data_ref_objectid(eb,
6856                                                                        dref),
6857                                         btrfs_extent_data_ref_offset(eb, dref),
6858                                         btrfs_extent_data_ref_count(eb, dref),
6859                                         0, num_bytes);
6860                         break;
6861                 case BTRFS_SHARED_DATA_REF_KEY:
6862                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6863                         add_data_backref(extent_cache, key.objectid, offset,
6864                                         0, 0, 0,
6865                                         btrfs_shared_data_ref_count(eb, sref),
6866                                         0, num_bytes);
6867                         break;
6868                 default:
6869                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6870                                 key.objectid, key.type, num_bytes);
6871                         goto out;
6872                 }
6873                 ptr += btrfs_extent_inline_ref_size(type);
6874         }
6875         WARN_ON(ptr > end);
6876 out:
6877         return 0;
6878 }
6879
6880 static int check_cache_range(struct btrfs_root *root,
6881                              struct btrfs_block_group_cache *cache,
6882                              u64 offset, u64 bytes)
6883 {
6884         struct btrfs_free_space *entry;
6885         u64 *logical;
6886         u64 bytenr;
6887         int stripe_len;
6888         int i, nr, ret;
6889
6890         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6891                 bytenr = btrfs_sb_offset(i);
6892                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6893                                        cache->key.objectid, bytenr, 0,
6894                                        &logical, &nr, &stripe_len);
6895                 if (ret)
6896                         return ret;
6897
6898                 while (nr--) {
6899                         if (logical[nr] + stripe_len <= offset)
6900                                 continue;
6901                         if (offset + bytes <= logical[nr])
6902                                 continue;
6903                         if (logical[nr] == offset) {
6904                                 if (stripe_len >= bytes) {
6905                                         free(logical);
6906                                         return 0;
6907                                 }
6908                                 bytes -= stripe_len;
6909                                 offset += stripe_len;
6910                         } else if (logical[nr] < offset) {
6911                                 if (logical[nr] + stripe_len >=
6912                                     offset + bytes) {
6913                                         free(logical);
6914                                         return 0;
6915                                 }
6916                                 bytes = (offset + bytes) -
6917                                         (logical[nr] + stripe_len);
6918                                 offset = logical[nr] + stripe_len;
6919                         } else {
6920                                 /*
6921                                  * Could be tricky, the super may land in the
6922                                  * middle of the area we're checking.  First
6923                                  * check the easiest case, it's at the end.
6924                                  */
6925                                 if (logical[nr] + stripe_len >=
6926                                     bytes + offset) {
6927                                         bytes = logical[nr] - offset;
6928                                         continue;
6929                                 }
6930
6931                                 /* Check the left side */
6932                                 ret = check_cache_range(root, cache,
6933                                                         offset,
6934                                                         logical[nr] - offset);
6935                                 if (ret) {
6936                                         free(logical);
6937                                         return ret;
6938                                 }
6939
6940                                 /* Now we continue with the right side */
6941                                 bytes = (offset + bytes) -
6942                                         (logical[nr] + stripe_len);
6943                                 offset = logical[nr] + stripe_len;
6944                         }
6945                 }
6946
6947                 free(logical);
6948         }
6949
6950         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6951         if (!entry) {
6952                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6953                         offset, offset+bytes);
6954                 return -EINVAL;
6955         }
6956
6957         if (entry->offset != offset) {
6958                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6959                         entry->offset);
6960                 return -EINVAL;
6961         }
6962
6963         if (entry->bytes != bytes) {
6964                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6965                         bytes, entry->bytes, offset);
6966                 return -EINVAL;
6967         }
6968
6969         unlink_free_space(cache->free_space_ctl, entry);
6970         free(entry);
6971         return 0;
6972 }
6973
6974 static int verify_space_cache(struct btrfs_root *root,
6975                               struct btrfs_block_group_cache *cache)
6976 {
6977         struct btrfs_path path;
6978         struct extent_buffer *leaf;
6979         struct btrfs_key key;
6980         u64 last;
6981         int ret = 0;
6982
6983         root = root->fs_info->extent_root;
6984
6985         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6986
6987         btrfs_init_path(&path);
6988         key.objectid = last;
6989         key.offset = 0;
6990         key.type = BTRFS_EXTENT_ITEM_KEY;
6991         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6992         if (ret < 0)
6993                 goto out;
6994         ret = 0;
6995         while (1) {
6996                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6997                         ret = btrfs_next_leaf(root, &path);
6998                         if (ret < 0)
6999                                 goto out;
7000                         if (ret > 0) {
7001                                 ret = 0;
7002                                 break;
7003                         }
7004                 }
7005                 leaf = path.nodes[0];
7006                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7007                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7008                         break;
7009                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7010                     key.type != BTRFS_METADATA_ITEM_KEY) {
7011                         path.slots[0]++;
7012                         continue;
7013                 }
7014
7015                 if (last == key.objectid) {
7016                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7017                                 last = key.objectid + key.offset;
7018                         else
7019                                 last = key.objectid + root->nodesize;
7020                         path.slots[0]++;
7021                         continue;
7022                 }
7023
7024                 ret = check_cache_range(root, cache, last,
7025                                         key.objectid - last);
7026                 if (ret)
7027                         break;
7028                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7029                         last = key.objectid + key.offset;
7030                 else
7031                         last = key.objectid + root->nodesize;
7032                 path.slots[0]++;
7033         }
7034
7035         if (last < cache->key.objectid + cache->key.offset)
7036                 ret = check_cache_range(root, cache, last,
7037                                         cache->key.objectid +
7038                                         cache->key.offset - last);
7039
7040 out:
7041         btrfs_release_path(&path);
7042
7043         if (!ret &&
7044             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7045                 fprintf(stderr, "There are still entries left in the space "
7046                         "cache\n");
7047                 ret = -EINVAL;
7048         }
7049
7050         return ret;
7051 }
7052
7053 static int check_space_cache(struct btrfs_root *root)
7054 {
7055         struct btrfs_block_group_cache *cache;
7056         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7057         int ret;
7058         int error = 0;
7059
7060         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7061             btrfs_super_generation(root->fs_info->super_copy) !=
7062             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7063                 printf("cache and super generation don't match, space cache "
7064                        "will be invalidated\n");
7065                 return 0;
7066         }
7067
7068         if (ctx.progress_enabled) {
7069                 ctx.tp = TASK_FREE_SPACE;
7070                 task_start(ctx.info);
7071         }
7072
7073         while (1) {
7074                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7075                 if (!cache)
7076                         break;
7077
7078                 start = cache->key.objectid + cache->key.offset;
7079                 if (!cache->free_space_ctl) {
7080                         if (btrfs_init_free_space_ctl(cache,
7081                                                       root->sectorsize)) {
7082                                 ret = -ENOMEM;
7083                                 break;
7084                         }
7085                 } else {
7086                         btrfs_remove_free_space_cache(cache);
7087                 }
7088
7089                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7090                         ret = exclude_super_stripes(root, cache);
7091                         if (ret) {
7092                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7093                                         strerror(-ret));
7094                                 error++;
7095                                 continue;
7096                         }
7097                         ret = load_free_space_tree(root->fs_info, cache);
7098                         free_excluded_extents(root, cache);
7099                         if (ret < 0) {
7100                                 fprintf(stderr, "could not load free space tree: %s\n",
7101                                         strerror(-ret));
7102                                 error++;
7103                                 continue;
7104                         }
7105                         error += ret;
7106                 } else {
7107                         ret = load_free_space_cache(root->fs_info, cache);
7108                         if (!ret)
7109                                 continue;
7110                 }
7111
7112                 ret = verify_space_cache(root, cache);
7113                 if (ret) {
7114                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7115                                 cache->key.objectid);
7116                         error++;
7117                 }
7118         }
7119
7120         task_stop(ctx.info);
7121
7122         return error ? -EINVAL : 0;
7123 }
7124
7125 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7126                         u64 num_bytes, unsigned long leaf_offset,
7127                         struct extent_buffer *eb) {
7128
7129         u64 offset = 0;
7130         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7131         char *data;
7132         unsigned long csum_offset;
7133         u32 csum;
7134         u32 csum_expected;
7135         u64 read_len;
7136         u64 data_checked = 0;
7137         u64 tmp;
7138         int ret = 0;
7139         int mirror;
7140         int num_copies;
7141
7142         if (num_bytes % root->sectorsize)
7143                 return -EINVAL;
7144
7145         data = malloc(num_bytes);
7146         if (!data)
7147                 return -ENOMEM;
7148
7149         while (offset < num_bytes) {
7150                 mirror = 0;
7151 again:
7152                 read_len = num_bytes - offset;
7153                 /* read as much space once a time */
7154                 ret = read_extent_data(root, data + offset,
7155                                 bytenr + offset, &read_len, mirror);
7156                 if (ret)
7157                         goto out;
7158                 data_checked = 0;
7159                 /* verify every 4k data's checksum */
7160                 while (data_checked < read_len) {
7161                         csum = ~(u32)0;
7162                         tmp = offset + data_checked;
7163
7164                         csum = btrfs_csum_data((char *)data + tmp,
7165                                                csum, root->sectorsize);
7166                         btrfs_csum_final(csum, (u8 *)&csum);
7167
7168                         csum_offset = leaf_offset +
7169                                  tmp / root->sectorsize * csum_size;
7170                         read_extent_buffer(eb, (char *)&csum_expected,
7171                                            csum_offset, csum_size);
7172                         /* try another mirror */
7173                         if (csum != csum_expected) {
7174                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7175                                                 mirror, bytenr + tmp,
7176                                                 csum, csum_expected);
7177                                 num_copies = btrfs_num_copies(
7178                                                 &root->fs_info->mapping_tree,
7179                                                 bytenr, num_bytes);
7180                                 if (mirror < num_copies - 1) {
7181                                         mirror += 1;
7182                                         goto again;
7183                                 }
7184                         }
7185                         data_checked += root->sectorsize;
7186                 }
7187                 offset += read_len;
7188         }
7189 out:
7190         free(data);
7191         return ret;
7192 }
7193
7194 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7195                                u64 num_bytes)
7196 {
7197         struct btrfs_path path;
7198         struct extent_buffer *leaf;
7199         struct btrfs_key key;
7200         int ret;
7201
7202         btrfs_init_path(&path);
7203         key.objectid = bytenr;
7204         key.type = BTRFS_EXTENT_ITEM_KEY;
7205         key.offset = (u64)-1;
7206
7207 again:
7208         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7209                                 0, 0);
7210         if (ret < 0) {
7211                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7212                 btrfs_release_path(&path);
7213                 return ret;
7214         } else if (ret) {
7215                 if (path.slots[0] > 0) {
7216                         path.slots[0]--;
7217                 } else {
7218                         ret = btrfs_prev_leaf(root, &path);
7219                         if (ret < 0) {
7220                                 goto out;
7221                         } else if (ret > 0) {
7222                                 ret = 0;
7223                                 goto out;
7224                         }
7225                 }
7226         }
7227
7228         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7229
7230         /*
7231          * Block group items come before extent items if they have the same
7232          * bytenr, so walk back one more just in case.  Dear future traveller,
7233          * first congrats on mastering time travel.  Now if it's not too much
7234          * trouble could you go back to 2006 and tell Chris to make the
7235          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7236          * EXTENT_ITEM_KEY please?
7237          */
7238         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7239                 if (path.slots[0] > 0) {
7240                         path.slots[0]--;
7241                 } else {
7242                         ret = btrfs_prev_leaf(root, &path);
7243                         if (ret < 0) {
7244                                 goto out;
7245                         } else if (ret > 0) {
7246                                 ret = 0;
7247                                 goto out;
7248                         }
7249                 }
7250                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7251         }
7252
7253         while (num_bytes) {
7254                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7255                         ret = btrfs_next_leaf(root, &path);
7256                         if (ret < 0) {
7257                                 fprintf(stderr, "Error going to next leaf "
7258                                         "%d\n", ret);
7259                                 btrfs_release_path(&path);
7260                                 return ret;
7261                         } else if (ret) {
7262                                 break;
7263                         }
7264                 }
7265                 leaf = path.nodes[0];
7266                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7267                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7268                         path.slots[0]++;
7269                         continue;
7270                 }
7271                 if (key.objectid + key.offset < bytenr) {
7272                         path.slots[0]++;
7273                         continue;
7274                 }
7275                 if (key.objectid > bytenr + num_bytes)
7276                         break;
7277
7278                 if (key.objectid == bytenr) {
7279                         if (key.offset >= num_bytes) {
7280                                 num_bytes = 0;
7281                                 break;
7282                         }
7283                         num_bytes -= key.offset;
7284                         bytenr += key.offset;
7285                 } else if (key.objectid < bytenr) {
7286                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7287                                 num_bytes = 0;
7288                                 break;
7289                         }
7290                         num_bytes = (bytenr + num_bytes) -
7291                                 (key.objectid + key.offset);
7292                         bytenr = key.objectid + key.offset;
7293                 } else {
7294                         if (key.objectid + key.offset < bytenr + num_bytes) {
7295                                 u64 new_start = key.objectid + key.offset;
7296                                 u64 new_bytes = bytenr + num_bytes - new_start;
7297
7298                                 /*
7299                                  * Weird case, the extent is in the middle of
7300                                  * our range, we'll have to search one side
7301                                  * and then the other.  Not sure if this happens
7302                                  * in real life, but no harm in coding it up
7303                                  * anyway just in case.
7304                                  */
7305                                 btrfs_release_path(&path);
7306                                 ret = check_extent_exists(root, new_start,
7307                                                           new_bytes);
7308                                 if (ret) {
7309                                         fprintf(stderr, "Right section didn't "
7310                                                 "have a record\n");
7311                                         break;
7312                                 }
7313                                 num_bytes = key.objectid - bytenr;
7314                                 goto again;
7315                         }
7316                         num_bytes = key.objectid - bytenr;
7317                 }
7318                 path.slots[0]++;
7319         }
7320         ret = 0;
7321
7322 out:
7323         if (num_bytes && !ret) {
7324                 fprintf(stderr, "There are no extents for csum range "
7325                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7326                 ret = 1;
7327         }
7328
7329         btrfs_release_path(&path);
7330         return ret;
7331 }
7332
7333 static int check_csums(struct btrfs_root *root)
7334 {
7335         struct btrfs_path path;
7336         struct extent_buffer *leaf;
7337         struct btrfs_key key;
7338         u64 offset = 0, num_bytes = 0;
7339         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7340         int errors = 0;
7341         int ret;
7342         u64 data_len;
7343         unsigned long leaf_offset;
7344
7345         root = root->fs_info->csum_root;
7346         if (!extent_buffer_uptodate(root->node)) {
7347                 fprintf(stderr, "No valid csum tree found\n");
7348                 return -ENOENT;
7349         }
7350
7351         btrfs_init_path(&path);
7352         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7353         key.type = BTRFS_EXTENT_CSUM_KEY;
7354         key.offset = 0;
7355         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7356         if (ret < 0) {
7357                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7358                 btrfs_release_path(&path);
7359                 return ret;
7360         }
7361
7362         if (ret > 0 && path.slots[0])
7363                 path.slots[0]--;
7364         ret = 0;
7365
7366         while (1) {
7367                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7368                         ret = btrfs_next_leaf(root, &path);
7369                         if (ret < 0) {
7370                                 fprintf(stderr, "Error going to next leaf "
7371                                         "%d\n", ret);
7372                                 break;
7373                         }
7374                         if (ret)
7375                                 break;
7376                 }
7377                 leaf = path.nodes[0];
7378
7379                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7380                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7381                         path.slots[0]++;
7382                         continue;
7383                 }
7384
7385                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7386                               csum_size) * root->sectorsize;
7387                 if (!check_data_csum)
7388                         goto skip_csum_check;
7389                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7390                 ret = check_extent_csums(root, key.offset, data_len,
7391                                          leaf_offset, leaf);
7392                 if (ret)
7393                         break;
7394 skip_csum_check:
7395                 if (!num_bytes) {
7396                         offset = key.offset;
7397                 } else if (key.offset != offset + num_bytes) {
7398                         ret = check_extent_exists(root, offset, num_bytes);
7399                         if (ret) {
7400                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7401                                         "there is no extent record\n",
7402                                         offset, offset+num_bytes);
7403                                 errors++;
7404                         }
7405                         offset = key.offset;
7406                         num_bytes = 0;
7407                 }
7408                 num_bytes += data_len;
7409                 path.slots[0]++;
7410         }
7411
7412         btrfs_release_path(&path);
7413         return errors;
7414 }
7415
7416 static int is_dropped_key(struct btrfs_key *key,
7417                           struct btrfs_key *drop_key) {
7418         if (key->objectid < drop_key->objectid)
7419                 return 1;
7420         else if (key->objectid == drop_key->objectid) {
7421                 if (key->type < drop_key->type)
7422                         return 1;
7423                 else if (key->type == drop_key->type) {
7424                         if (key->offset < drop_key->offset)
7425                                 return 1;
7426                 }
7427         }
7428         return 0;
7429 }
7430
7431 /*
7432  * Here are the rules for FULL_BACKREF.
7433  *
7434  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7435  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7436  *      FULL_BACKREF set.
7437  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7438  *    if it happened after the relocation occurred since we'll have dropped the
7439  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7440  *    have no real way to know for sure.
7441  *
7442  * We process the blocks one root at a time, and we start from the lowest root
7443  * objectid and go to the highest.  So we can just lookup the owner backref for
7444  * the record and if we don't find it then we know it doesn't exist and we have
7445  * a FULL BACKREF.
7446  *
7447  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7448  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7449  * be set or not and then we can check later once we've gathered all the refs.
7450  */
7451 static int calc_extent_flag(struct cache_tree *extent_cache,
7452                            struct extent_buffer *buf,
7453                            struct root_item_record *ri,
7454                            u64 *flags)
7455 {
7456         struct extent_record *rec;
7457         struct cache_extent *cache;
7458         struct tree_backref *tback;
7459         u64 owner = 0;
7460
7461         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7462         /* we have added this extent before */
7463         if (!cache)
7464                 return -ENOENT;
7465
7466         rec = container_of(cache, struct extent_record, cache);
7467
7468         /*
7469          * Except file/reloc tree, we can not have
7470          * FULL BACKREF MODE
7471          */
7472         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7473                 goto normal;
7474         /*
7475          * root node
7476          */
7477         if (buf->start == ri->bytenr)
7478                 goto normal;
7479
7480         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7481                 goto full_backref;
7482
7483         owner = btrfs_header_owner(buf);
7484         if (owner == ri->objectid)
7485                 goto normal;
7486
7487         tback = find_tree_backref(rec, 0, owner);
7488         if (!tback)
7489                 goto full_backref;
7490 normal:
7491         *flags = 0;
7492         if (rec->flag_block_full_backref != FLAG_UNSET &&
7493             rec->flag_block_full_backref != 0)
7494                 rec->bad_full_backref = 1;
7495         return 0;
7496 full_backref:
7497         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7498         if (rec->flag_block_full_backref != FLAG_UNSET &&
7499             rec->flag_block_full_backref != 1)
7500                 rec->bad_full_backref = 1;
7501         return 0;
7502 }
7503
7504 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7505 {
7506         fprintf(stderr, "Invalid key type(");
7507         print_key_type(stderr, 0, key_type);
7508         fprintf(stderr, ") found in root(");
7509         print_objectid(stderr, rootid, 0);
7510         fprintf(stderr, ")\n");
7511 }
7512
7513 /*
7514  * Check if the key is valid with its extent buffer.
7515  *
7516  * This is a early check in case invalid key exists in a extent buffer
7517  * This is not comprehensive yet, but should prevent wrong key/item passed
7518  * further
7519  */
7520 static int check_type_with_root(u64 rootid, u8 key_type)
7521 {
7522         switch (key_type) {
7523         /* Only valid in chunk tree */
7524         case BTRFS_DEV_ITEM_KEY:
7525         case BTRFS_CHUNK_ITEM_KEY:
7526                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7527                         goto err;
7528                 break;
7529         /* valid in csum and log tree */
7530         case BTRFS_CSUM_TREE_OBJECTID:
7531                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7532                       is_fstree(rootid)))
7533                         goto err;
7534                 break;
7535         case BTRFS_EXTENT_ITEM_KEY:
7536         case BTRFS_METADATA_ITEM_KEY:
7537         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7538                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7539                         goto err;
7540                 break;
7541         case BTRFS_ROOT_ITEM_KEY:
7542                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7543                         goto err;
7544                 break;
7545         case BTRFS_DEV_EXTENT_KEY:
7546                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7547                         goto err;
7548                 break;
7549         }
7550         return 0;
7551 err:
7552         report_mismatch_key_root(key_type, rootid);
7553         return -EINVAL;
7554 }
7555
7556 static int run_next_block(struct btrfs_root *root,
7557                           struct block_info *bits,
7558                           int bits_nr,
7559                           u64 *last,
7560                           struct cache_tree *pending,
7561                           struct cache_tree *seen,
7562                           struct cache_tree *reada,
7563                           struct cache_tree *nodes,
7564                           struct cache_tree *extent_cache,
7565                           struct cache_tree *chunk_cache,
7566                           struct rb_root *dev_cache,
7567                           struct block_group_tree *block_group_cache,
7568                           struct device_extent_tree *dev_extent_cache,
7569                           struct root_item_record *ri)
7570 {
7571         struct extent_buffer *buf;
7572         struct extent_record *rec = NULL;
7573         u64 bytenr;
7574         u32 size;
7575         u64 parent;
7576         u64 owner;
7577         u64 flags;
7578         u64 ptr;
7579         u64 gen = 0;
7580         int ret = 0;
7581         int i;
7582         int nritems;
7583         struct btrfs_key key;
7584         struct cache_extent *cache;
7585         int reada_bits;
7586
7587         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7588                                     bits_nr, &reada_bits);
7589         if (nritems == 0)
7590                 return 1;
7591
7592         if (!reada_bits) {
7593                 for(i = 0; i < nritems; i++) {
7594                         ret = add_cache_extent(reada, bits[i].start,
7595                                                bits[i].size);
7596                         if (ret == -EEXIST)
7597                                 continue;
7598
7599                         /* fixme, get the parent transid */
7600                         readahead_tree_block(root, bits[i].start,
7601                                              bits[i].size, 0);
7602                 }
7603         }
7604         *last = bits[0].start;
7605         bytenr = bits[0].start;
7606         size = bits[0].size;
7607
7608         cache = lookup_cache_extent(pending, bytenr, size);
7609         if (cache) {
7610                 remove_cache_extent(pending, cache);
7611                 free(cache);
7612         }
7613         cache = lookup_cache_extent(reada, bytenr, size);
7614         if (cache) {
7615                 remove_cache_extent(reada, cache);
7616                 free(cache);
7617         }
7618         cache = lookup_cache_extent(nodes, bytenr, size);
7619         if (cache) {
7620                 remove_cache_extent(nodes, cache);
7621                 free(cache);
7622         }
7623         cache = lookup_cache_extent(extent_cache, bytenr, size);
7624         if (cache) {
7625                 rec = container_of(cache, struct extent_record, cache);
7626                 gen = rec->parent_generation;
7627         }
7628
7629         /* fixme, get the real parent transid */
7630         buf = read_tree_block(root, bytenr, size, gen);
7631         if (!extent_buffer_uptodate(buf)) {
7632                 record_bad_block_io(root->fs_info,
7633                                     extent_cache, bytenr, size);
7634                 goto out;
7635         }
7636
7637         nritems = btrfs_header_nritems(buf);
7638
7639         flags = 0;
7640         if (!init_extent_tree) {
7641                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7642                                        btrfs_header_level(buf), 1, NULL,
7643                                        &flags);
7644                 if (ret < 0) {
7645                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7646                         if (ret < 0) {
7647                                 fprintf(stderr, "Couldn't calc extent flags\n");
7648                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7649                         }
7650                 }
7651         } else {
7652                 flags = 0;
7653                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7654                 if (ret < 0) {
7655                         fprintf(stderr, "Couldn't calc extent flags\n");
7656                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7657                 }
7658         }
7659
7660         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7661                 if (ri != NULL &&
7662                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7663                     ri->objectid == btrfs_header_owner(buf)) {
7664                         /*
7665                          * Ok we got to this block from it's original owner and
7666                          * we have FULL_BACKREF set.  Relocation can leave
7667                          * converted blocks over so this is altogether possible,
7668                          * however it's not possible if the generation > the
7669                          * last snapshot, so check for this case.
7670                          */
7671                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7672                             btrfs_header_generation(buf) > ri->last_snapshot) {
7673                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7674                                 rec->bad_full_backref = 1;
7675                         }
7676                 }
7677         } else {
7678                 if (ri != NULL &&
7679                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7680                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7681                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7682                         rec->bad_full_backref = 1;
7683                 }
7684         }
7685
7686         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7687                 rec->flag_block_full_backref = 1;
7688                 parent = bytenr;
7689                 owner = 0;
7690         } else {
7691                 rec->flag_block_full_backref = 0;
7692                 parent = 0;
7693                 owner = btrfs_header_owner(buf);
7694         }
7695
7696         ret = check_block(root, extent_cache, buf, flags);
7697         if (ret)
7698                 goto out;
7699
7700         if (btrfs_is_leaf(buf)) {
7701                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7702                 for (i = 0; i < nritems; i++) {
7703                         struct btrfs_file_extent_item *fi;
7704                         btrfs_item_key_to_cpu(buf, &key, i);
7705                         /*
7706                          * Check key type against the leaf owner.
7707                          * Could filter quite a lot of early error if
7708                          * owner is correct
7709                          */
7710                         if (check_type_with_root(btrfs_header_owner(buf),
7711                                                  key.type)) {
7712                                 fprintf(stderr, "ignoring invalid key\n");
7713                                 continue;
7714                         }
7715                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7716                                 process_extent_item(root, extent_cache, buf,
7717                                                     i);
7718                                 continue;
7719                         }
7720                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7721                                 process_extent_item(root, extent_cache, buf,
7722                                                     i);
7723                                 continue;
7724                         }
7725                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7726                                 total_csum_bytes +=
7727                                         btrfs_item_size_nr(buf, i);
7728                                 continue;
7729                         }
7730                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7731                                 process_chunk_item(chunk_cache, &key, buf, i);
7732                                 continue;
7733                         }
7734                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7735                                 process_device_item(dev_cache, &key, buf, i);
7736                                 continue;
7737                         }
7738                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7739                                 process_block_group_item(block_group_cache,
7740                                         &key, buf, i);
7741                                 continue;
7742                         }
7743                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7744                                 process_device_extent_item(dev_extent_cache,
7745                                         &key, buf, i);
7746                                 continue;
7747
7748                         }
7749                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7750 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7751                                 process_extent_ref_v0(extent_cache, buf, i);
7752 #else
7753                                 BUG();
7754 #endif
7755                                 continue;
7756                         }
7757
7758                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7759                                 ret = add_tree_backref(extent_cache,
7760                                                 key.objectid, 0, key.offset, 0);
7761                                 if (ret < 0)
7762                                         error(
7763                                 "add_tree_backref failed (leaf tree block): %s",
7764                                               strerror(-ret));
7765                                 continue;
7766                         }
7767                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7768                                 ret = add_tree_backref(extent_cache,
7769                                                 key.objectid, key.offset, 0, 0);
7770                                 if (ret < 0)
7771                                         error(
7772                                 "add_tree_backref failed (leaf shared block): %s",
7773                                               strerror(-ret));
7774                                 continue;
7775                         }
7776                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7777                                 struct btrfs_extent_data_ref *ref;
7778                                 ref = btrfs_item_ptr(buf, i,
7779                                                 struct btrfs_extent_data_ref);
7780                                 add_data_backref(extent_cache,
7781                                         key.objectid, 0,
7782                                         btrfs_extent_data_ref_root(buf, ref),
7783                                         btrfs_extent_data_ref_objectid(buf,
7784                                                                        ref),
7785                                         btrfs_extent_data_ref_offset(buf, ref),
7786                                         btrfs_extent_data_ref_count(buf, ref),
7787                                         0, root->sectorsize);
7788                                 continue;
7789                         }
7790                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7791                                 struct btrfs_shared_data_ref *ref;
7792                                 ref = btrfs_item_ptr(buf, i,
7793                                                 struct btrfs_shared_data_ref);
7794                                 add_data_backref(extent_cache,
7795                                         key.objectid, key.offset, 0, 0, 0,
7796                                         btrfs_shared_data_ref_count(buf, ref),
7797                                         0, root->sectorsize);
7798                                 continue;
7799                         }
7800                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7801                                 struct bad_item *bad;
7802
7803                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7804                                         continue;
7805                                 if (!owner)
7806                                         continue;
7807                                 bad = malloc(sizeof(struct bad_item));
7808                                 if (!bad)
7809                                         continue;
7810                                 INIT_LIST_HEAD(&bad->list);
7811                                 memcpy(&bad->key, &key,
7812                                        sizeof(struct btrfs_key));
7813                                 bad->root_id = owner;
7814                                 list_add_tail(&bad->list, &delete_items);
7815                                 continue;
7816                         }
7817                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7818                                 continue;
7819                         fi = btrfs_item_ptr(buf, i,
7820                                             struct btrfs_file_extent_item);
7821                         if (btrfs_file_extent_type(buf, fi) ==
7822                             BTRFS_FILE_EXTENT_INLINE)
7823                                 continue;
7824                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7825                                 continue;
7826
7827                         data_bytes_allocated +=
7828                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7829                         if (data_bytes_allocated < root->sectorsize) {
7830                                 abort();
7831                         }
7832                         data_bytes_referenced +=
7833                                 btrfs_file_extent_num_bytes(buf, fi);
7834                         add_data_backref(extent_cache,
7835                                 btrfs_file_extent_disk_bytenr(buf, fi),
7836                                 parent, owner, key.objectid, key.offset -
7837                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7838                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7839                 }
7840         } else {
7841                 int level;
7842                 struct btrfs_key first_key;
7843
7844                 first_key.objectid = 0;
7845
7846                 if (nritems > 0)
7847                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7848                 level = btrfs_header_level(buf);
7849                 for (i = 0; i < nritems; i++) {
7850                         struct extent_record tmpl;
7851
7852                         ptr = btrfs_node_blockptr(buf, i);
7853                         size = root->nodesize;
7854                         btrfs_node_key_to_cpu(buf, &key, i);
7855                         if (ri != NULL) {
7856                                 if ((level == ri->drop_level)
7857                                     && is_dropped_key(&key, &ri->drop_key)) {
7858                                         continue;
7859                                 }
7860                         }
7861
7862                         memset(&tmpl, 0, sizeof(tmpl));
7863                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7864                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7865                         tmpl.start = ptr;
7866                         tmpl.nr = size;
7867                         tmpl.refs = 1;
7868                         tmpl.metadata = 1;
7869                         tmpl.max_size = size;
7870                         ret = add_extent_rec(extent_cache, &tmpl);
7871                         if (ret < 0)
7872                                 goto out;
7873
7874                         ret = add_tree_backref(extent_cache, ptr, parent,
7875                                         owner, 1);
7876                         if (ret < 0) {
7877                                 error(
7878                                 "add_tree_backref failed (non-leaf block): %s",
7879                                       strerror(-ret));
7880                                 continue;
7881                         }
7882
7883                         if (level > 1) {
7884                                 add_pending(nodes, seen, ptr, size);
7885                         } else {
7886                                 add_pending(pending, seen, ptr, size);
7887                         }
7888                 }
7889                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7890                                       nritems) * sizeof(struct btrfs_key_ptr);
7891         }
7892         total_btree_bytes += buf->len;
7893         if (fs_root_objectid(btrfs_header_owner(buf)))
7894                 total_fs_tree_bytes += buf->len;
7895         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7896                 total_extent_tree_bytes += buf->len;
7897         if (!found_old_backref &&
7898             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7899             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7900             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7901                 found_old_backref = 1;
7902 out:
7903         free_extent_buffer(buf);
7904         return ret;
7905 }
7906
7907 static int add_root_to_pending(struct extent_buffer *buf,
7908                                struct cache_tree *extent_cache,
7909                                struct cache_tree *pending,
7910                                struct cache_tree *seen,
7911                                struct cache_tree *nodes,
7912                                u64 objectid)
7913 {
7914         struct extent_record tmpl;
7915         int ret;
7916
7917         if (btrfs_header_level(buf) > 0)
7918                 add_pending(nodes, seen, buf->start, buf->len);
7919         else
7920                 add_pending(pending, seen, buf->start, buf->len);
7921
7922         memset(&tmpl, 0, sizeof(tmpl));
7923         tmpl.start = buf->start;
7924         tmpl.nr = buf->len;
7925         tmpl.is_root = 1;
7926         tmpl.refs = 1;
7927         tmpl.metadata = 1;
7928         tmpl.max_size = buf->len;
7929         add_extent_rec(extent_cache, &tmpl);
7930
7931         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7932             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7933                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7934                                 0, 1);
7935         else
7936                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7937                                 1);
7938         return ret;
7939 }
7940
7941 /* as we fix the tree, we might be deleting blocks that
7942  * we're tracking for repair.  This hook makes sure we
7943  * remove any backrefs for blocks as we are fixing them.
7944  */
7945 static int free_extent_hook(struct btrfs_trans_handle *trans,
7946                             struct btrfs_root *root,
7947                             u64 bytenr, u64 num_bytes, u64 parent,
7948                             u64 root_objectid, u64 owner, u64 offset,
7949                             int refs_to_drop)
7950 {
7951         struct extent_record *rec;
7952         struct cache_extent *cache;
7953         int is_data;
7954         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7955
7956         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7957         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7958         if (!cache)
7959                 return 0;
7960
7961         rec = container_of(cache, struct extent_record, cache);
7962         if (is_data) {
7963                 struct data_backref *back;
7964                 back = find_data_backref(rec, parent, root_objectid, owner,
7965                                          offset, 1, bytenr, num_bytes);
7966                 if (!back)
7967                         goto out;
7968                 if (back->node.found_ref) {
7969                         back->found_ref -= refs_to_drop;
7970                         if (rec->refs)
7971                                 rec->refs -= refs_to_drop;
7972                 }
7973                 if (back->node.found_extent_tree) {
7974                         back->num_refs -= refs_to_drop;
7975                         if (rec->extent_item_refs)
7976                                 rec->extent_item_refs -= refs_to_drop;
7977                 }
7978                 if (back->found_ref == 0)
7979                         back->node.found_ref = 0;
7980                 if (back->num_refs == 0)
7981                         back->node.found_extent_tree = 0;
7982
7983                 if (!back->node.found_extent_tree && back->node.found_ref) {
7984                         list_del(&back->node.list);
7985                         free(back);
7986                 }
7987         } else {
7988                 struct tree_backref *back;
7989                 back = find_tree_backref(rec, parent, root_objectid);
7990                 if (!back)
7991                         goto out;
7992                 if (back->node.found_ref) {
7993                         if (rec->refs)
7994                                 rec->refs--;
7995                         back->node.found_ref = 0;
7996                 }
7997                 if (back->node.found_extent_tree) {
7998                         if (rec->extent_item_refs)
7999                                 rec->extent_item_refs--;
8000                         back->node.found_extent_tree = 0;
8001                 }
8002                 if (!back->node.found_extent_tree && back->node.found_ref) {
8003                         list_del(&back->node.list);
8004                         free(back);
8005                 }
8006         }
8007         maybe_free_extent_rec(extent_cache, rec);
8008 out:
8009         return 0;
8010 }
8011
8012 static int delete_extent_records(struct btrfs_trans_handle *trans,
8013                                  struct btrfs_root *root,
8014                                  struct btrfs_path *path,
8015                                  u64 bytenr)
8016 {
8017         struct btrfs_key key;
8018         struct btrfs_key found_key;
8019         struct extent_buffer *leaf;
8020         int ret;
8021         int slot;
8022
8023
8024         key.objectid = bytenr;
8025         key.type = (u8)-1;
8026         key.offset = (u64)-1;
8027
8028         while(1) {
8029                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8030                                         &key, path, 0, 1);
8031                 if (ret < 0)
8032                         break;
8033
8034                 if (ret > 0) {
8035                         ret = 0;
8036                         if (path->slots[0] == 0)
8037                                 break;
8038                         path->slots[0]--;
8039                 }
8040                 ret = 0;
8041
8042                 leaf = path->nodes[0];
8043                 slot = path->slots[0];
8044
8045                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8046                 if (found_key.objectid != bytenr)
8047                         break;
8048
8049                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8050                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8051                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8052                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8053                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8054                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8055                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8056                         btrfs_release_path(path);
8057                         if (found_key.type == 0) {
8058                                 if (found_key.offset == 0)
8059                                         break;
8060                                 key.offset = found_key.offset - 1;
8061                                 key.type = found_key.type;
8062                         }
8063                         key.type = found_key.type - 1;
8064                         key.offset = (u64)-1;
8065                         continue;
8066                 }
8067
8068                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8069                         found_key.objectid, found_key.type, found_key.offset);
8070
8071                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8072                 if (ret)
8073                         break;
8074                 btrfs_release_path(path);
8075
8076                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8077                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8078                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8079                                 found_key.offset : root->nodesize;
8080
8081                         ret = btrfs_update_block_group(trans, root, bytenr,
8082                                                        bytes, 0, 0);
8083                         if (ret)
8084                                 break;
8085                 }
8086         }
8087
8088         btrfs_release_path(path);
8089         return ret;
8090 }
8091
8092 /*
8093  * for a single backref, this will allocate a new extent
8094  * and add the backref to it.
8095  */
8096 static int record_extent(struct btrfs_trans_handle *trans,
8097                          struct btrfs_fs_info *info,
8098                          struct btrfs_path *path,
8099                          struct extent_record *rec,
8100                          struct extent_backref *back,
8101                          int allocated, u64 flags)
8102 {
8103         int ret = 0;
8104         struct btrfs_root *extent_root = info->extent_root;
8105         struct extent_buffer *leaf;
8106         struct btrfs_key ins_key;
8107         struct btrfs_extent_item *ei;
8108         struct data_backref *dback;
8109         struct btrfs_tree_block_info *bi;
8110
8111         if (!back->is_data)
8112                 rec->max_size = max_t(u64, rec->max_size,
8113                                     info->extent_root->nodesize);
8114
8115         if (!allocated) {
8116                 u32 item_size = sizeof(*ei);
8117
8118                 if (!back->is_data)
8119                         item_size += sizeof(*bi);
8120
8121                 ins_key.objectid = rec->start;
8122                 ins_key.offset = rec->max_size;
8123                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8124
8125                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8126                                         &ins_key, item_size);
8127                 if (ret)
8128                         goto fail;
8129
8130                 leaf = path->nodes[0];
8131                 ei = btrfs_item_ptr(leaf, path->slots[0],
8132                                     struct btrfs_extent_item);
8133
8134                 btrfs_set_extent_refs(leaf, ei, 0);
8135                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8136
8137                 if (back->is_data) {
8138                         btrfs_set_extent_flags(leaf, ei,
8139                                                BTRFS_EXTENT_FLAG_DATA);
8140                 } else {
8141                         struct btrfs_disk_key copy_key;;
8142
8143                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8144                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8145                                              sizeof(*bi));
8146
8147                         btrfs_set_disk_key_objectid(&copy_key,
8148                                                     rec->info_objectid);
8149                         btrfs_set_disk_key_type(&copy_key, 0);
8150                         btrfs_set_disk_key_offset(&copy_key, 0);
8151
8152                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8153                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8154
8155                         btrfs_set_extent_flags(leaf, ei,
8156                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8157                 }
8158
8159                 btrfs_mark_buffer_dirty(leaf);
8160                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8161                                                rec->max_size, 1, 0);
8162                 if (ret)
8163                         goto fail;
8164                 btrfs_release_path(path);
8165         }
8166
8167         if (back->is_data) {
8168                 u64 parent;
8169                 int i;
8170
8171                 dback = to_data_backref(back);
8172                 if (back->full_backref)
8173                         parent = dback->parent;
8174                 else
8175                         parent = 0;
8176
8177                 for (i = 0; i < dback->found_ref; i++) {
8178                         /* if parent != 0, we're doing a full backref
8179                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8180                          * just makes the backref allocator create a data
8181                          * backref
8182                          */
8183                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8184                                                    rec->start, rec->max_size,
8185                                                    parent,
8186                                                    dback->root,
8187                                                    parent ?
8188                                                    BTRFS_FIRST_FREE_OBJECTID :
8189                                                    dback->owner,
8190                                                    dback->offset);
8191                         if (ret)
8192                                 break;
8193                 }
8194                 fprintf(stderr, "adding new data backref"
8195                                 " on %llu %s %llu owner %llu"
8196                                 " offset %llu found %d\n",
8197                                 (unsigned long long)rec->start,
8198                                 back->full_backref ?
8199                                 "parent" : "root",
8200                                 back->full_backref ?
8201                                 (unsigned long long)parent :
8202                                 (unsigned long long)dback->root,
8203                                 (unsigned long long)dback->owner,
8204                                 (unsigned long long)dback->offset,
8205                                 dback->found_ref);
8206         } else {
8207                 u64 parent;
8208                 struct tree_backref *tback;
8209
8210                 tback = to_tree_backref(back);
8211                 if (back->full_backref)
8212                         parent = tback->parent;
8213                 else
8214                         parent = 0;
8215
8216                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8217                                            rec->start, rec->max_size,
8218                                            parent, tback->root, 0, 0);
8219                 fprintf(stderr, "adding new tree backref on "
8220                         "start %llu len %llu parent %llu root %llu\n",
8221                         rec->start, rec->max_size, parent, tback->root);
8222         }
8223 fail:
8224         btrfs_release_path(path);
8225         return ret;
8226 }
8227
8228 static struct extent_entry *find_entry(struct list_head *entries,
8229                                        u64 bytenr, u64 bytes)
8230 {
8231         struct extent_entry *entry = NULL;
8232
8233         list_for_each_entry(entry, entries, list) {
8234                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8235                         return entry;
8236         }
8237
8238         return NULL;
8239 }
8240
8241 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8242 {
8243         struct extent_entry *entry, *best = NULL, *prev = NULL;
8244
8245         list_for_each_entry(entry, entries, list) {
8246                 /*
8247                  * If there are as many broken entries as entries then we know
8248                  * not to trust this particular entry.
8249                  */
8250                 if (entry->broken == entry->count)
8251                         continue;
8252
8253                 /*
8254                  * Special case, when there are only two entries and 'best' is
8255                  * the first one
8256                  */
8257                 if (!prev) {
8258                         best = entry;
8259                         prev = entry;
8260                         continue;
8261                 }
8262
8263                 /*
8264                  * If our current entry == best then we can't be sure our best
8265                  * is really the best, so we need to keep searching.
8266                  */
8267                 if (best && best->count == entry->count) {
8268                         prev = entry;
8269                         best = NULL;
8270                         continue;
8271                 }
8272
8273                 /* Prev == entry, not good enough, have to keep searching */
8274                 if (!prev->broken && prev->count == entry->count)
8275                         continue;
8276
8277                 if (!best)
8278                         best = (prev->count > entry->count) ? prev : entry;
8279                 else if (best->count < entry->count)
8280                         best = entry;
8281                 prev = entry;
8282         }
8283
8284         return best;
8285 }
8286
8287 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8288                       struct data_backref *dback, struct extent_entry *entry)
8289 {
8290         struct btrfs_trans_handle *trans;
8291         struct btrfs_root *root;
8292         struct btrfs_file_extent_item *fi;
8293         struct extent_buffer *leaf;
8294         struct btrfs_key key;
8295         u64 bytenr, bytes;
8296         int ret, err;
8297
8298         key.objectid = dback->root;
8299         key.type = BTRFS_ROOT_ITEM_KEY;
8300         key.offset = (u64)-1;
8301         root = btrfs_read_fs_root(info, &key);
8302         if (IS_ERR(root)) {
8303                 fprintf(stderr, "Couldn't find root for our ref\n");
8304                 return -EINVAL;
8305         }
8306
8307         /*
8308          * The backref points to the original offset of the extent if it was
8309          * split, so we need to search down to the offset we have and then walk
8310          * forward until we find the backref we're looking for.
8311          */
8312         key.objectid = dback->owner;
8313         key.type = BTRFS_EXTENT_DATA_KEY;
8314         key.offset = dback->offset;
8315         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8316         if (ret < 0) {
8317                 fprintf(stderr, "Error looking up ref %d\n", ret);
8318                 return ret;
8319         }
8320
8321         while (1) {
8322                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8323                         ret = btrfs_next_leaf(root, path);
8324                         if (ret) {
8325                                 fprintf(stderr, "Couldn't find our ref, next\n");
8326                                 return -EINVAL;
8327                         }
8328                 }
8329                 leaf = path->nodes[0];
8330                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8331                 if (key.objectid != dback->owner ||
8332                     key.type != BTRFS_EXTENT_DATA_KEY) {
8333                         fprintf(stderr, "Couldn't find our ref, search\n");
8334                         return -EINVAL;
8335                 }
8336                 fi = btrfs_item_ptr(leaf, path->slots[0],
8337                                     struct btrfs_file_extent_item);
8338                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8339                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8340
8341                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8342                         break;
8343                 path->slots[0]++;
8344         }
8345
8346         btrfs_release_path(path);
8347
8348         trans = btrfs_start_transaction(root, 1);
8349         if (IS_ERR(trans))
8350                 return PTR_ERR(trans);
8351
8352         /*
8353          * Ok we have the key of the file extent we want to fix, now we can cow
8354          * down to the thing and fix it.
8355          */
8356         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8357         if (ret < 0) {
8358                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8359                         key.objectid, key.type, key.offset, ret);
8360                 goto out;
8361         }
8362         if (ret > 0) {
8363                 fprintf(stderr, "Well that's odd, we just found this key "
8364                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8365                         key.offset);
8366                 ret = -EINVAL;
8367                 goto out;
8368         }
8369         leaf = path->nodes[0];
8370         fi = btrfs_item_ptr(leaf, path->slots[0],
8371                             struct btrfs_file_extent_item);
8372
8373         if (btrfs_file_extent_compression(leaf, fi) &&
8374             dback->disk_bytenr != entry->bytenr) {
8375                 fprintf(stderr, "Ref doesn't match the record start and is "
8376                         "compressed, please take a btrfs-image of this file "
8377                         "system and send it to a btrfs developer so they can "
8378                         "complete this functionality for bytenr %Lu\n",
8379                         dback->disk_bytenr);
8380                 ret = -EINVAL;
8381                 goto out;
8382         }
8383
8384         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8385                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8386         } else if (dback->disk_bytenr > entry->bytenr) {
8387                 u64 off_diff, offset;
8388
8389                 off_diff = dback->disk_bytenr - entry->bytenr;
8390                 offset = btrfs_file_extent_offset(leaf, fi);
8391                 if (dback->disk_bytenr + offset +
8392                     btrfs_file_extent_num_bytes(leaf, fi) >
8393                     entry->bytenr + entry->bytes) {
8394                         fprintf(stderr, "Ref is past the entry end, please "
8395                                 "take a btrfs-image of this file system and "
8396                                 "send it to a btrfs developer, ref %Lu\n",
8397                                 dback->disk_bytenr);
8398                         ret = -EINVAL;
8399                         goto out;
8400                 }
8401                 offset += off_diff;
8402                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8403                 btrfs_set_file_extent_offset(leaf, fi, offset);
8404         } else if (dback->disk_bytenr < entry->bytenr) {
8405                 u64 offset;
8406
8407                 offset = btrfs_file_extent_offset(leaf, fi);
8408                 if (dback->disk_bytenr + offset < entry->bytenr) {
8409                         fprintf(stderr, "Ref is before the entry start, please"
8410                                 " take a btrfs-image of this file system and "
8411                                 "send it to a btrfs developer, ref %Lu\n",
8412                                 dback->disk_bytenr);
8413                         ret = -EINVAL;
8414                         goto out;
8415                 }
8416
8417                 offset += dback->disk_bytenr;
8418                 offset -= entry->bytenr;
8419                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8420                 btrfs_set_file_extent_offset(leaf, fi, offset);
8421         }
8422
8423         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8424
8425         /*
8426          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8427          * only do this if we aren't using compression, otherwise it's a
8428          * trickier case.
8429          */
8430         if (!btrfs_file_extent_compression(leaf, fi))
8431                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8432         else
8433                 printf("ram bytes may be wrong?\n");
8434         btrfs_mark_buffer_dirty(leaf);
8435 out:
8436         err = btrfs_commit_transaction(trans, root);
8437         btrfs_release_path(path);
8438         return ret ? ret : err;
8439 }
8440
8441 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8442                            struct extent_record *rec)
8443 {
8444         struct extent_backref *back;
8445         struct data_backref *dback;
8446         struct extent_entry *entry, *best = NULL;
8447         LIST_HEAD(entries);
8448         int nr_entries = 0;
8449         int broken_entries = 0;
8450         int ret = 0;
8451         short mismatch = 0;
8452
8453         /*
8454          * Metadata is easy and the backrefs should always agree on bytenr and
8455          * size, if not we've got bigger issues.
8456          */
8457         if (rec->metadata)
8458                 return 0;
8459
8460         list_for_each_entry(back, &rec->backrefs, list) {
8461                 if (back->full_backref || !back->is_data)
8462                         continue;
8463
8464                 dback = to_data_backref(back);
8465
8466                 /*
8467                  * We only pay attention to backrefs that we found a real
8468                  * backref for.
8469                  */
8470                 if (dback->found_ref == 0)
8471                         continue;
8472
8473                 /*
8474                  * For now we only catch when the bytes don't match, not the
8475                  * bytenr.  We can easily do this at the same time, but I want
8476                  * to have a fs image to test on before we just add repair
8477                  * functionality willy-nilly so we know we won't screw up the
8478                  * repair.
8479                  */
8480
8481                 entry = find_entry(&entries, dback->disk_bytenr,
8482                                    dback->bytes);
8483                 if (!entry) {
8484                         entry = malloc(sizeof(struct extent_entry));
8485                         if (!entry) {
8486                                 ret = -ENOMEM;
8487                                 goto out;
8488                         }
8489                         memset(entry, 0, sizeof(*entry));
8490                         entry->bytenr = dback->disk_bytenr;
8491                         entry->bytes = dback->bytes;
8492                         list_add_tail(&entry->list, &entries);
8493                         nr_entries++;
8494                 }
8495
8496                 /*
8497                  * If we only have on entry we may think the entries agree when
8498                  * in reality they don't so we have to do some extra checking.
8499                  */
8500                 if (dback->disk_bytenr != rec->start ||
8501                     dback->bytes != rec->nr || back->broken)
8502                         mismatch = 1;
8503
8504                 if (back->broken) {
8505                         entry->broken++;
8506                         broken_entries++;
8507                 }
8508
8509                 entry->count++;
8510         }
8511
8512         /* Yay all the backrefs agree, carry on good sir */
8513         if (nr_entries <= 1 && !mismatch)
8514                 goto out;
8515
8516         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8517                 "%Lu\n", rec->start);
8518
8519         /*
8520          * First we want to see if the backrefs can agree amongst themselves who
8521          * is right, so figure out which one of the entries has the highest
8522          * count.
8523          */
8524         best = find_most_right_entry(&entries);
8525
8526         /*
8527          * Ok so we may have an even split between what the backrefs think, so
8528          * this is where we use the extent ref to see what it thinks.
8529          */
8530         if (!best) {
8531                 entry = find_entry(&entries, rec->start, rec->nr);
8532                 if (!entry && (!broken_entries || !rec->found_rec)) {
8533                         fprintf(stderr, "Backrefs don't agree with each other "
8534                                 "and extent record doesn't agree with anybody,"
8535                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8536                                 rec->start, rec->nr);
8537                         ret = -EINVAL;
8538                         goto out;
8539                 } else if (!entry) {
8540                         /*
8541                          * Ok our backrefs were broken, we'll assume this is the
8542                          * correct value and add an entry for this range.
8543                          */
8544                         entry = malloc(sizeof(struct extent_entry));
8545                         if (!entry) {
8546                                 ret = -ENOMEM;
8547                                 goto out;
8548                         }
8549                         memset(entry, 0, sizeof(*entry));
8550                         entry->bytenr = rec->start;
8551                         entry->bytes = rec->nr;
8552                         list_add_tail(&entry->list, &entries);
8553                         nr_entries++;
8554                 }
8555                 entry->count++;
8556                 best = find_most_right_entry(&entries);
8557                 if (!best) {
8558                         fprintf(stderr, "Backrefs and extent record evenly "
8559                                 "split on who is right, this is going to "
8560                                 "require user input to fix bytenr %Lu bytes "
8561                                 "%Lu\n", rec->start, rec->nr);
8562                         ret = -EINVAL;
8563                         goto out;
8564                 }
8565         }
8566
8567         /*
8568          * I don't think this can happen currently as we'll abort() if we catch
8569          * this case higher up, but in case somebody removes that we still can't
8570          * deal with it properly here yet, so just bail out of that's the case.
8571          */
8572         if (best->bytenr != rec->start) {
8573                 fprintf(stderr, "Extent start and backref starts don't match, "
8574                         "please use btrfs-image on this file system and send "
8575                         "it to a btrfs developer so they can make fsck fix "
8576                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8577                         rec->start, rec->nr);
8578                 ret = -EINVAL;
8579                 goto out;
8580         }
8581
8582         /*
8583          * Ok great we all agreed on an extent record, let's go find the real
8584          * references and fix up the ones that don't match.
8585          */
8586         list_for_each_entry(back, &rec->backrefs, list) {
8587                 if (back->full_backref || !back->is_data)
8588                         continue;
8589
8590                 dback = to_data_backref(back);
8591
8592                 /*
8593                  * Still ignoring backrefs that don't have a real ref attached
8594                  * to them.
8595                  */
8596                 if (dback->found_ref == 0)
8597                         continue;
8598
8599                 if (dback->bytes == best->bytes &&
8600                     dback->disk_bytenr == best->bytenr)
8601                         continue;
8602
8603                 ret = repair_ref(info, path, dback, best);
8604                 if (ret)
8605                         goto out;
8606         }
8607
8608         /*
8609          * Ok we messed with the actual refs, which means we need to drop our
8610          * entire cache and go back and rescan.  I know this is a huge pain and
8611          * adds a lot of extra work, but it's the only way to be safe.  Once all
8612          * the backrefs agree we may not need to do anything to the extent
8613          * record itself.
8614          */
8615         ret = -EAGAIN;
8616 out:
8617         while (!list_empty(&entries)) {
8618                 entry = list_entry(entries.next, struct extent_entry, list);
8619                 list_del_init(&entry->list);
8620                 free(entry);
8621         }
8622         return ret;
8623 }
8624
8625 static int process_duplicates(struct cache_tree *extent_cache,
8626                               struct extent_record *rec)
8627 {
8628         struct extent_record *good, *tmp;
8629         struct cache_extent *cache;
8630         int ret;
8631
8632         /*
8633          * If we found a extent record for this extent then return, or if we
8634          * have more than one duplicate we are likely going to need to delete
8635          * something.
8636          */
8637         if (rec->found_rec || rec->num_duplicates > 1)
8638                 return 0;
8639
8640         /* Shouldn't happen but just in case */
8641         BUG_ON(!rec->num_duplicates);
8642
8643         /*
8644          * So this happens if we end up with a backref that doesn't match the
8645          * actual extent entry.  So either the backref is bad or the extent
8646          * entry is bad.  Either way we want to have the extent_record actually
8647          * reflect what we found in the extent_tree, so we need to take the
8648          * duplicate out and use that as the extent_record since the only way we
8649          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8650          */
8651         remove_cache_extent(extent_cache, &rec->cache);
8652
8653         good = to_extent_record(rec->dups.next);
8654         list_del_init(&good->list);
8655         INIT_LIST_HEAD(&good->backrefs);
8656         INIT_LIST_HEAD(&good->dups);
8657         good->cache.start = good->start;
8658         good->cache.size = good->nr;
8659         good->content_checked = 0;
8660         good->owner_ref_checked = 0;
8661         good->num_duplicates = 0;
8662         good->refs = rec->refs;
8663         list_splice_init(&rec->backrefs, &good->backrefs);
8664         while (1) {
8665                 cache = lookup_cache_extent(extent_cache, good->start,
8666                                             good->nr);
8667                 if (!cache)
8668                         break;
8669                 tmp = container_of(cache, struct extent_record, cache);
8670
8671                 /*
8672                  * If we find another overlapping extent and it's found_rec is
8673                  * set then it's a duplicate and we need to try and delete
8674                  * something.
8675                  */
8676                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8677                         if (list_empty(&good->list))
8678                                 list_add_tail(&good->list,
8679                                               &duplicate_extents);
8680                         good->num_duplicates += tmp->num_duplicates + 1;
8681                         list_splice_init(&tmp->dups, &good->dups);
8682                         list_del_init(&tmp->list);
8683                         list_add_tail(&tmp->list, &good->dups);
8684                         remove_cache_extent(extent_cache, &tmp->cache);
8685                         continue;
8686                 }
8687
8688                 /*
8689                  * Ok we have another non extent item backed extent rec, so lets
8690                  * just add it to this extent and carry on like we did above.
8691                  */
8692                 good->refs += tmp->refs;
8693                 list_splice_init(&tmp->backrefs, &good->backrefs);
8694                 remove_cache_extent(extent_cache, &tmp->cache);
8695                 free(tmp);
8696         }
8697         ret = insert_cache_extent(extent_cache, &good->cache);
8698         BUG_ON(ret);
8699         free(rec);
8700         return good->num_duplicates ? 0 : 1;
8701 }
8702
8703 static int delete_duplicate_records(struct btrfs_root *root,
8704                                     struct extent_record *rec)
8705 {
8706         struct btrfs_trans_handle *trans;
8707         LIST_HEAD(delete_list);
8708         struct btrfs_path path;
8709         struct extent_record *tmp, *good, *n;
8710         int nr_del = 0;
8711         int ret = 0, err;
8712         struct btrfs_key key;
8713
8714         btrfs_init_path(&path);
8715
8716         good = rec;
8717         /* Find the record that covers all of the duplicates. */
8718         list_for_each_entry(tmp, &rec->dups, list) {
8719                 if (good->start < tmp->start)
8720                         continue;
8721                 if (good->nr > tmp->nr)
8722                         continue;
8723
8724                 if (tmp->start + tmp->nr < good->start + good->nr) {
8725                         fprintf(stderr, "Ok we have overlapping extents that "
8726                                 "aren't completely covered by each other, this "
8727                                 "is going to require more careful thought.  "
8728                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8729                                 tmp->start, tmp->nr, good->start, good->nr);
8730                         abort();
8731                 }
8732                 good = tmp;
8733         }
8734
8735         if (good != rec)
8736                 list_add_tail(&rec->list, &delete_list);
8737
8738         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8739                 if (tmp == good)
8740                         continue;
8741                 list_move_tail(&tmp->list, &delete_list);
8742         }
8743
8744         root = root->fs_info->extent_root;
8745         trans = btrfs_start_transaction(root, 1);
8746         if (IS_ERR(trans)) {
8747                 ret = PTR_ERR(trans);
8748                 goto out;
8749         }
8750
8751         list_for_each_entry(tmp, &delete_list, list) {
8752                 if (tmp->found_rec == 0)
8753                         continue;
8754                 key.objectid = tmp->start;
8755                 key.type = BTRFS_EXTENT_ITEM_KEY;
8756                 key.offset = tmp->nr;
8757
8758                 /* Shouldn't happen but just in case */
8759                 if (tmp->metadata) {
8760                         fprintf(stderr, "Well this shouldn't happen, extent "
8761                                 "record overlaps but is metadata? "
8762                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8763                         abort();
8764                 }
8765
8766                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8767                 if (ret) {
8768                         if (ret > 0)
8769                                 ret = -EINVAL;
8770                         break;
8771                 }
8772                 ret = btrfs_del_item(trans, root, &path);
8773                 if (ret)
8774                         break;
8775                 btrfs_release_path(&path);
8776                 nr_del++;
8777         }
8778         err = btrfs_commit_transaction(trans, root);
8779         if (err && !ret)
8780                 ret = err;
8781 out:
8782         while (!list_empty(&delete_list)) {
8783                 tmp = to_extent_record(delete_list.next);
8784                 list_del_init(&tmp->list);
8785                 if (tmp == rec)
8786                         continue;
8787                 free(tmp);
8788         }
8789
8790         while (!list_empty(&rec->dups)) {
8791                 tmp = to_extent_record(rec->dups.next);
8792                 list_del_init(&tmp->list);
8793                 free(tmp);
8794         }
8795
8796         btrfs_release_path(&path);
8797
8798         if (!ret && !nr_del)
8799                 rec->num_duplicates = 0;
8800
8801         return ret ? ret : nr_del;
8802 }
8803
8804 static int find_possible_backrefs(struct btrfs_fs_info *info,
8805                                   struct btrfs_path *path,
8806                                   struct cache_tree *extent_cache,
8807                                   struct extent_record *rec)
8808 {
8809         struct btrfs_root *root;
8810         struct extent_backref *back;
8811         struct data_backref *dback;
8812         struct cache_extent *cache;
8813         struct btrfs_file_extent_item *fi;
8814         struct btrfs_key key;
8815         u64 bytenr, bytes;
8816         int ret;
8817
8818         list_for_each_entry(back, &rec->backrefs, list) {
8819                 /* Don't care about full backrefs (poor unloved backrefs) */
8820                 if (back->full_backref || !back->is_data)
8821                         continue;
8822
8823                 dback = to_data_backref(back);
8824
8825                 /* We found this one, we don't need to do a lookup */
8826                 if (dback->found_ref)
8827                         continue;
8828
8829                 key.objectid = dback->root;
8830                 key.type = BTRFS_ROOT_ITEM_KEY;
8831                 key.offset = (u64)-1;
8832
8833                 root = btrfs_read_fs_root(info, &key);
8834
8835                 /* No root, definitely a bad ref, skip */
8836                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8837                         continue;
8838                 /* Other err, exit */
8839                 if (IS_ERR(root))
8840                         return PTR_ERR(root);
8841
8842                 key.objectid = dback->owner;
8843                 key.type = BTRFS_EXTENT_DATA_KEY;
8844                 key.offset = dback->offset;
8845                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8846                 if (ret) {
8847                         btrfs_release_path(path);
8848                         if (ret < 0)
8849                                 return ret;
8850                         /* Didn't find it, we can carry on */
8851                         ret = 0;
8852                         continue;
8853                 }
8854
8855                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8856                                     struct btrfs_file_extent_item);
8857                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8858                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8859                 btrfs_release_path(path);
8860                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8861                 if (cache) {
8862                         struct extent_record *tmp;
8863                         tmp = container_of(cache, struct extent_record, cache);
8864
8865                         /*
8866                          * If we found an extent record for the bytenr for this
8867                          * particular backref then we can't add it to our
8868                          * current extent record.  We only want to add backrefs
8869                          * that don't have a corresponding extent item in the
8870                          * extent tree since they likely belong to this record
8871                          * and we need to fix it if it doesn't match bytenrs.
8872                          */
8873                         if  (tmp->found_rec)
8874                                 continue;
8875                 }
8876
8877                 dback->found_ref += 1;
8878                 dback->disk_bytenr = bytenr;
8879                 dback->bytes = bytes;
8880
8881                 /*
8882                  * Set this so the verify backref code knows not to trust the
8883                  * values in this backref.
8884                  */
8885                 back->broken = 1;
8886         }
8887
8888         return 0;
8889 }
8890
8891 /*
8892  * Record orphan data ref into corresponding root.
8893  *
8894  * Return 0 if the extent item contains data ref and recorded.
8895  * Return 1 if the extent item contains no useful data ref
8896  *   On that case, it may contains only shared_dataref or metadata backref
8897  *   or the file extent exists(this should be handled by the extent bytenr
8898  *   recovery routine)
8899  * Return <0 if something goes wrong.
8900  */
8901 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8902                                       struct extent_record *rec)
8903 {
8904         struct btrfs_key key;
8905         struct btrfs_root *dest_root;
8906         struct extent_backref *back;
8907         struct data_backref *dback;
8908         struct orphan_data_extent *orphan;
8909         struct btrfs_path path;
8910         int recorded_data_ref = 0;
8911         int ret = 0;
8912
8913         if (rec->metadata)
8914                 return 1;
8915         btrfs_init_path(&path);
8916         list_for_each_entry(back, &rec->backrefs, list) {
8917                 if (back->full_backref || !back->is_data ||
8918                     !back->found_extent_tree)
8919                         continue;
8920                 dback = to_data_backref(back);
8921                 if (dback->found_ref)
8922                         continue;
8923                 key.objectid = dback->root;
8924                 key.type = BTRFS_ROOT_ITEM_KEY;
8925                 key.offset = (u64)-1;
8926
8927                 dest_root = btrfs_read_fs_root(fs_info, &key);
8928
8929                 /* For non-exist root we just skip it */
8930                 if (IS_ERR(dest_root) || !dest_root)
8931                         continue;
8932
8933                 key.objectid = dback->owner;
8934                 key.type = BTRFS_EXTENT_DATA_KEY;
8935                 key.offset = dback->offset;
8936
8937                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8938                 btrfs_release_path(&path);
8939                 /*
8940                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8941                  * we need to record it for inode/file extent rebuild.
8942                  * For ret > 0, we record it only for file extent rebuild.
8943                  * For ret == 0, the file extent exists but only bytenr
8944                  * mismatch, let the original bytenr fix routine to handle,
8945                  * don't record it.
8946                  */
8947                 if (ret == 0)
8948                         continue;
8949                 ret = 0;
8950                 orphan = malloc(sizeof(*orphan));
8951                 if (!orphan) {
8952                         ret = -ENOMEM;
8953                         goto out;
8954                 }
8955                 INIT_LIST_HEAD(&orphan->list);
8956                 orphan->root = dback->root;
8957                 orphan->objectid = dback->owner;
8958                 orphan->offset = dback->offset;
8959                 orphan->disk_bytenr = rec->cache.start;
8960                 orphan->disk_len = rec->cache.size;
8961                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8962                 recorded_data_ref = 1;
8963         }
8964 out:
8965         btrfs_release_path(&path);
8966         if (!ret)
8967                 return !recorded_data_ref;
8968         else
8969                 return ret;
8970 }
8971
8972 /*
8973  * when an incorrect extent item is found, this will delete
8974  * all of the existing entries for it and recreate them
8975  * based on what the tree scan found.
8976  */
8977 static int fixup_extent_refs(struct btrfs_fs_info *info,
8978                              struct cache_tree *extent_cache,
8979                              struct extent_record *rec)
8980 {
8981         struct btrfs_trans_handle *trans = NULL;
8982         int ret;
8983         struct btrfs_path path;
8984         struct list_head *cur = rec->backrefs.next;
8985         struct cache_extent *cache;
8986         struct extent_backref *back;
8987         int allocated = 0;
8988         u64 flags = 0;
8989
8990         if (rec->flag_block_full_backref)
8991                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8992
8993         btrfs_init_path(&path);
8994         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8995                 /*
8996                  * Sometimes the backrefs themselves are so broken they don't
8997                  * get attached to any meaningful rec, so first go back and
8998                  * check any of our backrefs that we couldn't find and throw
8999                  * them into the list if we find the backref so that
9000                  * verify_backrefs can figure out what to do.
9001                  */
9002                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9003                 if (ret < 0)
9004                         goto out;
9005         }
9006
9007         /* step one, make sure all of the backrefs agree */
9008         ret = verify_backrefs(info, &path, rec);
9009         if (ret < 0)
9010                 goto out;
9011
9012         trans = btrfs_start_transaction(info->extent_root, 1);
9013         if (IS_ERR(trans)) {
9014                 ret = PTR_ERR(trans);
9015                 goto out;
9016         }
9017
9018         /* step two, delete all the existing records */
9019         ret = delete_extent_records(trans, info->extent_root, &path,
9020                                     rec->start);
9021
9022         if (ret < 0)
9023                 goto out;
9024
9025         /* was this block corrupt?  If so, don't add references to it */
9026         cache = lookup_cache_extent(info->corrupt_blocks,
9027                                     rec->start, rec->max_size);
9028         if (cache) {
9029                 ret = 0;
9030                 goto out;
9031         }
9032
9033         /* step three, recreate all the refs we did find */
9034         while(cur != &rec->backrefs) {
9035                 back = to_extent_backref(cur);
9036                 cur = cur->next;
9037
9038                 /*
9039                  * if we didn't find any references, don't create a
9040                  * new extent record
9041                  */
9042                 if (!back->found_ref)
9043                         continue;
9044
9045                 rec->bad_full_backref = 0;
9046                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9047                 allocated = 1;
9048
9049                 if (ret)
9050                         goto out;
9051         }
9052 out:
9053         if (trans) {
9054                 int err = btrfs_commit_transaction(trans, info->extent_root);
9055                 if (!ret)
9056                         ret = err;
9057         }
9058
9059         if (!ret)
9060                 fprintf(stderr, "Repaired extent references for %llu\n",
9061                                 (unsigned long long)rec->start);
9062
9063         btrfs_release_path(&path);
9064         return ret;
9065 }
9066
9067 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9068                               struct extent_record *rec)
9069 {
9070         struct btrfs_trans_handle *trans;
9071         struct btrfs_root *root = fs_info->extent_root;
9072         struct btrfs_path path;
9073         struct btrfs_extent_item *ei;
9074         struct btrfs_key key;
9075         u64 flags;
9076         int ret = 0;
9077
9078         key.objectid = rec->start;
9079         if (rec->metadata) {
9080                 key.type = BTRFS_METADATA_ITEM_KEY;
9081                 key.offset = rec->info_level;
9082         } else {
9083                 key.type = BTRFS_EXTENT_ITEM_KEY;
9084                 key.offset = rec->max_size;
9085         }
9086
9087         trans = btrfs_start_transaction(root, 0);
9088         if (IS_ERR(trans))
9089                 return PTR_ERR(trans);
9090
9091         btrfs_init_path(&path);
9092         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9093         if (ret < 0) {
9094                 btrfs_release_path(&path);
9095                 btrfs_commit_transaction(trans, root);
9096                 return ret;
9097         } else if (ret) {
9098                 fprintf(stderr, "Didn't find extent for %llu\n",
9099                         (unsigned long long)rec->start);
9100                 btrfs_release_path(&path);
9101                 btrfs_commit_transaction(trans, root);
9102                 return -ENOENT;
9103         }
9104
9105         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9106                             struct btrfs_extent_item);
9107         flags = btrfs_extent_flags(path.nodes[0], ei);
9108         if (rec->flag_block_full_backref) {
9109                 fprintf(stderr, "setting full backref on %llu\n",
9110                         (unsigned long long)key.objectid);
9111                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9112         } else {
9113                 fprintf(stderr, "clearing full backref on %llu\n",
9114                         (unsigned long long)key.objectid);
9115                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9116         }
9117         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9118         btrfs_mark_buffer_dirty(path.nodes[0]);
9119         btrfs_release_path(&path);
9120         ret = btrfs_commit_transaction(trans, root);
9121         if (!ret)
9122                 fprintf(stderr, "Repaired extent flags for %llu\n",
9123                                 (unsigned long long)rec->start);
9124
9125         return ret;
9126 }
9127
9128 /* right now we only prune from the extent allocation tree */
9129 static int prune_one_block(struct btrfs_trans_handle *trans,
9130                            struct btrfs_fs_info *info,
9131                            struct btrfs_corrupt_block *corrupt)
9132 {
9133         int ret;
9134         struct btrfs_path path;
9135         struct extent_buffer *eb;
9136         u64 found;
9137         int slot;
9138         int nritems;
9139         int level = corrupt->level + 1;
9140
9141         btrfs_init_path(&path);
9142 again:
9143         /* we want to stop at the parent to our busted block */
9144         path.lowest_level = level;
9145
9146         ret = btrfs_search_slot(trans, info->extent_root,
9147                                 &corrupt->key, &path, -1, 1);
9148
9149         if (ret < 0)
9150                 goto out;
9151
9152         eb = path.nodes[level];
9153         if (!eb) {
9154                 ret = -ENOENT;
9155                 goto out;
9156         }
9157
9158         /*
9159          * hopefully the search gave us the block we want to prune,
9160          * lets try that first
9161          */
9162         slot = path.slots[level];
9163         found =  btrfs_node_blockptr(eb, slot);
9164         if (found == corrupt->cache.start)
9165                 goto del_ptr;
9166
9167         nritems = btrfs_header_nritems(eb);
9168
9169         /* the search failed, lets scan this node and hope we find it */
9170         for (slot = 0; slot < nritems; slot++) {
9171                 found =  btrfs_node_blockptr(eb, slot);
9172                 if (found == corrupt->cache.start)
9173                         goto del_ptr;
9174         }
9175         /*
9176          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9177          * to this block
9178          */
9179         if (eb == info->extent_root->node) {
9180                 ret = -ENOENT;
9181                 goto out;
9182         } else {
9183                 level++;
9184                 btrfs_release_path(&path);
9185                 goto again;
9186         }
9187
9188 del_ptr:
9189         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9190         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9191
9192 out:
9193         btrfs_release_path(&path);
9194         return ret;
9195 }
9196
9197 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9198 {
9199         struct btrfs_trans_handle *trans = NULL;
9200         struct cache_extent *cache;
9201         struct btrfs_corrupt_block *corrupt;
9202
9203         while (1) {
9204                 cache = search_cache_extent(info->corrupt_blocks, 0);
9205                 if (!cache)
9206                         break;
9207                 if (!trans) {
9208                         trans = btrfs_start_transaction(info->extent_root, 1);
9209                         if (IS_ERR(trans))
9210                                 return PTR_ERR(trans);
9211                 }
9212                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9213                 prune_one_block(trans, info, corrupt);
9214                 remove_cache_extent(info->corrupt_blocks, cache);
9215         }
9216         if (trans)
9217                 return btrfs_commit_transaction(trans, info->extent_root);
9218         return 0;
9219 }
9220
9221 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9222 {
9223         struct btrfs_block_group_cache *cache;
9224         u64 start, end;
9225         int ret;
9226
9227         while (1) {
9228                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9229                                             &start, &end, EXTENT_DIRTY);
9230                 if (ret)
9231                         break;
9232                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9233         }
9234
9235         start = 0;
9236         while (1) {
9237                 cache = btrfs_lookup_first_block_group(fs_info, start);
9238                 if (!cache)
9239                         break;
9240                 if (cache->cached)
9241                         cache->cached = 0;
9242                 start = cache->key.objectid + cache->key.offset;
9243         }
9244 }
9245
9246 static int check_extent_refs(struct btrfs_root *root,
9247                              struct cache_tree *extent_cache)
9248 {
9249         struct extent_record *rec;
9250         struct cache_extent *cache;
9251         int ret = 0;
9252         int had_dups = 0;
9253
9254         if (repair) {
9255                 /*
9256                  * if we're doing a repair, we have to make sure
9257                  * we don't allocate from the problem extents.
9258                  * In the worst case, this will be all the
9259                  * extents in the FS
9260                  */
9261                 cache = search_cache_extent(extent_cache, 0);
9262                 while(cache) {
9263                         rec = container_of(cache, struct extent_record, cache);
9264                         set_extent_dirty(root->fs_info->excluded_extents,
9265                                          rec->start,
9266                                          rec->start + rec->max_size - 1);
9267                         cache = next_cache_extent(cache);
9268                 }
9269
9270                 /* pin down all the corrupted blocks too */
9271                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9272                 while(cache) {
9273                         set_extent_dirty(root->fs_info->excluded_extents,
9274                                          cache->start,
9275                                          cache->start + cache->size - 1);
9276                         cache = next_cache_extent(cache);
9277                 }
9278                 prune_corrupt_blocks(root->fs_info);
9279                 reset_cached_block_groups(root->fs_info);
9280         }
9281
9282         reset_cached_block_groups(root->fs_info);
9283
9284         /*
9285          * We need to delete any duplicate entries we find first otherwise we
9286          * could mess up the extent tree when we have backrefs that actually
9287          * belong to a different extent item and not the weird duplicate one.
9288          */
9289         while (repair && !list_empty(&duplicate_extents)) {
9290                 rec = to_extent_record(duplicate_extents.next);
9291                 list_del_init(&rec->list);
9292
9293                 /* Sometimes we can find a backref before we find an actual
9294                  * extent, so we need to process it a little bit to see if there
9295                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9296                  * if this is a backref screwup.  If we need to delete stuff
9297                  * process_duplicates() will return 0, otherwise it will return
9298                  * 1 and we
9299                  */
9300                 if (process_duplicates(extent_cache, rec))
9301                         continue;
9302                 ret = delete_duplicate_records(root, rec);
9303                 if (ret < 0)
9304                         return ret;
9305                 /*
9306                  * delete_duplicate_records will return the number of entries
9307                  * deleted, so if it's greater than 0 then we know we actually
9308                  * did something and we need to remove.
9309                  */
9310                 if (ret)
9311                         had_dups = 1;
9312         }
9313
9314         if (had_dups)
9315                 return -EAGAIN;
9316
9317         while(1) {
9318                 int cur_err = 0;
9319                 int fix = 0;
9320
9321                 cache = search_cache_extent(extent_cache, 0);
9322                 if (!cache)
9323                         break;
9324                 rec = container_of(cache, struct extent_record, cache);
9325                 if (rec->num_duplicates) {
9326                         fprintf(stderr, "extent item %llu has multiple extent "
9327                                 "items\n", (unsigned long long)rec->start);
9328                         cur_err = 1;
9329                 }
9330
9331                 if (rec->refs != rec->extent_item_refs) {
9332                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9333                                 (unsigned long long)rec->start,
9334                                 (unsigned long long)rec->nr);
9335                         fprintf(stderr, "extent item %llu, found %llu\n",
9336                                 (unsigned long long)rec->extent_item_refs,
9337                                 (unsigned long long)rec->refs);
9338                         ret = record_orphan_data_extents(root->fs_info, rec);
9339                         if (ret < 0)
9340                                 goto repair_abort;
9341                         fix = ret;
9342                         cur_err = 1;
9343                 }
9344                 if (all_backpointers_checked(rec, 1)) {
9345                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9346                                 (unsigned long long)rec->start,
9347                                 (unsigned long long)rec->nr);
9348                         fix = 1;
9349                         cur_err = 1;
9350                 }
9351                 if (!rec->owner_ref_checked) {
9352                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9353                                 (unsigned long long)rec->start,
9354                                 (unsigned long long)rec->nr);
9355                         fix = 1;
9356                         cur_err = 1;
9357                 }
9358
9359                 if (repair && fix) {
9360                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9361                         if (ret)
9362                                 goto repair_abort;
9363                 }
9364
9365
9366                 if (rec->bad_full_backref) {
9367                         fprintf(stderr, "bad full backref, on [%llu]\n",
9368                                 (unsigned long long)rec->start);
9369                         if (repair) {
9370                                 ret = fixup_extent_flags(root->fs_info, rec);
9371                                 if (ret)
9372                                         goto repair_abort;
9373                                 fix = 1;
9374                         }
9375                         cur_err = 1;
9376                 }
9377                 /*
9378                  * Although it's not a extent ref's problem, we reuse this
9379                  * routine for error reporting.
9380                  * No repair function yet.
9381                  */
9382                 if (rec->crossing_stripes) {
9383                         fprintf(stderr,
9384                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9385                                 rec->start, rec->start + rec->max_size);
9386                         cur_err = 1;
9387                 }
9388
9389                 if (rec->wrong_chunk_type) {
9390                         fprintf(stderr,
9391                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9392                                 rec->start, rec->start + rec->max_size);
9393                         cur_err = 1;
9394                 }
9395
9396                 remove_cache_extent(extent_cache, cache);
9397                 free_all_extent_backrefs(rec);
9398                 if (!init_extent_tree && repair && (!cur_err || fix))
9399                         clear_extent_dirty(root->fs_info->excluded_extents,
9400                                            rec->start,
9401                                            rec->start + rec->max_size - 1);
9402                 free(rec);
9403         }
9404 repair_abort:
9405         if (repair) {
9406                 if (ret && ret != -EAGAIN) {
9407                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9408                         exit(1);
9409                 } else if (!ret) {
9410                         struct btrfs_trans_handle *trans;
9411
9412                         root = root->fs_info->extent_root;
9413                         trans = btrfs_start_transaction(root, 1);
9414                         if (IS_ERR(trans)) {
9415                                 ret = PTR_ERR(trans);
9416                                 goto repair_abort;
9417                         }
9418
9419                         btrfs_fix_block_accounting(trans, root);
9420                         ret = btrfs_commit_transaction(trans, root);
9421                         if (ret)
9422                                 goto repair_abort;
9423                 }
9424                 return ret;
9425         }
9426         return 0;
9427 }
9428
9429 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9430 {
9431         u64 stripe_size;
9432
9433         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9434                 stripe_size = length;
9435                 stripe_size /= num_stripes;
9436         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9437                 stripe_size = length * 2;
9438                 stripe_size /= num_stripes;
9439         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9440                 stripe_size = length;
9441                 stripe_size /= (num_stripes - 1);
9442         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9443                 stripe_size = length;
9444                 stripe_size /= (num_stripes - 2);
9445         } else {
9446                 stripe_size = length;
9447         }
9448         return stripe_size;
9449 }
9450
9451 /*
9452  * Check the chunk with its block group/dev list ref:
9453  * Return 0 if all refs seems valid.
9454  * Return 1 if part of refs seems valid, need later check for rebuild ref
9455  * like missing block group and needs to search extent tree to rebuild them.
9456  * Return -1 if essential refs are missing and unable to rebuild.
9457  */
9458 static int check_chunk_refs(struct chunk_record *chunk_rec,
9459                             struct block_group_tree *block_group_cache,
9460                             struct device_extent_tree *dev_extent_cache,
9461                             int silent)
9462 {
9463         struct cache_extent *block_group_item;
9464         struct block_group_record *block_group_rec;
9465         struct cache_extent *dev_extent_item;
9466         struct device_extent_record *dev_extent_rec;
9467         u64 devid;
9468         u64 offset;
9469         u64 length;
9470         int metadump_v2 = 0;
9471         int i;
9472         int ret = 0;
9473
9474         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9475                                                chunk_rec->offset,
9476                                                chunk_rec->length);
9477         if (block_group_item) {
9478                 block_group_rec = container_of(block_group_item,
9479                                                struct block_group_record,
9480                                                cache);
9481                 if (chunk_rec->length != block_group_rec->offset ||
9482                     chunk_rec->offset != block_group_rec->objectid ||
9483                     (!metadump_v2 &&
9484                      chunk_rec->type_flags != block_group_rec->flags)) {
9485                         if (!silent)
9486                                 fprintf(stderr,
9487                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9488                                         chunk_rec->objectid,
9489                                         chunk_rec->type,
9490                                         chunk_rec->offset,
9491                                         chunk_rec->length,
9492                                         chunk_rec->offset,
9493                                         chunk_rec->type_flags,
9494                                         block_group_rec->objectid,
9495                                         block_group_rec->type,
9496                                         block_group_rec->offset,
9497                                         block_group_rec->offset,
9498                                         block_group_rec->objectid,
9499                                         block_group_rec->flags);
9500                         ret = -1;
9501                 } else {
9502                         list_del_init(&block_group_rec->list);
9503                         chunk_rec->bg_rec = block_group_rec;
9504                 }
9505         } else {
9506                 if (!silent)
9507                         fprintf(stderr,
9508                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9509                                 chunk_rec->objectid,
9510                                 chunk_rec->type,
9511                                 chunk_rec->offset,
9512                                 chunk_rec->length,
9513                                 chunk_rec->offset,
9514                                 chunk_rec->type_flags);
9515                 ret = 1;
9516         }
9517
9518         if (metadump_v2)
9519                 return ret;
9520
9521         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9522                                     chunk_rec->num_stripes);
9523         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9524                 devid = chunk_rec->stripes[i].devid;
9525                 offset = chunk_rec->stripes[i].offset;
9526                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9527                                                        devid, offset, length);
9528                 if (dev_extent_item) {
9529                         dev_extent_rec = container_of(dev_extent_item,
9530                                                 struct device_extent_record,
9531                                                 cache);
9532                         if (dev_extent_rec->objectid != devid ||
9533                             dev_extent_rec->offset != offset ||
9534                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9535                             dev_extent_rec->length != length) {
9536                                 if (!silent)
9537                                         fprintf(stderr,
9538                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9539                                                 chunk_rec->objectid,
9540                                                 chunk_rec->type,
9541                                                 chunk_rec->offset,
9542                                                 chunk_rec->stripes[i].devid,
9543                                                 chunk_rec->stripes[i].offset,
9544                                                 dev_extent_rec->objectid,
9545                                                 dev_extent_rec->offset,
9546                                                 dev_extent_rec->length);
9547                                 ret = -1;
9548                         } else {
9549                                 list_move(&dev_extent_rec->chunk_list,
9550                                           &chunk_rec->dextents);
9551                         }
9552                 } else {
9553                         if (!silent)
9554                                 fprintf(stderr,
9555                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9556                                         chunk_rec->objectid,
9557                                         chunk_rec->type,
9558                                         chunk_rec->offset,
9559                                         chunk_rec->stripes[i].devid,
9560                                         chunk_rec->stripes[i].offset);
9561                         ret = -1;
9562                 }
9563         }
9564         return ret;
9565 }
9566
9567 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9568 int check_chunks(struct cache_tree *chunk_cache,
9569                  struct block_group_tree *block_group_cache,
9570                  struct device_extent_tree *dev_extent_cache,
9571                  struct list_head *good, struct list_head *bad,
9572                  struct list_head *rebuild, int silent)
9573 {
9574         struct cache_extent *chunk_item;
9575         struct chunk_record *chunk_rec;
9576         struct block_group_record *bg_rec;
9577         struct device_extent_record *dext_rec;
9578         int err;
9579         int ret = 0;
9580
9581         chunk_item = first_cache_extent(chunk_cache);
9582         while (chunk_item) {
9583                 chunk_rec = container_of(chunk_item, struct chunk_record,
9584                                          cache);
9585                 err = check_chunk_refs(chunk_rec, block_group_cache,
9586                                        dev_extent_cache, silent);
9587                 if (err < 0)
9588                         ret = err;
9589                 if (err == 0 && good)
9590                         list_add_tail(&chunk_rec->list, good);
9591                 if (err > 0 && rebuild)
9592                         list_add_tail(&chunk_rec->list, rebuild);
9593                 if (err < 0 && bad)
9594                         list_add_tail(&chunk_rec->list, bad);
9595                 chunk_item = next_cache_extent(chunk_item);
9596         }
9597
9598         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9599                 if (!silent)
9600                         fprintf(stderr,
9601                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9602                                 bg_rec->objectid,
9603                                 bg_rec->offset,
9604                                 bg_rec->flags);
9605                 if (!ret)
9606                         ret = 1;
9607         }
9608
9609         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9610                             chunk_list) {
9611                 if (!silent)
9612                         fprintf(stderr,
9613                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9614                                 dext_rec->objectid,
9615                                 dext_rec->offset,
9616                                 dext_rec->length);
9617                 if (!ret)
9618                         ret = 1;
9619         }
9620         return ret;
9621 }
9622
9623
9624 static int check_device_used(struct device_record *dev_rec,
9625                              struct device_extent_tree *dext_cache)
9626 {
9627         struct cache_extent *cache;
9628         struct device_extent_record *dev_extent_rec;
9629         u64 total_byte = 0;
9630
9631         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9632         while (cache) {
9633                 dev_extent_rec = container_of(cache,
9634                                               struct device_extent_record,
9635                                               cache);
9636                 if (dev_extent_rec->objectid != dev_rec->devid)
9637                         break;
9638
9639                 list_del_init(&dev_extent_rec->device_list);
9640                 total_byte += dev_extent_rec->length;
9641                 cache = next_cache_extent(cache);
9642         }
9643
9644         if (total_byte != dev_rec->byte_used) {
9645                 fprintf(stderr,
9646                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9647                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9648                         dev_rec->type, dev_rec->offset);
9649                 return -1;
9650         } else {
9651                 return 0;
9652         }
9653 }
9654
9655 /* check btrfs_dev_item -> btrfs_dev_extent */
9656 static int check_devices(struct rb_root *dev_cache,
9657                          struct device_extent_tree *dev_extent_cache)
9658 {
9659         struct rb_node *dev_node;
9660         struct device_record *dev_rec;
9661         struct device_extent_record *dext_rec;
9662         int err;
9663         int ret = 0;
9664
9665         dev_node = rb_first(dev_cache);
9666         while (dev_node) {
9667                 dev_rec = container_of(dev_node, struct device_record, node);
9668                 err = check_device_used(dev_rec, dev_extent_cache);
9669                 if (err)
9670                         ret = err;
9671
9672                 dev_node = rb_next(dev_node);
9673         }
9674         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9675                             device_list) {
9676                 fprintf(stderr,
9677                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9678                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9679                 if (!ret)
9680                         ret = 1;
9681         }
9682         return ret;
9683 }
9684
9685 static int add_root_item_to_list(struct list_head *head,
9686                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9687                                   u8 level, u8 drop_level,
9688                                   int level_size, struct btrfs_key *drop_key)
9689 {
9690
9691         struct root_item_record *ri_rec;
9692         ri_rec = malloc(sizeof(*ri_rec));
9693         if (!ri_rec)
9694                 return -ENOMEM;
9695         ri_rec->bytenr = bytenr;
9696         ri_rec->objectid = objectid;
9697         ri_rec->level = level;
9698         ri_rec->level_size = level_size;
9699         ri_rec->drop_level = drop_level;
9700         ri_rec->last_snapshot = last_snapshot;
9701         if (drop_key)
9702                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9703         list_add_tail(&ri_rec->list, head);
9704
9705         return 0;
9706 }
9707
9708 static void free_root_item_list(struct list_head *list)
9709 {
9710         struct root_item_record *ri_rec;
9711
9712         while (!list_empty(list)) {
9713                 ri_rec = list_first_entry(list, struct root_item_record,
9714                                           list);
9715                 list_del_init(&ri_rec->list);
9716                 free(ri_rec);
9717         }
9718 }
9719
9720 static int deal_root_from_list(struct list_head *list,
9721                                struct btrfs_root *root,
9722                                struct block_info *bits,
9723                                int bits_nr,
9724                                struct cache_tree *pending,
9725                                struct cache_tree *seen,
9726                                struct cache_tree *reada,
9727                                struct cache_tree *nodes,
9728                                struct cache_tree *extent_cache,
9729                                struct cache_tree *chunk_cache,
9730                                struct rb_root *dev_cache,
9731                                struct block_group_tree *block_group_cache,
9732                                struct device_extent_tree *dev_extent_cache)
9733 {
9734         int ret = 0;
9735         u64 last;
9736
9737         while (!list_empty(list)) {
9738                 struct root_item_record *rec;
9739                 struct extent_buffer *buf;
9740                 rec = list_entry(list->next,
9741                                  struct root_item_record, list);
9742                 last = 0;
9743                 buf = read_tree_block(root->fs_info->tree_root,
9744                                       rec->bytenr, rec->level_size, 0);
9745                 if (!extent_buffer_uptodate(buf)) {
9746                         free_extent_buffer(buf);
9747                         ret = -EIO;
9748                         break;
9749                 }
9750                 ret = add_root_to_pending(buf, extent_cache, pending,
9751                                     seen, nodes, rec->objectid);
9752                 if (ret < 0)
9753                         break;
9754                 /*
9755                  * To rebuild extent tree, we need deal with snapshot
9756                  * one by one, otherwise we deal with node firstly which
9757                  * can maximize readahead.
9758                  */
9759                 while (1) {
9760                         ret = run_next_block(root, bits, bits_nr, &last,
9761                                              pending, seen, reada, nodes,
9762                                              extent_cache, chunk_cache,
9763                                              dev_cache, block_group_cache,
9764                                              dev_extent_cache, rec);
9765                         if (ret != 0)
9766                                 break;
9767                 }
9768                 free_extent_buffer(buf);
9769                 list_del(&rec->list);
9770                 free(rec);
9771                 if (ret < 0)
9772                         break;
9773         }
9774         while (ret >= 0) {
9775                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9776                                      reada, nodes, extent_cache, chunk_cache,
9777                                      dev_cache, block_group_cache,
9778                                      dev_extent_cache, NULL);
9779                 if (ret != 0) {
9780                         if (ret > 0)
9781                                 ret = 0;
9782                         break;
9783                 }
9784         }
9785         return ret;
9786 }
9787
9788 static int check_chunks_and_extents(struct btrfs_root *root)
9789 {
9790         struct rb_root dev_cache;
9791         struct cache_tree chunk_cache;
9792         struct block_group_tree block_group_cache;
9793         struct device_extent_tree dev_extent_cache;
9794         struct cache_tree extent_cache;
9795         struct cache_tree seen;
9796         struct cache_tree pending;
9797         struct cache_tree reada;
9798         struct cache_tree nodes;
9799         struct extent_io_tree excluded_extents;
9800         struct cache_tree corrupt_blocks;
9801         struct btrfs_path path;
9802         struct btrfs_key key;
9803         struct btrfs_key found_key;
9804         int ret, err = 0;
9805         struct block_info *bits;
9806         int bits_nr;
9807         struct extent_buffer *leaf;
9808         int slot;
9809         struct btrfs_root_item ri;
9810         struct list_head dropping_trees;
9811         struct list_head normal_trees;
9812         struct btrfs_root *root1;
9813         u64 objectid;
9814         u32 level_size;
9815         u8 level;
9816
9817         dev_cache = RB_ROOT;
9818         cache_tree_init(&chunk_cache);
9819         block_group_tree_init(&block_group_cache);
9820         device_extent_tree_init(&dev_extent_cache);
9821
9822         cache_tree_init(&extent_cache);
9823         cache_tree_init(&seen);
9824         cache_tree_init(&pending);
9825         cache_tree_init(&nodes);
9826         cache_tree_init(&reada);
9827         cache_tree_init(&corrupt_blocks);
9828         extent_io_tree_init(&excluded_extents);
9829         INIT_LIST_HEAD(&dropping_trees);
9830         INIT_LIST_HEAD(&normal_trees);
9831
9832         if (repair) {
9833                 root->fs_info->excluded_extents = &excluded_extents;
9834                 root->fs_info->fsck_extent_cache = &extent_cache;
9835                 root->fs_info->free_extent_hook = free_extent_hook;
9836                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9837         }
9838
9839         bits_nr = 1024;
9840         bits = malloc(bits_nr * sizeof(struct block_info));
9841         if (!bits) {
9842                 perror("malloc");
9843                 exit(1);
9844         }
9845
9846         if (ctx.progress_enabled) {
9847                 ctx.tp = TASK_EXTENTS;
9848                 task_start(ctx.info);
9849         }
9850
9851 again:
9852         root1 = root->fs_info->tree_root;
9853         level = btrfs_header_level(root1->node);
9854         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9855                                     root1->node->start, 0, level, 0,
9856                                     root1->nodesize, NULL);
9857         if (ret < 0)
9858                 goto out;
9859         root1 = root->fs_info->chunk_root;
9860         level = btrfs_header_level(root1->node);
9861         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9862                                     root1->node->start, 0, level, 0,
9863                                     root1->nodesize, NULL);
9864         if (ret < 0)
9865                 goto out;
9866         btrfs_init_path(&path);
9867         key.offset = 0;
9868         key.objectid = 0;
9869         key.type = BTRFS_ROOT_ITEM_KEY;
9870         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9871                                         &key, &path, 0, 0);
9872         if (ret < 0)
9873                 goto out;
9874         while(1) {
9875                 leaf = path.nodes[0];
9876                 slot = path.slots[0];
9877                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9878                         ret = btrfs_next_leaf(root, &path);
9879                         if (ret != 0)
9880                                 break;
9881                         leaf = path.nodes[0];
9882                         slot = path.slots[0];
9883                 }
9884                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9885                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9886                         unsigned long offset;
9887                         u64 last_snapshot;
9888
9889                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9890                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9891                         last_snapshot = btrfs_root_last_snapshot(&ri);
9892                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9893                                 level = btrfs_root_level(&ri);
9894                                 level_size = root->nodesize;
9895                                 ret = add_root_item_to_list(&normal_trees,
9896                                                 found_key.objectid,
9897                                                 btrfs_root_bytenr(&ri),
9898                                                 last_snapshot, level,
9899                                                 0, level_size, NULL);
9900                                 if (ret < 0)
9901                                         goto out;
9902                         } else {
9903                                 level = btrfs_root_level(&ri);
9904                                 level_size = root->nodesize;
9905                                 objectid = found_key.objectid;
9906                                 btrfs_disk_key_to_cpu(&found_key,
9907                                                       &ri.drop_progress);
9908                                 ret = add_root_item_to_list(&dropping_trees,
9909                                                 objectid,
9910                                                 btrfs_root_bytenr(&ri),
9911                                                 last_snapshot, level,
9912                                                 ri.drop_level,
9913                                                 level_size, &found_key);
9914                                 if (ret < 0)
9915                                         goto out;
9916                         }
9917                 }
9918                 path.slots[0]++;
9919         }
9920         btrfs_release_path(&path);
9921
9922         /*
9923          * check_block can return -EAGAIN if it fixes something, please keep
9924          * this in mind when dealing with return values from these functions, if
9925          * we get -EAGAIN we want to fall through and restart the loop.
9926          */
9927         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9928                                   &seen, &reada, &nodes, &extent_cache,
9929                                   &chunk_cache, &dev_cache, &block_group_cache,
9930                                   &dev_extent_cache);
9931         if (ret < 0) {
9932                 if (ret == -EAGAIN)
9933                         goto loop;
9934                 goto out;
9935         }
9936         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9937                                   &pending, &seen, &reada, &nodes,
9938                                   &extent_cache, &chunk_cache, &dev_cache,
9939                                   &block_group_cache, &dev_extent_cache);
9940         if (ret < 0) {
9941                 if (ret == -EAGAIN)
9942                         goto loop;
9943                 goto out;
9944         }
9945
9946         ret = check_chunks(&chunk_cache, &block_group_cache,
9947                            &dev_extent_cache, NULL, NULL, NULL, 0);
9948         if (ret) {
9949                 if (ret == -EAGAIN)
9950                         goto loop;
9951                 err = ret;
9952         }
9953
9954         ret = check_extent_refs(root, &extent_cache);
9955         if (ret < 0) {
9956                 if (ret == -EAGAIN)
9957                         goto loop;
9958                 goto out;
9959         }
9960
9961         ret = check_devices(&dev_cache, &dev_extent_cache);
9962         if (ret && err)
9963                 ret = err;
9964
9965 out:
9966         task_stop(ctx.info);
9967         if (repair) {
9968                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9969                 extent_io_tree_cleanup(&excluded_extents);
9970                 root->fs_info->fsck_extent_cache = NULL;
9971                 root->fs_info->free_extent_hook = NULL;
9972                 root->fs_info->corrupt_blocks = NULL;
9973                 root->fs_info->excluded_extents = NULL;
9974         }
9975         free(bits);
9976         free_chunk_cache_tree(&chunk_cache);
9977         free_device_cache_tree(&dev_cache);
9978         free_block_group_tree(&block_group_cache);
9979         free_device_extent_tree(&dev_extent_cache);
9980         free_extent_cache_tree(&seen);
9981         free_extent_cache_tree(&pending);
9982         free_extent_cache_tree(&reada);
9983         free_extent_cache_tree(&nodes);
9984         free_root_item_list(&normal_trees);
9985         free_root_item_list(&dropping_trees);
9986         return ret;
9987 loop:
9988         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9989         free_extent_cache_tree(&seen);
9990         free_extent_cache_tree(&pending);
9991         free_extent_cache_tree(&reada);
9992         free_extent_cache_tree(&nodes);
9993         free_chunk_cache_tree(&chunk_cache);
9994         free_block_group_tree(&block_group_cache);
9995         free_device_cache_tree(&dev_cache);
9996         free_device_extent_tree(&dev_extent_cache);
9997         free_extent_record_cache(&extent_cache);
9998         free_root_item_list(&normal_trees);
9999         free_root_item_list(&dropping_trees);
10000         extent_io_tree_cleanup(&excluded_extents);
10001         goto again;
10002 }
10003
10004 /*
10005  * Check backrefs of a tree block given by @bytenr or @eb.
10006  *
10007  * @root:       the root containing the @bytenr or @eb
10008  * @eb:         tree block extent buffer, can be NULL
10009  * @bytenr:     bytenr of the tree block to search
10010  * @level:      tree level of the tree block
10011  * @owner:      owner of the tree block
10012  *
10013  * Return >0 for any error found and output error message
10014  * Return 0 for no error found
10015  */
10016 static int check_tree_block_ref(struct btrfs_root *root,
10017                                 struct extent_buffer *eb, u64 bytenr,
10018                                 int level, u64 owner)
10019 {
10020         struct btrfs_key key;
10021         struct btrfs_root *extent_root = root->fs_info->extent_root;
10022         struct btrfs_path path;
10023         struct btrfs_extent_item *ei;
10024         struct btrfs_extent_inline_ref *iref;
10025         struct extent_buffer *leaf;
10026         unsigned long end;
10027         unsigned long ptr;
10028         int slot;
10029         int skinny_level;
10030         int type;
10031         u32 nodesize = root->nodesize;
10032         u32 item_size;
10033         u64 offset;
10034         int tree_reloc_root = 0;
10035         int found_ref = 0;
10036         int err = 0;
10037         int ret;
10038
10039         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10040             btrfs_header_bytenr(root->node) == bytenr)
10041                 tree_reloc_root = 1;
10042
10043         btrfs_init_path(&path);
10044         key.objectid = bytenr;
10045         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10046                 key.type = BTRFS_METADATA_ITEM_KEY;
10047         else
10048                 key.type = BTRFS_EXTENT_ITEM_KEY;
10049         key.offset = (u64)-1;
10050
10051         /* Search for the backref in extent tree */
10052         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10053         if (ret < 0) {
10054                 err |= BACKREF_MISSING;
10055                 goto out;
10056         }
10057         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10058         if (ret) {
10059                 err |= BACKREF_MISSING;
10060                 goto out;
10061         }
10062
10063         leaf = path.nodes[0];
10064         slot = path.slots[0];
10065         btrfs_item_key_to_cpu(leaf, &key, slot);
10066
10067         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10068
10069         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10070                 skinny_level = (int)key.offset;
10071                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10072         } else {
10073                 struct btrfs_tree_block_info *info;
10074
10075                 info = (struct btrfs_tree_block_info *)(ei + 1);
10076                 skinny_level = btrfs_tree_block_level(leaf, info);
10077                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10078         }
10079
10080         if (eb) {
10081                 u64 header_gen;
10082                 u64 extent_gen;
10083
10084                 if (!(btrfs_extent_flags(leaf, ei) &
10085                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10086                         error(
10087                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10088                                 key.objectid, nodesize,
10089                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10090                         err = BACKREF_MISMATCH;
10091                 }
10092                 header_gen = btrfs_header_generation(eb);
10093                 extent_gen = btrfs_extent_generation(leaf, ei);
10094                 if (header_gen != extent_gen) {
10095                         error(
10096         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10097                                 key.objectid, nodesize, header_gen,
10098                                 extent_gen);
10099                         err = BACKREF_MISMATCH;
10100                 }
10101                 if (level != skinny_level) {
10102                         error(
10103                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10104                                 key.objectid, nodesize, level, skinny_level);
10105                         err = BACKREF_MISMATCH;
10106                 }
10107                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10108                         error(
10109                         "extent[%llu %u] is referred by other roots than %llu",
10110                                 key.objectid, nodesize, root->objectid);
10111                         err = BACKREF_MISMATCH;
10112                 }
10113         }
10114
10115         /*
10116          * Iterate the extent/metadata item to find the exact backref
10117          */
10118         item_size = btrfs_item_size_nr(leaf, slot);
10119         ptr = (unsigned long)iref;
10120         end = (unsigned long)ei + item_size;
10121         while (ptr < end) {
10122                 iref = (struct btrfs_extent_inline_ref *)ptr;
10123                 type = btrfs_extent_inline_ref_type(leaf, iref);
10124                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10125
10126                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10127                         (offset == root->objectid || offset == owner)) {
10128                         found_ref = 1;
10129                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10130                         /*
10131                          * Backref of tree reloc root points to itself, no need
10132                          * to check backref any more.
10133                          */
10134                         if (tree_reloc_root)
10135                                 found_ref = 1;
10136                         else
10137                         /* Check if the backref points to valid referencer */
10138                                 found_ref = !check_tree_block_ref(root, NULL,
10139                                                 offset, level + 1, owner);
10140                 }
10141
10142                 if (found_ref)
10143                         break;
10144                 ptr += btrfs_extent_inline_ref_size(type);
10145         }
10146
10147         /*
10148          * Inlined extent item doesn't have what we need, check
10149          * TREE_BLOCK_REF_KEY
10150          */
10151         if (!found_ref) {
10152                 btrfs_release_path(&path);
10153                 key.objectid = bytenr;
10154                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10155                 key.offset = root->objectid;
10156
10157                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10158                 if (!ret)
10159                         found_ref = 1;
10160         }
10161         if (!found_ref)
10162                 err |= BACKREF_MISSING;
10163 out:
10164         btrfs_release_path(&path);
10165         if (eb && (err & BACKREF_MISSING))
10166                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10167                         bytenr, nodesize, owner, level);
10168         return err;
10169 }
10170
10171 /*
10172  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10173  *
10174  * Return >0 any error found and output error message
10175  * Return 0 for no error found
10176  */
10177 static int check_extent_data_item(struct btrfs_root *root,
10178                                   struct extent_buffer *eb, int slot)
10179 {
10180         struct btrfs_file_extent_item *fi;
10181         struct btrfs_path path;
10182         struct btrfs_root *extent_root = root->fs_info->extent_root;
10183         struct btrfs_key fi_key;
10184         struct btrfs_key dbref_key;
10185         struct extent_buffer *leaf;
10186         struct btrfs_extent_item *ei;
10187         struct btrfs_extent_inline_ref *iref;
10188         struct btrfs_extent_data_ref *dref;
10189         u64 owner;
10190         u64 disk_bytenr;
10191         u64 disk_num_bytes;
10192         u64 extent_num_bytes;
10193         u64 extent_flags;
10194         u32 item_size;
10195         unsigned long end;
10196         unsigned long ptr;
10197         int type;
10198         u64 ref_root;
10199         int found_dbackref = 0;
10200         int err = 0;
10201         int ret;
10202
10203         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10204         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10205
10206         /* Nothing to check for hole and inline data extents */
10207         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10208             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10209                 return 0;
10210
10211         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10212         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10213         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10214
10215         /* Check unaligned disk_num_bytes and num_bytes */
10216         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10217                 error(
10218 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10219                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10220                         root->sectorsize);
10221                 err |= BYTES_UNALIGNED;
10222         } else {
10223                 data_bytes_allocated += disk_num_bytes;
10224         }
10225         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10226                 error(
10227 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10228                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10229                         root->sectorsize);
10230                 err |= BYTES_UNALIGNED;
10231         } else {
10232                 data_bytes_referenced += extent_num_bytes;
10233         }
10234         owner = btrfs_header_owner(eb);
10235
10236         /* Check the extent item of the file extent in extent tree */
10237         btrfs_init_path(&path);
10238         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10239         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10240         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10241
10242         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10243         if (ret)
10244                 goto out;
10245
10246         leaf = path.nodes[0];
10247         slot = path.slots[0];
10248         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10249
10250         extent_flags = btrfs_extent_flags(leaf, ei);
10251
10252         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10253                 error(
10254                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10255                     disk_bytenr, disk_num_bytes,
10256                     BTRFS_EXTENT_FLAG_DATA);
10257                 err |= BACKREF_MISMATCH;
10258         }
10259
10260         /* Check data backref inside that extent item */
10261         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10262         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10263         ptr = (unsigned long)iref;
10264         end = (unsigned long)ei + item_size;
10265         while (ptr < end) {
10266                 iref = (struct btrfs_extent_inline_ref *)ptr;
10267                 type = btrfs_extent_inline_ref_type(leaf, iref);
10268                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10269
10270                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10271                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10272                         if (ref_root == owner || ref_root == root->objectid)
10273                                 found_dbackref = 1;
10274                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10275                         found_dbackref = !check_tree_block_ref(root, NULL,
10276                                 btrfs_extent_inline_ref_offset(leaf, iref),
10277                                 0, owner);
10278                 }
10279
10280                 if (found_dbackref)
10281                         break;
10282                 ptr += btrfs_extent_inline_ref_size(type);
10283         }
10284
10285         if (!found_dbackref) {
10286                 btrfs_release_path(&path);
10287
10288                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10289                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10290                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10291                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10292                                 fi_key.objectid, fi_key.offset);
10293
10294                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10295                                         &dbref_key, &path, 0, 0);
10296                 if (!ret) {
10297                         found_dbackref = 1;
10298                         goto out;
10299                 }
10300
10301                 btrfs_release_path(&path);
10302
10303                 /*
10304                  * Neither inlined nor EXTENT_DATA_REF found, try
10305                  * SHARED_DATA_REF as last chance.
10306                  */
10307                 dbref_key.objectid = disk_bytenr;
10308                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10309                 dbref_key.offset = eb->start;
10310
10311                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10312                                         &dbref_key, &path, 0, 0);
10313                 if (!ret) {
10314                         found_dbackref = 1;
10315                         goto out;
10316                 }
10317         }
10318
10319 out:
10320         if (!found_dbackref)
10321                 err |= BACKREF_MISSING;
10322         btrfs_release_path(&path);
10323         if (err & BACKREF_MISSING) {
10324                 error("data extent[%llu %llu] backref lost",
10325                       disk_bytenr, disk_num_bytes);
10326         }
10327         return err;
10328 }
10329
10330 /*
10331  * Get real tree block level for the case like shared block
10332  * Return >= 0 as tree level
10333  * Return <0 for error
10334  */
10335 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10336 {
10337         struct extent_buffer *eb;
10338         struct btrfs_path path;
10339         struct btrfs_key key;
10340         struct btrfs_extent_item *ei;
10341         u64 flags;
10342         u64 transid;
10343         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10344         u8 backref_level;
10345         u8 header_level;
10346         int ret;
10347
10348         /* Search extent tree for extent generation and level */
10349         key.objectid = bytenr;
10350         key.type = BTRFS_METADATA_ITEM_KEY;
10351         key.offset = (u64)-1;
10352
10353         btrfs_init_path(&path);
10354         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10355         if (ret < 0)
10356                 goto release_out;
10357         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10358         if (ret < 0)
10359                 goto release_out;
10360         if (ret > 0) {
10361                 ret = -ENOENT;
10362                 goto release_out;
10363         }
10364
10365         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10366         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10367                             struct btrfs_extent_item);
10368         flags = btrfs_extent_flags(path.nodes[0], ei);
10369         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10370                 ret = -ENOENT;
10371                 goto release_out;
10372         }
10373
10374         /* Get transid for later read_tree_block() check */
10375         transid = btrfs_extent_generation(path.nodes[0], ei);
10376
10377         /* Get backref level as one source */
10378         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10379                 backref_level = key.offset;
10380         } else {
10381                 struct btrfs_tree_block_info *info;
10382
10383                 info = (struct btrfs_tree_block_info *)(ei + 1);
10384                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10385         }
10386         btrfs_release_path(&path);
10387
10388         /* Get level from tree block as an alternative source */
10389         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10390         if (!extent_buffer_uptodate(eb)) {
10391                 free_extent_buffer(eb);
10392                 return -EIO;
10393         }
10394         header_level = btrfs_header_level(eb);
10395         free_extent_buffer(eb);
10396
10397         if (header_level != backref_level)
10398                 return -EIO;
10399         return header_level;
10400
10401 release_out:
10402         btrfs_release_path(&path);
10403         return ret;
10404 }
10405
10406 /*
10407  * Check if a tree block backref is valid (points to a valid tree block)
10408  * if level == -1, level will be resolved
10409  * Return >0 for any error found and print error message
10410  */
10411 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10412                                     u64 bytenr, int level)
10413 {
10414         struct btrfs_root *root;
10415         struct btrfs_key key;
10416         struct btrfs_path path;
10417         struct extent_buffer *eb;
10418         struct extent_buffer *node;
10419         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10420         int err = 0;
10421         int ret;
10422
10423         /* Query level for level == -1 special case */
10424         if (level == -1)
10425                 level = query_tree_block_level(fs_info, bytenr);
10426         if (level < 0) {
10427                 err |= REFERENCER_MISSING;
10428                 goto out;
10429         }
10430
10431         key.objectid = root_id;
10432         key.type = BTRFS_ROOT_ITEM_KEY;
10433         key.offset = (u64)-1;
10434
10435         root = btrfs_read_fs_root(fs_info, &key);
10436         if (IS_ERR(root)) {
10437                 err |= REFERENCER_MISSING;
10438                 goto out;
10439         }
10440
10441         /* Read out the tree block to get item/node key */
10442         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10443         if (!extent_buffer_uptodate(eb)) {
10444                 err |= REFERENCER_MISSING;
10445                 free_extent_buffer(eb);
10446                 goto out;
10447         }
10448
10449         /* Empty tree, no need to check key */
10450         if (!btrfs_header_nritems(eb) && !level) {
10451                 free_extent_buffer(eb);
10452                 goto out;
10453         }
10454
10455         if (level)
10456                 btrfs_node_key_to_cpu(eb, &key, 0);
10457         else
10458                 btrfs_item_key_to_cpu(eb, &key, 0);
10459
10460         free_extent_buffer(eb);
10461
10462         btrfs_init_path(&path);
10463         path.lowest_level = level;
10464         /* Search with the first key, to ensure we can reach it */
10465         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10466         if (ret < 0) {
10467                 err |= REFERENCER_MISSING;
10468                 goto release_out;
10469         }
10470
10471         node = path.nodes[level];
10472         if (btrfs_header_bytenr(node) != bytenr) {
10473                 error(
10474         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10475                         bytenr, nodesize, bytenr,
10476                         btrfs_header_bytenr(node));
10477                 err |= REFERENCER_MISMATCH;
10478         }
10479         if (btrfs_header_level(node) != level) {
10480                 error(
10481         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10482                         bytenr, nodesize, level,
10483                         btrfs_header_level(node));
10484                 err |= REFERENCER_MISMATCH;
10485         }
10486
10487 release_out:
10488         btrfs_release_path(&path);
10489 out:
10490         if (err & REFERENCER_MISSING) {
10491                 if (level < 0)
10492                         error("extent [%llu %d] lost referencer (owner: %llu)",
10493                                 bytenr, nodesize, root_id);
10494                 else
10495                         error(
10496                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10497                                 bytenr, nodesize, root_id, level);
10498         }
10499
10500         return err;
10501 }
10502
10503 /*
10504  * Check if tree block @eb is tree reloc root.
10505  * Return 0 if it's not or any problem happens
10506  * Return 1 if it's a tree reloc root
10507  */
10508 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10509                                  struct extent_buffer *eb)
10510 {
10511         struct btrfs_root *tree_reloc_root;
10512         struct btrfs_key key;
10513         u64 bytenr = btrfs_header_bytenr(eb);
10514         u64 owner = btrfs_header_owner(eb);
10515         int ret = 0;
10516
10517         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10518         key.offset = owner;
10519         key.type = BTRFS_ROOT_ITEM_KEY;
10520
10521         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10522         if (IS_ERR(tree_reloc_root))
10523                 return 0;
10524
10525         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10526                 ret = 1;
10527         btrfs_free_fs_root(tree_reloc_root);
10528         return ret;
10529 }
10530
10531 /*
10532  * Check referencer for shared block backref
10533  * If level == -1, this function will resolve the level.
10534  */
10535 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10536                                      u64 parent, u64 bytenr, int level)
10537 {
10538         struct extent_buffer *eb;
10539         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10540         u32 nr;
10541         int found_parent = 0;
10542         int i;
10543
10544         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10545         if (!extent_buffer_uptodate(eb))
10546                 goto out;
10547
10548         if (level == -1)
10549                 level = query_tree_block_level(fs_info, bytenr);
10550         if (level < 0)
10551                 goto out;
10552
10553         /* It's possible it's a tree reloc root */
10554         if (parent == bytenr) {
10555                 if (is_tree_reloc_root(fs_info, eb))
10556                         found_parent = 1;
10557                 goto out;
10558         }
10559
10560         if (level + 1 != btrfs_header_level(eb))
10561                 goto out;
10562
10563         nr = btrfs_header_nritems(eb);
10564         for (i = 0; i < nr; i++) {
10565                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10566                         found_parent = 1;
10567                         break;
10568                 }
10569         }
10570 out:
10571         free_extent_buffer(eb);
10572         if (!found_parent) {
10573                 error(
10574         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10575                         bytenr, nodesize, parent, level);
10576                 return REFERENCER_MISSING;
10577         }
10578         return 0;
10579 }
10580
10581 /*
10582  * Check referencer for normal (inlined) data ref
10583  * If len == 0, it will be resolved by searching in extent tree
10584  */
10585 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10586                                      u64 root_id, u64 objectid, u64 offset,
10587                                      u64 bytenr, u64 len, u32 count)
10588 {
10589         struct btrfs_root *root;
10590         struct btrfs_root *extent_root = fs_info->extent_root;
10591         struct btrfs_key key;
10592         struct btrfs_path path;
10593         struct extent_buffer *leaf;
10594         struct btrfs_file_extent_item *fi;
10595         u32 found_count = 0;
10596         int slot;
10597         int ret = 0;
10598
10599         if (!len) {
10600                 key.objectid = bytenr;
10601                 key.type = BTRFS_EXTENT_ITEM_KEY;
10602                 key.offset = (u64)-1;
10603
10604                 btrfs_init_path(&path);
10605                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10606                 if (ret < 0)
10607                         goto out;
10608                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10609                 if (ret)
10610                         goto out;
10611                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10612                 if (key.objectid != bytenr ||
10613                     key.type != BTRFS_EXTENT_ITEM_KEY)
10614                         goto out;
10615                 len = key.offset;
10616                 btrfs_release_path(&path);
10617         }
10618         key.objectid = root_id;
10619         key.type = BTRFS_ROOT_ITEM_KEY;
10620         key.offset = (u64)-1;
10621         btrfs_init_path(&path);
10622
10623         root = btrfs_read_fs_root(fs_info, &key);
10624         if (IS_ERR(root))
10625                 goto out;
10626
10627         key.objectid = objectid;
10628         key.type = BTRFS_EXTENT_DATA_KEY;
10629         /*
10630          * It can be nasty as data backref offset is
10631          * file offset - file extent offset, which is smaller or
10632          * equal to original backref offset.  The only special case is
10633          * overflow.  So we need to special check and do further search.
10634          */
10635         key.offset = offset & (1ULL << 63) ? 0 : offset;
10636
10637         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10638         if (ret < 0)
10639                 goto out;
10640
10641         /*
10642          * Search afterwards to get correct one
10643          * NOTE: As we must do a comprehensive check on the data backref to
10644          * make sure the dref count also matches, we must iterate all file
10645          * extents for that inode.
10646          */
10647         while (1) {
10648                 leaf = path.nodes[0];
10649                 slot = path.slots[0];
10650
10651                 if (slot >= btrfs_header_nritems(leaf))
10652                         goto next;
10653                 btrfs_item_key_to_cpu(leaf, &key, slot);
10654                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10655                         break;
10656                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10657                 /*
10658                  * Except normal disk bytenr and disk num bytes, we still
10659                  * need to do extra check on dbackref offset as
10660                  * dbackref offset = file_offset - file_extent_offset
10661                  */
10662                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10663                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10664                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10665                     offset)
10666                         found_count++;
10667
10668 next:
10669                 ret = btrfs_next_item(root, &path);
10670                 if (ret)
10671                         break;
10672         }
10673 out:
10674         btrfs_release_path(&path);
10675         if (found_count != count) {
10676                 error(
10677 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10678                         bytenr, len, root_id, objectid, offset, count, found_count);
10679                 return REFERENCER_MISSING;
10680         }
10681         return 0;
10682 }
10683
10684 /*
10685  * Check if the referencer of a shared data backref exists
10686  */
10687 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10688                                      u64 parent, u64 bytenr)
10689 {
10690         struct extent_buffer *eb;
10691         struct btrfs_key key;
10692         struct btrfs_file_extent_item *fi;
10693         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10694         u32 nr;
10695         int found_parent = 0;
10696         int i;
10697
10698         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10699         if (!extent_buffer_uptodate(eb))
10700                 goto out;
10701
10702         nr = btrfs_header_nritems(eb);
10703         for (i = 0; i < nr; i++) {
10704                 btrfs_item_key_to_cpu(eb, &key, i);
10705                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10706                         continue;
10707
10708                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10709                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10710                         continue;
10711
10712                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10713                         found_parent = 1;
10714                         break;
10715                 }
10716         }
10717
10718 out:
10719         free_extent_buffer(eb);
10720         if (!found_parent) {
10721                 error("shared extent %llu referencer lost (parent: %llu)",
10722                         bytenr, parent);
10723                 return REFERENCER_MISSING;
10724         }
10725         return 0;
10726 }
10727
10728 /*
10729  * This function will check a given extent item, including its backref and
10730  * itself (like crossing stripe boundary and type)
10731  *
10732  * Since we don't use extent_record anymore, introduce new error bit
10733  */
10734 static int check_extent_item(struct btrfs_fs_info *fs_info,
10735                              struct extent_buffer *eb, int slot)
10736 {
10737         struct btrfs_extent_item *ei;
10738         struct btrfs_extent_inline_ref *iref;
10739         struct btrfs_extent_data_ref *dref;
10740         unsigned long end;
10741         unsigned long ptr;
10742         int type;
10743         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10744         u32 item_size = btrfs_item_size_nr(eb, slot);
10745         u64 flags;
10746         u64 offset;
10747         int metadata = 0;
10748         int level;
10749         struct btrfs_key key;
10750         int ret;
10751         int err = 0;
10752
10753         btrfs_item_key_to_cpu(eb, &key, slot);
10754         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10755                 bytes_used += key.offset;
10756         else
10757                 bytes_used += nodesize;
10758
10759         if (item_size < sizeof(*ei)) {
10760                 /*
10761                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10762                  * old thing when on disk format is still un-determined.
10763                  * No need to care about it anymore
10764                  */
10765                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10766                 return -ENOTTY;
10767         }
10768
10769         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10770         flags = btrfs_extent_flags(eb, ei);
10771
10772         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10773                 metadata = 1;
10774         if (metadata && check_crossing_stripes(global_info, key.objectid,
10775                                                eb->len)) {
10776                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10777                       key.objectid, key.objectid + nodesize);
10778                 err |= CROSSING_STRIPE_BOUNDARY;
10779         }
10780
10781         ptr = (unsigned long)(ei + 1);
10782
10783         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10784                 /* Old EXTENT_ITEM metadata */
10785                 struct btrfs_tree_block_info *info;
10786
10787                 info = (struct btrfs_tree_block_info *)ptr;
10788                 level = btrfs_tree_block_level(eb, info);
10789                 ptr += sizeof(struct btrfs_tree_block_info);
10790         } else {
10791                 /* New METADATA_ITEM */
10792                 level = key.offset;
10793         }
10794         end = (unsigned long)ei + item_size;
10795
10796 next:
10797         /* Reached extent item end normally */
10798         if (ptr == end)
10799                 goto out;
10800
10801         /* Beyond extent item end, wrong item size */
10802         if (ptr > end) {
10803                 err |= ITEM_SIZE_MISMATCH;
10804                 error("extent item at bytenr %llu slot %d has wrong size",
10805                         eb->start, slot);
10806                 goto out;
10807         }
10808
10809         /* Now check every backref in this extent item */
10810         iref = (struct btrfs_extent_inline_ref *)ptr;
10811         type = btrfs_extent_inline_ref_type(eb, iref);
10812         offset = btrfs_extent_inline_ref_offset(eb, iref);
10813         switch (type) {
10814         case BTRFS_TREE_BLOCK_REF_KEY:
10815                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10816                                                level);
10817                 err |= ret;
10818                 break;
10819         case BTRFS_SHARED_BLOCK_REF_KEY:
10820                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10821                                                  level);
10822                 err |= ret;
10823                 break;
10824         case BTRFS_EXTENT_DATA_REF_KEY:
10825                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10826                 ret = check_extent_data_backref(fs_info,
10827                                 btrfs_extent_data_ref_root(eb, dref),
10828                                 btrfs_extent_data_ref_objectid(eb, dref),
10829                                 btrfs_extent_data_ref_offset(eb, dref),
10830                                 key.objectid, key.offset,
10831                                 btrfs_extent_data_ref_count(eb, dref));
10832                 err |= ret;
10833                 break;
10834         case BTRFS_SHARED_DATA_REF_KEY:
10835                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10836                 err |= ret;
10837                 break;
10838         default:
10839                 error("extent[%llu %d %llu] has unknown ref type: %d",
10840                         key.objectid, key.type, key.offset, type);
10841                 err |= UNKNOWN_TYPE;
10842                 goto out;
10843         }
10844
10845         ptr += btrfs_extent_inline_ref_size(type);
10846         goto next;
10847
10848 out:
10849         return err;
10850 }
10851
10852 /*
10853  * Check if a dev extent item is referred correctly by its chunk
10854  */
10855 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10856                                  struct extent_buffer *eb, int slot)
10857 {
10858         struct btrfs_root *chunk_root = fs_info->chunk_root;
10859         struct btrfs_dev_extent *ptr;
10860         struct btrfs_path path;
10861         struct btrfs_key chunk_key;
10862         struct btrfs_key devext_key;
10863         struct btrfs_chunk *chunk;
10864         struct extent_buffer *l;
10865         int num_stripes;
10866         u64 length;
10867         int i;
10868         int found_chunk = 0;
10869         int ret;
10870
10871         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10872         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10873         length = btrfs_dev_extent_length(eb, ptr);
10874
10875         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10876         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10877         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10878
10879         btrfs_init_path(&path);
10880         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10881         if (ret)
10882                 goto out;
10883
10884         l = path.nodes[0];
10885         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10886         if (btrfs_chunk_length(l, chunk) != length)
10887                 goto out;
10888
10889         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10890         for (i = 0; i < num_stripes; i++) {
10891                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10892                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10893
10894                 if (devid == devext_key.objectid &&
10895                     offset == devext_key.offset) {
10896                         found_chunk = 1;
10897                         break;
10898                 }
10899         }
10900 out:
10901         btrfs_release_path(&path);
10902         if (!found_chunk) {
10903                 error(
10904                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10905                         devext_key.objectid, devext_key.offset, length);
10906                 return REFERENCER_MISSING;
10907         }
10908         return 0;
10909 }
10910
10911 /*
10912  * Check if the used space is correct with the dev item
10913  */
10914 static int check_dev_item(struct btrfs_fs_info *fs_info,
10915                           struct extent_buffer *eb, int slot)
10916 {
10917         struct btrfs_root *dev_root = fs_info->dev_root;
10918         struct btrfs_dev_item *dev_item;
10919         struct btrfs_path path;
10920         struct btrfs_key key;
10921         struct btrfs_dev_extent *ptr;
10922         u64 dev_id;
10923         u64 used;
10924         u64 total = 0;
10925         int ret;
10926
10927         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10928         dev_id = btrfs_device_id(eb, dev_item);
10929         used = btrfs_device_bytes_used(eb, dev_item);
10930
10931         key.objectid = dev_id;
10932         key.type = BTRFS_DEV_EXTENT_KEY;
10933         key.offset = 0;
10934
10935         btrfs_init_path(&path);
10936         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10937         if (ret < 0) {
10938                 btrfs_item_key_to_cpu(eb, &key, slot);
10939                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10940                         key.objectid, key.type, key.offset);
10941                 btrfs_release_path(&path);
10942                 return REFERENCER_MISSING;
10943         }
10944
10945         /* Iterate dev_extents to calculate the used space of a device */
10946         while (1) {
10947                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10948                         goto next;
10949
10950                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10951                 if (key.objectid > dev_id)
10952                         break;
10953                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10954                         goto next;
10955
10956                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10957                                      struct btrfs_dev_extent);
10958                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10959 next:
10960                 ret = btrfs_next_item(dev_root, &path);
10961                 if (ret)
10962                         break;
10963         }
10964         btrfs_release_path(&path);
10965
10966         if (used != total) {
10967                 btrfs_item_key_to_cpu(eb, &key, slot);
10968                 error(
10969 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10970                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10971                         BTRFS_DEV_EXTENT_KEY, dev_id);
10972                 return ACCOUNTING_MISMATCH;
10973         }
10974         return 0;
10975 }
10976
10977 /*
10978  * Check a block group item with its referener (chunk) and its used space
10979  * with extent/metadata item
10980  */
10981 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10982                                   struct extent_buffer *eb, int slot)
10983 {
10984         struct btrfs_root *extent_root = fs_info->extent_root;
10985         struct btrfs_root *chunk_root = fs_info->chunk_root;
10986         struct btrfs_block_group_item *bi;
10987         struct btrfs_block_group_item bg_item;
10988         struct btrfs_path path;
10989         struct btrfs_key bg_key;
10990         struct btrfs_key chunk_key;
10991         struct btrfs_key extent_key;
10992         struct btrfs_chunk *chunk;
10993         struct extent_buffer *leaf;
10994         struct btrfs_extent_item *ei;
10995         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10996         u64 flags;
10997         u64 bg_flags;
10998         u64 used;
10999         u64 total = 0;
11000         int ret;
11001         int err = 0;
11002
11003         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11004         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11005         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11006         used = btrfs_block_group_used(&bg_item);
11007         bg_flags = btrfs_block_group_flags(&bg_item);
11008
11009         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11010         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11011         chunk_key.offset = bg_key.objectid;
11012
11013         btrfs_init_path(&path);
11014         /* Search for the referencer chunk */
11015         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11016         if (ret) {
11017                 error(
11018                 "block group[%llu %llu] did not find the related chunk item",
11019                         bg_key.objectid, bg_key.offset);
11020                 err |= REFERENCER_MISSING;
11021         } else {
11022                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11023                                         struct btrfs_chunk);
11024                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11025                                                 bg_key.offset) {
11026                         error(
11027         "block group[%llu %llu] related chunk item length does not match",
11028                                 bg_key.objectid, bg_key.offset);
11029                         err |= REFERENCER_MISMATCH;
11030                 }
11031         }
11032         btrfs_release_path(&path);
11033
11034         /* Search from the block group bytenr */
11035         extent_key.objectid = bg_key.objectid;
11036         extent_key.type = 0;
11037         extent_key.offset = 0;
11038
11039         btrfs_init_path(&path);
11040         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11041         if (ret < 0)
11042                 goto out;
11043
11044         /* Iterate extent tree to account used space */
11045         while (1) {
11046                 leaf = path.nodes[0];
11047
11048                 /* Search slot can point to the last item beyond leaf nritems */
11049                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11050                         goto next;
11051
11052                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11053                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11054                         break;
11055
11056                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11057                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11058                         goto next;
11059                 if (extent_key.objectid < bg_key.objectid)
11060                         goto next;
11061
11062                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11063                         total += nodesize;
11064                 else
11065                         total += extent_key.offset;
11066
11067                 ei = btrfs_item_ptr(leaf, path.slots[0],
11068                                     struct btrfs_extent_item);
11069                 flags = btrfs_extent_flags(leaf, ei);
11070                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11071                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11072                                 error(
11073                         "bad extent[%llu, %llu) type mismatch with chunk",
11074                                         extent_key.objectid,
11075                                         extent_key.objectid + extent_key.offset);
11076                                 err |= CHUNK_TYPE_MISMATCH;
11077                         }
11078                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11079                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11080                                     BTRFS_BLOCK_GROUP_METADATA))) {
11081                                 error(
11082                         "bad extent[%llu, %llu) type mismatch with chunk",
11083                                         extent_key.objectid,
11084                                         extent_key.objectid + nodesize);
11085                                 err |= CHUNK_TYPE_MISMATCH;
11086                         }
11087                 }
11088 next:
11089                 ret = btrfs_next_item(extent_root, &path);
11090                 if (ret)
11091                         break;
11092         }
11093
11094 out:
11095         btrfs_release_path(&path);
11096
11097         if (total != used) {
11098                 error(
11099                 "block group[%llu %llu] used %llu but extent items used %llu",
11100                         bg_key.objectid, bg_key.offset, used, total);
11101                 err |= ACCOUNTING_MISMATCH;
11102         }
11103         return err;
11104 }
11105
11106 /*
11107  * Check a chunk item.
11108  * Including checking all referred dev_extents and block group
11109  */
11110 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11111                             struct extent_buffer *eb, int slot)
11112 {
11113         struct btrfs_root *extent_root = fs_info->extent_root;
11114         struct btrfs_root *dev_root = fs_info->dev_root;
11115         struct btrfs_path path;
11116         struct btrfs_key chunk_key;
11117         struct btrfs_key bg_key;
11118         struct btrfs_key devext_key;
11119         struct btrfs_chunk *chunk;
11120         struct extent_buffer *leaf;
11121         struct btrfs_block_group_item *bi;
11122         struct btrfs_block_group_item bg_item;
11123         struct btrfs_dev_extent *ptr;
11124         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11125         u64 length;
11126         u64 chunk_end;
11127         u64 type;
11128         u64 profile;
11129         int num_stripes;
11130         u64 offset;
11131         u64 objectid;
11132         int i;
11133         int ret;
11134         int err = 0;
11135
11136         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11137         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11138         length = btrfs_chunk_length(eb, chunk);
11139         chunk_end = chunk_key.offset + length;
11140         if (!IS_ALIGNED(length, sectorsize)) {
11141                 error("chunk[%llu %llu) not aligned to %u",
11142                         chunk_key.offset, chunk_end, sectorsize);
11143                 err |= BYTES_UNALIGNED;
11144                 goto out;
11145         }
11146
11147         type = btrfs_chunk_type(eb, chunk);
11148         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11149         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11150                 error("chunk[%llu %llu) has no chunk type",
11151                         chunk_key.offset, chunk_end);
11152                 err |= UNKNOWN_TYPE;
11153         }
11154         if (profile && (profile & (profile - 1))) {
11155                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11156                         chunk_key.offset, chunk_end, profile);
11157                 err |= UNKNOWN_TYPE;
11158         }
11159
11160         bg_key.objectid = chunk_key.offset;
11161         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11162         bg_key.offset = length;
11163
11164         btrfs_init_path(&path);
11165         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11166         if (ret) {
11167                 error(
11168                 "chunk[%llu %llu) did not find the related block group item",
11169                         chunk_key.offset, chunk_end);
11170                 err |= REFERENCER_MISSING;
11171         } else{
11172                 leaf = path.nodes[0];
11173                 bi = btrfs_item_ptr(leaf, path.slots[0],
11174                                     struct btrfs_block_group_item);
11175                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11176                                    sizeof(bg_item));
11177                 if (btrfs_block_group_flags(&bg_item) != type) {
11178                         error(
11179 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11180                                 chunk_key.offset, chunk_end, type,
11181                                 btrfs_block_group_flags(&bg_item));
11182                         err |= REFERENCER_MISSING;
11183                 }
11184         }
11185
11186         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11187         for (i = 0; i < num_stripes; i++) {
11188                 btrfs_release_path(&path);
11189                 btrfs_init_path(&path);
11190                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11191                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11192                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11193
11194                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11195                                         0, 0);
11196                 if (ret)
11197                         goto not_match_dev;
11198
11199                 leaf = path.nodes[0];
11200                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11201                                      struct btrfs_dev_extent);
11202                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11203                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11204                 if (objectid != chunk_key.objectid ||
11205                     offset != chunk_key.offset ||
11206                     btrfs_dev_extent_length(leaf, ptr) != length)
11207                         goto not_match_dev;
11208                 continue;
11209 not_match_dev:
11210                 err |= BACKREF_MISSING;
11211                 error(
11212                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11213                         chunk_key.objectid, chunk_end, i);
11214                 continue;
11215         }
11216         btrfs_release_path(&path);
11217 out:
11218         return err;
11219 }
11220
11221 /*
11222  * Main entry function to check known items and update related accounting info
11223  */
11224 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11225 {
11226         struct btrfs_fs_info *fs_info = root->fs_info;
11227         struct btrfs_key key;
11228         int slot = 0;
11229         int type;
11230         struct btrfs_extent_data_ref *dref;
11231         int ret;
11232         int err = 0;
11233
11234 next:
11235         btrfs_item_key_to_cpu(eb, &key, slot);
11236         type = key.type;
11237
11238         switch (type) {
11239         case BTRFS_EXTENT_DATA_KEY:
11240                 ret = check_extent_data_item(root, eb, slot);
11241                 err |= ret;
11242                 break;
11243         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11244                 ret = check_block_group_item(fs_info, eb, slot);
11245                 err |= ret;
11246                 break;
11247         case BTRFS_DEV_ITEM_KEY:
11248                 ret = check_dev_item(fs_info, eb, slot);
11249                 err |= ret;
11250                 break;
11251         case BTRFS_CHUNK_ITEM_KEY:
11252                 ret = check_chunk_item(fs_info, eb, slot);
11253                 err |= ret;
11254                 break;
11255         case BTRFS_DEV_EXTENT_KEY:
11256                 ret = check_dev_extent_item(fs_info, eb, slot);
11257                 err |= ret;
11258                 break;
11259         case BTRFS_EXTENT_ITEM_KEY:
11260         case BTRFS_METADATA_ITEM_KEY:
11261                 ret = check_extent_item(fs_info, eb, slot);
11262                 err |= ret;
11263                 break;
11264         case BTRFS_EXTENT_CSUM_KEY:
11265                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11266                 break;
11267         case BTRFS_TREE_BLOCK_REF_KEY:
11268                 ret = check_tree_block_backref(fs_info, key.offset,
11269                                                key.objectid, -1);
11270                 err |= ret;
11271                 break;
11272         case BTRFS_EXTENT_DATA_REF_KEY:
11273                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11274                 ret = check_extent_data_backref(fs_info,
11275                                 btrfs_extent_data_ref_root(eb, dref),
11276                                 btrfs_extent_data_ref_objectid(eb, dref),
11277                                 btrfs_extent_data_ref_offset(eb, dref),
11278                                 key.objectid, 0,
11279                                 btrfs_extent_data_ref_count(eb, dref));
11280                 err |= ret;
11281                 break;
11282         case BTRFS_SHARED_BLOCK_REF_KEY:
11283                 ret = check_shared_block_backref(fs_info, key.offset,
11284                                                  key.objectid, -1);
11285                 err |= ret;
11286                 break;
11287         case BTRFS_SHARED_DATA_REF_KEY:
11288                 ret = check_shared_data_backref(fs_info, key.offset,
11289                                                 key.objectid);
11290                 err |= ret;
11291                 break;
11292         default:
11293                 break;
11294         }
11295
11296         if (++slot < btrfs_header_nritems(eb))
11297                 goto next;
11298
11299         return err;
11300 }
11301
11302 /*
11303  * Helper function for later fs/subvol tree check.  To determine if a tree
11304  * block should be checked.
11305  * This function will ensure only the direct referencer with lowest rootid to
11306  * check a fs/subvolume tree block.
11307  *
11308  * Backref check at extent tree would detect errors like missing subvolume
11309  * tree, so we can do aggressive check to reduce duplicated checks.
11310  */
11311 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11312 {
11313         struct btrfs_root *extent_root = root->fs_info->extent_root;
11314         struct btrfs_key key;
11315         struct btrfs_path path;
11316         struct extent_buffer *leaf;
11317         int slot;
11318         struct btrfs_extent_item *ei;
11319         unsigned long ptr;
11320         unsigned long end;
11321         int type;
11322         u32 item_size;
11323         u64 offset;
11324         struct btrfs_extent_inline_ref *iref;
11325         int ret;
11326
11327         btrfs_init_path(&path);
11328         key.objectid = btrfs_header_bytenr(eb);
11329         key.type = BTRFS_METADATA_ITEM_KEY;
11330         key.offset = (u64)-1;
11331
11332         /*
11333          * Any failure in backref resolving means we can't determine
11334          * whom the tree block belongs to.
11335          * So in that case, we need to check that tree block
11336          */
11337         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11338         if (ret < 0)
11339                 goto need_check;
11340
11341         ret = btrfs_previous_extent_item(extent_root, &path,
11342                                          btrfs_header_bytenr(eb));
11343         if (ret)
11344                 goto need_check;
11345
11346         leaf = path.nodes[0];
11347         slot = path.slots[0];
11348         btrfs_item_key_to_cpu(leaf, &key, slot);
11349         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11350
11351         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11352                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11353         } else {
11354                 struct btrfs_tree_block_info *info;
11355
11356                 info = (struct btrfs_tree_block_info *)(ei + 1);
11357                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11358         }
11359
11360         item_size = btrfs_item_size_nr(leaf, slot);
11361         ptr = (unsigned long)iref;
11362         end = (unsigned long)ei + item_size;
11363         while (ptr < end) {
11364                 iref = (struct btrfs_extent_inline_ref *)ptr;
11365                 type = btrfs_extent_inline_ref_type(leaf, iref);
11366                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11367
11368                 /*
11369                  * We only check the tree block if current root is
11370                  * the lowest referencer of it.
11371                  */
11372                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11373                     offset < root->objectid) {
11374                         btrfs_release_path(&path);
11375                         return 0;
11376                 }
11377
11378                 ptr += btrfs_extent_inline_ref_size(type);
11379         }
11380         /*
11381          * Normally we should also check keyed tree block ref, but that may be
11382          * very time consuming.  Inlined ref should already make us skip a lot
11383          * of refs now.  So skip search keyed tree block ref.
11384          */
11385
11386 need_check:
11387         btrfs_release_path(&path);
11388         return 1;
11389 }
11390
11391 /*
11392  * Traversal function for tree block. We will do:
11393  * 1) Skip shared fs/subvolume tree blocks
11394  * 2) Update related bytes accounting
11395  * 3) Pre-order traversal
11396  */
11397 static int traverse_tree_block(struct btrfs_root *root,
11398                                 struct extent_buffer *node)
11399 {
11400         struct extent_buffer *eb;
11401         struct btrfs_key key;
11402         struct btrfs_key drop_key;
11403         int level;
11404         u64 nr;
11405         int i;
11406         int err = 0;
11407         int ret;
11408
11409         /*
11410          * Skip shared fs/subvolume tree block, in that case they will
11411          * be checked by referencer with lowest rootid
11412          */
11413         if (is_fstree(root->objectid) && !should_check(root, node))
11414                 return 0;
11415
11416         /* Update bytes accounting */
11417         total_btree_bytes += node->len;
11418         if (fs_root_objectid(btrfs_header_owner(node)))
11419                 total_fs_tree_bytes += node->len;
11420         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11421                 total_extent_tree_bytes += node->len;
11422         if (!found_old_backref &&
11423             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11424             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11425             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11426                 found_old_backref = 1;
11427
11428         /* pre-order tranversal, check itself first */
11429         level = btrfs_header_level(node);
11430         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11431                                    btrfs_header_level(node),
11432                                    btrfs_header_owner(node));
11433         err |= ret;
11434         if (err)
11435                 error(
11436         "check %s failed root %llu bytenr %llu level %d, force continue check",
11437                         level ? "node":"leaf", root->objectid,
11438                         btrfs_header_bytenr(node), btrfs_header_level(node));
11439
11440         if (!level) {
11441                 btree_space_waste += btrfs_leaf_free_space(root, node);
11442                 ret = check_leaf_items(root, node);
11443                 err |= ret;
11444                 return err;
11445         }
11446
11447         nr = btrfs_header_nritems(node);
11448         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11449         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11450                 sizeof(struct btrfs_key_ptr);
11451
11452         /* Then check all its children */
11453         for (i = 0; i < nr; i++) {
11454                 u64 blocknr = btrfs_node_blockptr(node, i);
11455
11456                 btrfs_node_key_to_cpu(node, &key, i);
11457                 if (level == root->root_item.drop_level &&
11458                     is_dropped_key(&key, &drop_key))
11459                         continue;
11460
11461                 /*
11462                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11463                  * to call the function itself.
11464                  */
11465                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11466                 if (extent_buffer_uptodate(eb)) {
11467                         ret = traverse_tree_block(root, eb);
11468                         err |= ret;
11469                 }
11470                 free_extent_buffer(eb);
11471         }
11472
11473         return err;
11474 }
11475
11476 /*
11477  * Low memory usage version check_chunks_and_extents.
11478  */
11479 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11480 {
11481         struct btrfs_path path;
11482         struct btrfs_key key;
11483         struct btrfs_root *root1;
11484         struct btrfs_root *cur_root;
11485         int err = 0;
11486         int ret;
11487
11488         root1 = root->fs_info->chunk_root;
11489         ret = traverse_tree_block(root1, root1->node);
11490         err |= ret;
11491
11492         root1 = root->fs_info->tree_root;
11493         ret = traverse_tree_block(root1, root1->node);
11494         err |= ret;
11495
11496         btrfs_init_path(&path);
11497         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11498         key.offset = 0;
11499         key.type = BTRFS_ROOT_ITEM_KEY;
11500
11501         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11502         if (ret) {
11503                 error("cannot find extent treet in tree_root");
11504                 goto out;
11505         }
11506
11507         while (1) {
11508                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11509                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11510                         goto next;
11511                 key.offset = (u64)-1;
11512
11513                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11514                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11515                                         &key);
11516                 else
11517                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11518                 if (IS_ERR(cur_root) || !cur_root) {
11519                         error("failed to read tree: %lld", key.objectid);
11520                         goto next;
11521                 }
11522
11523                 ret = traverse_tree_block(cur_root, cur_root->node);
11524                 err |= ret;
11525
11526                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11527                         btrfs_free_fs_root(cur_root);
11528 next:
11529                 ret = btrfs_next_item(root1, &path);
11530                 if (ret)
11531                         goto out;
11532         }
11533
11534 out:
11535         btrfs_release_path(&path);
11536         return err;
11537 }
11538
11539 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11540                            struct btrfs_root *root, int overwrite)
11541 {
11542         struct extent_buffer *c;
11543         struct extent_buffer *old = root->node;
11544         int level;
11545         int ret;
11546         struct btrfs_disk_key disk_key = {0,0,0};
11547
11548         level = 0;
11549
11550         if (overwrite) {
11551                 c = old;
11552                 extent_buffer_get(c);
11553                 goto init;
11554         }
11555         c = btrfs_alloc_free_block(trans, root,
11556                                    root->nodesize,
11557                                    root->root_key.objectid,
11558                                    &disk_key, level, 0, 0);
11559         if (IS_ERR(c)) {
11560                 c = old;
11561                 extent_buffer_get(c);
11562                 overwrite = 1;
11563         }
11564 init:
11565         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11566         btrfs_set_header_level(c, level);
11567         btrfs_set_header_bytenr(c, c->start);
11568         btrfs_set_header_generation(c, trans->transid);
11569         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11570         btrfs_set_header_owner(c, root->root_key.objectid);
11571
11572         write_extent_buffer(c, root->fs_info->fsid,
11573                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11574
11575         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11576                             btrfs_header_chunk_tree_uuid(c),
11577                             BTRFS_UUID_SIZE);
11578
11579         btrfs_mark_buffer_dirty(c);
11580         /*
11581          * this case can happen in the following case:
11582          *
11583          * 1.overwrite previous root.
11584          *
11585          * 2.reinit reloc data root, this is because we skip pin
11586          * down reloc data tree before which means we can allocate
11587          * same block bytenr here.
11588          */
11589         if (old->start == c->start) {
11590                 btrfs_set_root_generation(&root->root_item,
11591                                           trans->transid);
11592                 root->root_item.level = btrfs_header_level(root->node);
11593                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11594                                         &root->root_key, &root->root_item);
11595                 if (ret) {
11596                         free_extent_buffer(c);
11597                         return ret;
11598                 }
11599         }
11600         free_extent_buffer(old);
11601         root->node = c;
11602         add_root_to_dirty_list(root);
11603         return 0;
11604 }
11605
11606 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11607                                 struct extent_buffer *eb, int tree_root)
11608 {
11609         struct extent_buffer *tmp;
11610         struct btrfs_root_item *ri;
11611         struct btrfs_key key;
11612         u64 bytenr;
11613         u32 nodesize;
11614         int level = btrfs_header_level(eb);
11615         int nritems;
11616         int ret;
11617         int i;
11618
11619         /*
11620          * If we have pinned this block before, don't pin it again.
11621          * This can not only avoid forever loop with broken filesystem
11622          * but also give us some speedups.
11623          */
11624         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11625                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11626                 return 0;
11627
11628         btrfs_pin_extent(fs_info, eb->start, eb->len);
11629
11630         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11631         nritems = btrfs_header_nritems(eb);
11632         for (i = 0; i < nritems; i++) {
11633                 if (level == 0) {
11634                         btrfs_item_key_to_cpu(eb, &key, i);
11635                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11636                                 continue;
11637                         /* Skip the extent root and reloc roots */
11638                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11639                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11640                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11641                                 continue;
11642                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11643                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11644
11645                         /*
11646                          * If at any point we start needing the real root we
11647                          * will have to build a stump root for the root we are
11648                          * in, but for now this doesn't actually use the root so
11649                          * just pass in extent_root.
11650                          */
11651                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11652                                               nodesize, 0);
11653                         if (!extent_buffer_uptodate(tmp)) {
11654                                 fprintf(stderr, "Error reading root block\n");
11655                                 return -EIO;
11656                         }
11657                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11658                         free_extent_buffer(tmp);
11659                         if (ret)
11660                                 return ret;
11661                 } else {
11662                         bytenr = btrfs_node_blockptr(eb, i);
11663
11664                         /* If we aren't the tree root don't read the block */
11665                         if (level == 1 && !tree_root) {
11666                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11667                                 continue;
11668                         }
11669
11670                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11671                                               nodesize, 0);
11672                         if (!extent_buffer_uptodate(tmp)) {
11673                                 fprintf(stderr, "Error reading tree block\n");
11674                                 return -EIO;
11675                         }
11676                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11677                         free_extent_buffer(tmp);
11678                         if (ret)
11679                                 return ret;
11680                 }
11681         }
11682
11683         return 0;
11684 }
11685
11686 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11687 {
11688         int ret;
11689
11690         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11691         if (ret)
11692                 return ret;
11693
11694         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11695 }
11696
11697 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11698 {
11699         struct btrfs_block_group_cache *cache;
11700         struct btrfs_path path;
11701         struct extent_buffer *leaf;
11702         struct btrfs_chunk *chunk;
11703         struct btrfs_key key;
11704         int ret;
11705         u64 start;
11706
11707         btrfs_init_path(&path);
11708         key.objectid = 0;
11709         key.type = BTRFS_CHUNK_ITEM_KEY;
11710         key.offset = 0;
11711         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11712         if (ret < 0) {
11713                 btrfs_release_path(&path);
11714                 return ret;
11715         }
11716
11717         /*
11718          * We do this in case the block groups were screwed up and had alloc
11719          * bits that aren't actually set on the chunks.  This happens with
11720          * restored images every time and could happen in real life I guess.
11721          */
11722         fs_info->avail_data_alloc_bits = 0;
11723         fs_info->avail_metadata_alloc_bits = 0;
11724         fs_info->avail_system_alloc_bits = 0;
11725
11726         /* First we need to create the in-memory block groups */
11727         while (1) {
11728                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11729                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11730                         if (ret < 0) {
11731                                 btrfs_release_path(&path);
11732                                 return ret;
11733                         }
11734                         if (ret) {
11735                                 ret = 0;
11736                                 break;
11737                         }
11738                 }
11739                 leaf = path.nodes[0];
11740                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11741                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11742                         path.slots[0]++;
11743                         continue;
11744                 }
11745
11746                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11747                 btrfs_add_block_group(fs_info, 0,
11748                                       btrfs_chunk_type(leaf, chunk),
11749                                       key.objectid, key.offset,
11750                                       btrfs_chunk_length(leaf, chunk));
11751                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11752                                  key.offset + btrfs_chunk_length(leaf, chunk));
11753                 path.slots[0]++;
11754         }
11755         start = 0;
11756         while (1) {
11757                 cache = btrfs_lookup_first_block_group(fs_info, start);
11758                 if (!cache)
11759                         break;
11760                 cache->cached = 1;
11761                 start = cache->key.objectid + cache->key.offset;
11762         }
11763
11764         btrfs_release_path(&path);
11765         return 0;
11766 }
11767
11768 static int reset_balance(struct btrfs_trans_handle *trans,
11769                          struct btrfs_fs_info *fs_info)
11770 {
11771         struct btrfs_root *root = fs_info->tree_root;
11772         struct btrfs_path path;
11773         struct extent_buffer *leaf;
11774         struct btrfs_key key;
11775         int del_slot, del_nr = 0;
11776         int ret;
11777         int found = 0;
11778
11779         btrfs_init_path(&path);
11780         key.objectid = BTRFS_BALANCE_OBJECTID;
11781         key.type = BTRFS_BALANCE_ITEM_KEY;
11782         key.offset = 0;
11783         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11784         if (ret) {
11785                 if (ret > 0)
11786                         ret = 0;
11787                 if (!ret)
11788                         goto reinit_data_reloc;
11789                 else
11790                         goto out;
11791         }
11792
11793         ret = btrfs_del_item(trans, root, &path);
11794         if (ret)
11795                 goto out;
11796         btrfs_release_path(&path);
11797
11798         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11799         key.type = BTRFS_ROOT_ITEM_KEY;
11800         key.offset = 0;
11801         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11802         if (ret < 0)
11803                 goto out;
11804         while (1) {
11805                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11806                         if (!found)
11807                                 break;
11808
11809                         if (del_nr) {
11810                                 ret = btrfs_del_items(trans, root, &path,
11811                                                       del_slot, del_nr);
11812                                 del_nr = 0;
11813                                 if (ret)
11814                                         goto out;
11815                         }
11816                         key.offset++;
11817                         btrfs_release_path(&path);
11818
11819                         found = 0;
11820                         ret = btrfs_search_slot(trans, root, &key, &path,
11821                                                 -1, 1);
11822                         if (ret < 0)
11823                                 goto out;
11824                         continue;
11825                 }
11826                 found = 1;
11827                 leaf = path.nodes[0];
11828                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11829                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11830                         break;
11831                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11832                         path.slots[0]++;
11833                         continue;
11834                 }
11835                 if (!del_nr) {
11836                         del_slot = path.slots[0];
11837                         del_nr = 1;
11838                 } else {
11839                         del_nr++;
11840                 }
11841                 path.slots[0]++;
11842         }
11843
11844         if (del_nr) {
11845                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11846                 if (ret)
11847                         goto out;
11848         }
11849         btrfs_release_path(&path);
11850
11851 reinit_data_reloc:
11852         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11853         key.type = BTRFS_ROOT_ITEM_KEY;
11854         key.offset = (u64)-1;
11855         root = btrfs_read_fs_root(fs_info, &key);
11856         if (IS_ERR(root)) {
11857                 fprintf(stderr, "Error reading data reloc tree\n");
11858                 ret = PTR_ERR(root);
11859                 goto out;
11860         }
11861         record_root_in_trans(trans, root);
11862         ret = btrfs_fsck_reinit_root(trans, root, 0);
11863         if (ret)
11864                 goto out;
11865         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11866 out:
11867         btrfs_release_path(&path);
11868         return ret;
11869 }
11870
11871 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11872                               struct btrfs_fs_info *fs_info)
11873 {
11874         u64 start = 0;
11875         int ret;
11876
11877         /*
11878          * The only reason we don't do this is because right now we're just
11879          * walking the trees we find and pinning down their bytes, we don't look
11880          * at any of the leaves.  In order to do mixed groups we'd have to check
11881          * the leaves of any fs roots and pin down the bytes for any file
11882          * extents we find.  Not hard but why do it if we don't have to?
11883          */
11884         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11885                 fprintf(stderr, "We don't support re-initing the extent tree "
11886                         "for mixed block groups yet, please notify a btrfs "
11887                         "developer you want to do this so they can add this "
11888                         "functionality.\n");
11889                 return -EINVAL;
11890         }
11891
11892         /*
11893          * first we need to walk all of the trees except the extent tree and pin
11894          * down the bytes that are in use so we don't overwrite any existing
11895          * metadata.
11896          */
11897         ret = pin_metadata_blocks(fs_info);
11898         if (ret) {
11899                 fprintf(stderr, "error pinning down used bytes\n");
11900                 return ret;
11901         }
11902
11903         /*
11904          * Need to drop all the block groups since we're going to recreate all
11905          * of them again.
11906          */
11907         btrfs_free_block_groups(fs_info);
11908         ret = reset_block_groups(fs_info);
11909         if (ret) {
11910                 fprintf(stderr, "error resetting the block groups\n");
11911                 return ret;
11912         }
11913
11914         /* Ok we can allocate now, reinit the extent root */
11915         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11916         if (ret) {
11917                 fprintf(stderr, "extent root initialization failed\n");
11918                 /*
11919                  * When the transaction code is updated we should end the
11920                  * transaction, but for now progs only knows about commit so
11921                  * just return an error.
11922                  */
11923                 return ret;
11924         }
11925
11926         /*
11927          * Now we have all the in-memory block groups setup so we can make
11928          * allocations properly, and the metadata we care about is safe since we
11929          * pinned all of it above.
11930          */
11931         while (1) {
11932                 struct btrfs_block_group_cache *cache;
11933
11934                 cache = btrfs_lookup_first_block_group(fs_info, start);
11935                 if (!cache)
11936                         break;
11937                 start = cache->key.objectid + cache->key.offset;
11938                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11939                                         &cache->key, &cache->item,
11940                                         sizeof(cache->item));
11941                 if (ret) {
11942                         fprintf(stderr, "Error adding block group\n");
11943                         return ret;
11944                 }
11945                 btrfs_extent_post_op(trans, fs_info->extent_root);
11946         }
11947
11948         ret = reset_balance(trans, fs_info);
11949         if (ret)
11950                 fprintf(stderr, "error resetting the pending balance\n");
11951
11952         return ret;
11953 }
11954
11955 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11956 {
11957         struct btrfs_path path;
11958         struct btrfs_trans_handle *trans;
11959         struct btrfs_key key;
11960         int ret;
11961
11962         printf("Recowing metadata block %llu\n", eb->start);
11963         key.objectid = btrfs_header_owner(eb);
11964         key.type = BTRFS_ROOT_ITEM_KEY;
11965         key.offset = (u64)-1;
11966
11967         root = btrfs_read_fs_root(root->fs_info, &key);
11968         if (IS_ERR(root)) {
11969                 fprintf(stderr, "Couldn't find owner root %llu\n",
11970                         key.objectid);
11971                 return PTR_ERR(root);
11972         }
11973
11974         trans = btrfs_start_transaction(root, 1);
11975         if (IS_ERR(trans))
11976                 return PTR_ERR(trans);
11977
11978         btrfs_init_path(&path);
11979         path.lowest_level = btrfs_header_level(eb);
11980         if (path.lowest_level)
11981                 btrfs_node_key_to_cpu(eb, &key, 0);
11982         else
11983                 btrfs_item_key_to_cpu(eb, &key, 0);
11984
11985         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11986         btrfs_commit_transaction(trans, root);
11987         btrfs_release_path(&path);
11988         return ret;
11989 }
11990
11991 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11992 {
11993         struct btrfs_path path;
11994         struct btrfs_trans_handle *trans;
11995         struct btrfs_key key;
11996         int ret;
11997
11998         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11999                bad->key.type, bad->key.offset);
12000         key.objectid = bad->root_id;
12001         key.type = BTRFS_ROOT_ITEM_KEY;
12002         key.offset = (u64)-1;
12003
12004         root = btrfs_read_fs_root(root->fs_info, &key);
12005         if (IS_ERR(root)) {
12006                 fprintf(stderr, "Couldn't find owner root %llu\n",
12007                         key.objectid);
12008                 return PTR_ERR(root);
12009         }
12010
12011         trans = btrfs_start_transaction(root, 1);
12012         if (IS_ERR(trans))
12013                 return PTR_ERR(trans);
12014
12015         btrfs_init_path(&path);
12016         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12017         if (ret) {
12018                 if (ret > 0)
12019                         ret = 0;
12020                 goto out;
12021         }
12022         ret = btrfs_del_item(trans, root, &path);
12023 out:
12024         btrfs_commit_transaction(trans, root);
12025         btrfs_release_path(&path);
12026         return ret;
12027 }
12028
12029 static int zero_log_tree(struct btrfs_root *root)
12030 {
12031         struct btrfs_trans_handle *trans;
12032         int ret;
12033
12034         trans = btrfs_start_transaction(root, 1);
12035         if (IS_ERR(trans)) {
12036                 ret = PTR_ERR(trans);
12037                 return ret;
12038         }
12039         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12040         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12041         ret = btrfs_commit_transaction(trans, root);
12042         return ret;
12043 }
12044
12045 static int populate_csum(struct btrfs_trans_handle *trans,
12046                          struct btrfs_root *csum_root, char *buf, u64 start,
12047                          u64 len)
12048 {
12049         u64 offset = 0;
12050         u64 sectorsize;
12051         int ret = 0;
12052
12053         while (offset < len) {
12054                 sectorsize = csum_root->sectorsize;
12055                 ret = read_extent_data(csum_root, buf, start + offset,
12056                                        &sectorsize, 0);
12057                 if (ret)
12058                         break;
12059                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12060                                             start + offset, buf, sectorsize);
12061                 if (ret)
12062                         break;
12063                 offset += sectorsize;
12064         }
12065         return ret;
12066 }
12067
12068 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12069                                       struct btrfs_root *csum_root,
12070                                       struct btrfs_root *cur_root)
12071 {
12072         struct btrfs_path path;
12073         struct btrfs_key key;
12074         struct extent_buffer *node;
12075         struct btrfs_file_extent_item *fi;
12076         char *buf = NULL;
12077         u64 start = 0;
12078         u64 len = 0;
12079         int slot = 0;
12080         int ret = 0;
12081
12082         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12083         if (!buf)
12084                 return -ENOMEM;
12085
12086         btrfs_init_path(&path);
12087         key.objectid = 0;
12088         key.offset = 0;
12089         key.type = 0;
12090         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12091         if (ret < 0)
12092                 goto out;
12093         /* Iterate all regular file extents and fill its csum */
12094         while (1) {
12095                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12096
12097                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12098                         goto next;
12099                 node = path.nodes[0];
12100                 slot = path.slots[0];
12101                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12102                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12103                         goto next;
12104                 start = btrfs_file_extent_disk_bytenr(node, fi);
12105                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12106
12107                 ret = populate_csum(trans, csum_root, buf, start, len);
12108                 if (ret == -EEXIST)
12109                         ret = 0;
12110                 if (ret < 0)
12111                         goto out;
12112 next:
12113                 /*
12114                  * TODO: if next leaf is corrupted, jump to nearest next valid
12115                  * leaf.
12116                  */
12117                 ret = btrfs_next_item(cur_root, &path);
12118                 if (ret < 0)
12119                         goto out;
12120                 if (ret > 0) {
12121                         ret = 0;
12122                         goto out;
12123                 }
12124         }
12125
12126 out:
12127         btrfs_release_path(&path);
12128         free(buf);
12129         return ret;
12130 }
12131
12132 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12133                                   struct btrfs_root *csum_root)
12134 {
12135         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12136         struct btrfs_path path;
12137         struct btrfs_root *tree_root = fs_info->tree_root;
12138         struct btrfs_root *cur_root;
12139         struct extent_buffer *node;
12140         struct btrfs_key key;
12141         int slot = 0;
12142         int ret = 0;
12143
12144         btrfs_init_path(&path);
12145         key.objectid = BTRFS_FS_TREE_OBJECTID;
12146         key.offset = 0;
12147         key.type = BTRFS_ROOT_ITEM_KEY;
12148         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12149         if (ret < 0)
12150                 goto out;
12151         if (ret > 0) {
12152                 ret = -ENOENT;
12153                 goto out;
12154         }
12155
12156         while (1) {
12157                 node = path.nodes[0];
12158                 slot = path.slots[0];
12159                 btrfs_item_key_to_cpu(node, &key, slot);
12160                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12161                         goto out;
12162                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12163                         goto next;
12164                 if (!is_fstree(key.objectid))
12165                         goto next;
12166                 key.offset = (u64)-1;
12167
12168                 cur_root = btrfs_read_fs_root(fs_info, &key);
12169                 if (IS_ERR(cur_root) || !cur_root) {
12170                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12171                                 key.objectid);
12172                         goto out;
12173                 }
12174                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12175                                 cur_root);
12176                 if (ret < 0)
12177                         goto out;
12178 next:
12179                 ret = btrfs_next_item(tree_root, &path);
12180                 if (ret > 0) {
12181                         ret = 0;
12182                         goto out;
12183                 }
12184                 if (ret < 0)
12185                         goto out;
12186         }
12187
12188 out:
12189         btrfs_release_path(&path);
12190         return ret;
12191 }
12192
12193 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12194                                       struct btrfs_root *csum_root)
12195 {
12196         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12197         struct btrfs_path path;
12198         struct btrfs_extent_item *ei;
12199         struct extent_buffer *leaf;
12200         char *buf;
12201         struct btrfs_key key;
12202         int ret;
12203
12204         btrfs_init_path(&path);
12205         key.objectid = 0;
12206         key.type = BTRFS_EXTENT_ITEM_KEY;
12207         key.offset = 0;
12208         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12209         if (ret < 0) {
12210                 btrfs_release_path(&path);
12211                 return ret;
12212         }
12213
12214         buf = malloc(csum_root->sectorsize);
12215         if (!buf) {
12216                 btrfs_release_path(&path);
12217                 return -ENOMEM;
12218         }
12219
12220         while (1) {
12221                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12222                         ret = btrfs_next_leaf(extent_root, &path);
12223                         if (ret < 0)
12224                                 break;
12225                         if (ret) {
12226                                 ret = 0;
12227                                 break;
12228                         }
12229                 }
12230                 leaf = path.nodes[0];
12231
12232                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12233                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12234                         path.slots[0]++;
12235                         continue;
12236                 }
12237
12238                 ei = btrfs_item_ptr(leaf, path.slots[0],
12239                                     struct btrfs_extent_item);
12240                 if (!(btrfs_extent_flags(leaf, ei) &
12241                       BTRFS_EXTENT_FLAG_DATA)) {
12242                         path.slots[0]++;
12243                         continue;
12244                 }
12245
12246                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12247                                     key.offset);
12248                 if (ret)
12249                         break;
12250                 path.slots[0]++;
12251         }
12252
12253         btrfs_release_path(&path);
12254         free(buf);
12255         return ret;
12256 }
12257
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When @search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead; otherwise the extent tree is.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12274
12275 static void free_roots_info_cache(void)
12276 {
12277         if (!roots_info_cache)
12278                 return;
12279
12280         while (!cache_tree_empty(roots_info_cache)) {
12281                 struct cache_extent *entry;
12282                 struct root_item_info *rii;
12283
12284                 entry = first_cache_extent(roots_info_cache);
12285                 if (!entry)
12286                         break;
12287                 remove_cache_extent(roots_info_cache, entry);
12288                 rii = container_of(entry, struct root_item_info, cache_extent);
12289                 free(rii);
12290         }
12291
12292         free(roots_info_cache);
12293         roots_info_cache = NULL;
12294 }
12295
12296 static int build_roots_info_cache(struct btrfs_fs_info *info)
12297 {
12298         int ret = 0;
12299         struct btrfs_key key;
12300         struct extent_buffer *leaf;
12301         struct btrfs_path path;
12302
12303         if (!roots_info_cache) {
12304                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12305                 if (!roots_info_cache)
12306                         return -ENOMEM;
12307                 cache_tree_init(roots_info_cache);
12308         }
12309
12310         btrfs_init_path(&path);
12311         key.objectid = 0;
12312         key.type = BTRFS_EXTENT_ITEM_KEY;
12313         key.offset = 0;
12314         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12315         if (ret < 0)
12316                 goto out;
12317         leaf = path.nodes[0];
12318
12319         while (1) {
12320                 struct btrfs_key found_key;
12321                 struct btrfs_extent_item *ei;
12322                 struct btrfs_extent_inline_ref *iref;
12323                 int slot = path.slots[0];
12324                 int type;
12325                 u64 flags;
12326                 u64 root_id;
12327                 u8 level;
12328                 struct cache_extent *entry;
12329                 struct root_item_info *rii;
12330
12331                 if (slot >= btrfs_header_nritems(leaf)) {
12332                         ret = btrfs_next_leaf(info->extent_root, &path);
12333                         if (ret < 0) {
12334                                 break;
12335                         } else if (ret) {
12336                                 ret = 0;
12337                                 break;
12338                         }
12339                         leaf = path.nodes[0];
12340                         slot = path.slots[0];
12341                 }
12342
12343                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12344
12345                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12346                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12347                         goto next;
12348
12349                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12350                 flags = btrfs_extent_flags(leaf, ei);
12351
12352                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12353                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12354                         goto next;
12355
12356                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12357                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12358                         level = found_key.offset;
12359                 } else {
12360                         struct btrfs_tree_block_info *binfo;
12361
12362                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12363                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12364                         level = btrfs_tree_block_level(leaf, binfo);
12365                 }
12366
12367                 /*
12368                  * For a root extent, it must be of the following type and the
12369                  * first (and only one) iref in the item.
12370                  */
12371                 type = btrfs_extent_inline_ref_type(leaf, iref);
12372                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12373                         goto next;
12374
12375                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12376                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12377                 if (!entry) {
12378                         rii = malloc(sizeof(struct root_item_info));
12379                         if (!rii) {
12380                                 ret = -ENOMEM;
12381                                 goto out;
12382                         }
12383                         rii->cache_extent.start = root_id;
12384                         rii->cache_extent.size = 1;
12385                         rii->level = (u8)-1;
12386                         entry = &rii->cache_extent;
12387                         ret = insert_cache_extent(roots_info_cache, entry);
12388                         ASSERT(ret == 0);
12389                 } else {
12390                         rii = container_of(entry, struct root_item_info,
12391                                            cache_extent);
12392                 }
12393
12394                 ASSERT(rii->cache_extent.start == root_id);
12395                 ASSERT(rii->cache_extent.size == 1);
12396
12397                 if (level > rii->level || rii->level == (u8)-1) {
12398                         rii->level = level;
12399                         rii->bytenr = found_key.objectid;
12400                         rii->gen = btrfs_extent_generation(leaf, ei);
12401                         rii->node_count = 1;
12402                 } else if (level == rii->level) {
12403                         rii->node_count++;
12404                 }
12405 next:
12406                 path.slots[0]++;
12407         }
12408
12409 out:
12410         btrfs_release_path(&path);
12411
12412         return ret;
12413 }
12414
12415 static int maybe_repair_root_item(struct btrfs_path *path,
12416                                   const struct btrfs_key *root_key,
12417                                   const int read_only_mode)
12418 {
12419         const u64 root_id = root_key->objectid;
12420         struct cache_extent *entry;
12421         struct root_item_info *rii;
12422         struct btrfs_root_item ri;
12423         unsigned long offset;
12424
12425         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12426         if (!entry) {
12427                 fprintf(stderr,
12428                         "Error: could not find extent items for root %llu\n",
12429                         root_key->objectid);
12430                 return -ENOENT;
12431         }
12432
12433         rii = container_of(entry, struct root_item_info, cache_extent);
12434         ASSERT(rii->cache_extent.start == root_id);
12435         ASSERT(rii->cache_extent.size == 1);
12436
12437         if (rii->node_count != 1) {
12438                 fprintf(stderr,
12439                         "Error: could not find btree root extent for root %llu\n",
12440                         root_id);
12441                 return -ENOENT;
12442         }
12443
12444         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12445         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12446
12447         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12448             btrfs_root_level(&ri) != rii->level ||
12449             btrfs_root_generation(&ri) != rii->gen) {
12450
12451                 /*
12452                  * If we're in repair mode but our caller told us to not update
12453                  * the root item, i.e. just check if it needs to be updated, don't
12454                  * print this message, since the caller will call us again shortly
12455                  * for the same root item without read only mode (the caller will
12456                  * open a transaction first).
12457                  */
12458                 if (!(read_only_mode && repair))
12459                         fprintf(stderr,
12460                                 "%sroot item for root %llu,"
12461                                 " current bytenr %llu, current gen %llu, current level %u,"
12462                                 " new bytenr %llu, new gen %llu, new level %u\n",
12463                                 (read_only_mode ? "" : "fixing "),
12464                                 root_id,
12465                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12466                                 btrfs_root_level(&ri),
12467                                 rii->bytenr, rii->gen, rii->level);
12468
12469                 if (btrfs_root_generation(&ri) > rii->gen) {
12470                         fprintf(stderr,
12471                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12472                                 root_id, btrfs_root_generation(&ri), rii->gen);
12473                         return -EINVAL;
12474                 }
12475
12476                 if (!read_only_mode) {
12477                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12478                         btrfs_set_root_level(&ri, rii->level);
12479                         btrfs_set_root_generation(&ri, rii->gen);
12480                         write_extent_buffer(path->nodes[0], &ri,
12481                                             offset, sizeof(ri));
12482                 }
12483
12484                 return 1;
12485         }
12486
12487         return 0;
12488 }
12489
12490 /*
12491  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12492  * caused read-only snapshots to be corrupted if they were created at a moment
12493  * when the source subvolume/snapshot had orphan items. The issue was that the
12494  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12495  * node instead of the post orphan cleanup root node.
12496  * So this function, and its callees, just detects and fixes those cases. Even
12497  * though the regression was for read-only snapshots, this function applies to
12498  * any snapshot/subvolume root.
12499  * This must be run before any other repair code - not doing it so, makes other
12500  * repair code delete or modify backrefs in the extent tree for example, which
12501  * will result in an inconsistent fs after repairing the root items.
12502  */
12503 static int repair_root_items(struct btrfs_fs_info *info)
12504 {
12505         struct btrfs_path path;
12506         struct btrfs_key key;
12507         struct extent_buffer *leaf;
12508         struct btrfs_trans_handle *trans = NULL;
12509         int ret = 0;
12510         int bad_roots = 0;
12511         int need_trans = 0;
12512
12513         btrfs_init_path(&path);
12514
12515         ret = build_roots_info_cache(info);
12516         if (ret)
12517                 goto out;
12518
12519         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12520         key.type = BTRFS_ROOT_ITEM_KEY;
12521         key.offset = 0;
12522
12523 again:
12524         /*
12525          * Avoid opening and committing transactions if a leaf doesn't have
12526          * any root items that need to be fixed, so that we avoid rotating
12527          * backup roots unnecessarily.
12528          */
12529         if (need_trans) {
12530                 trans = btrfs_start_transaction(info->tree_root, 1);
12531                 if (IS_ERR(trans)) {
12532                         ret = PTR_ERR(trans);
12533                         goto out;
12534                 }
12535         }
12536
12537         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12538                                 0, trans ? 1 : 0);
12539         if (ret < 0)
12540                 goto out;
12541         leaf = path.nodes[0];
12542
12543         while (1) {
12544                 struct btrfs_key found_key;
12545
12546                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12547                         int no_more_keys = find_next_key(&path, &key);
12548
12549                         btrfs_release_path(&path);
12550                         if (trans) {
12551                                 ret = btrfs_commit_transaction(trans,
12552                                                                info->tree_root);
12553                                 trans = NULL;
12554                                 if (ret < 0)
12555                                         goto out;
12556                         }
12557                         need_trans = 0;
12558                         if (no_more_keys)
12559                                 break;
12560                         goto again;
12561                 }
12562
12563                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12564
12565                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12566                         goto next;
12567                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12568                         goto next;
12569
12570                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12571                 if (ret < 0)
12572                         goto out;
12573                 if (ret) {
12574                         if (!trans && repair) {
12575                                 need_trans = 1;
12576                                 key = found_key;
12577                                 btrfs_release_path(&path);
12578                                 goto again;
12579                         }
12580                         bad_roots++;
12581                 }
12582 next:
12583                 path.slots[0]++;
12584         }
12585         ret = 0;
12586 out:
12587         free_roots_info_cache();
12588         btrfs_release_path(&path);
12589         if (trans)
12590                 btrfs_commit_transaction(trans, info->tree_root);
12591         if (ret < 0)
12592                 return ret;
12593
12594         return bad_roots;
12595 }
12596
12597 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12598 {
12599         struct btrfs_trans_handle *trans;
12600         struct btrfs_block_group_cache *bg_cache;
12601         u64 current = 0;
12602         int ret = 0;
12603
12604         /* Clear all free space cache inodes and its extent data */
12605         while (1) {
12606                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12607                 if (!bg_cache)
12608                         break;
12609                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12610                 if (ret < 0)
12611                         return ret;
12612                 current = bg_cache->key.objectid + bg_cache->key.offset;
12613         }
12614
12615         /* Don't forget to set cache_generation to -1 */
12616         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12617         if (IS_ERR(trans)) {
12618                 error("failed to update super block cache generation");
12619                 return PTR_ERR(trans);
12620         }
12621         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12622         btrfs_commit_transaction(trans, fs_info->tree_root);
12623
12624         return ret;
12625 }
12626
/*
 * Help text for "btrfs check": a NULL-terminated list of strings that is
 * passed to usage().  The first entry is the synopsis and the option
 * descriptions follow the empty string entry.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12657
12658 int cmd_check(int argc, char **argv)
12659 {
12660         struct cache_tree root_cache;
12661         struct btrfs_root *root;
12662         struct btrfs_fs_info *info;
12663         u64 bytenr = 0;
12664         u64 subvolid = 0;
12665         u64 tree_root_bytenr = 0;
12666         u64 chunk_root_bytenr = 0;
12667         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12668         int ret;
12669         int err = 0;
12670         u64 num;
12671         int init_csum_tree = 0;
12672         int readonly = 0;
12673         int clear_space_cache = 0;
12674         int qgroup_report = 0;
12675         int qgroups_repaired = 0;
12676         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12677
12678         while(1) {
12679                 int c;
12680                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12681                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12682                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12683                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12684                 static const struct option long_options[] = {
12685                         { "super", required_argument, NULL, 's' },
12686                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12687                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12688                         { "init-csum-tree", no_argument, NULL,
12689                                 GETOPT_VAL_INIT_CSUM },
12690                         { "init-extent-tree", no_argument, NULL,
12691                                 GETOPT_VAL_INIT_EXTENT },
12692                         { "check-data-csum", no_argument, NULL,
12693                                 GETOPT_VAL_CHECK_CSUM },
12694                         { "backup", no_argument, NULL, 'b' },
12695                         { "subvol-extents", required_argument, NULL, 'E' },
12696                         { "qgroup-report", no_argument, NULL, 'Q' },
12697                         { "tree-root", required_argument, NULL, 'r' },
12698                         { "chunk-root", required_argument, NULL,
12699                                 GETOPT_VAL_CHUNK_TREE },
12700                         { "progress", no_argument, NULL, 'p' },
12701                         { "mode", required_argument, NULL,
12702                                 GETOPT_VAL_MODE },
12703                         { "clear-space-cache", required_argument, NULL,
12704                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12705                         { NULL, 0, NULL, 0}
12706                 };
12707
12708                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12709                 if (c < 0)
12710                         break;
12711                 switch(c) {
12712                         case 'a': /* ignored */ break;
12713                         case 'b':
12714                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12715                                 break;
12716                         case 's':
12717                                 num = arg_strtou64(optarg);
12718                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12719                                         error(
12720                                         "super mirror should be less than %d",
12721                                                 BTRFS_SUPER_MIRROR_MAX);
12722                                         exit(1);
12723                                 }
12724                                 bytenr = btrfs_sb_offset(((int)num));
12725                                 printf("using SB copy %llu, bytenr %llu\n", num,
12726                                        (unsigned long long)bytenr);
12727                                 break;
12728                         case 'Q':
12729                                 qgroup_report = 1;
12730                                 break;
12731                         case 'E':
12732                                 subvolid = arg_strtou64(optarg);
12733                                 break;
12734                         case 'r':
12735                                 tree_root_bytenr = arg_strtou64(optarg);
12736                                 break;
12737                         case GETOPT_VAL_CHUNK_TREE:
12738                                 chunk_root_bytenr = arg_strtou64(optarg);
12739                                 break;
12740                         case 'p':
12741                                 ctx.progress_enabled = true;
12742                                 break;
12743                         case '?':
12744                         case 'h':
12745                                 usage(cmd_check_usage);
12746                         case GETOPT_VAL_REPAIR:
12747                                 printf("enabling repair mode\n");
12748                                 repair = 1;
12749                                 ctree_flags |= OPEN_CTREE_WRITES;
12750                                 break;
12751                         case GETOPT_VAL_READONLY:
12752                                 readonly = 1;
12753                                 break;
12754                         case GETOPT_VAL_INIT_CSUM:
12755                                 printf("Creating a new CRC tree\n");
12756                                 init_csum_tree = 1;
12757                                 repair = 1;
12758                                 ctree_flags |= OPEN_CTREE_WRITES;
12759                                 break;
12760                         case GETOPT_VAL_INIT_EXTENT:
12761                                 init_extent_tree = 1;
12762                                 ctree_flags |= (OPEN_CTREE_WRITES |
12763                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12764                                 repair = 1;
12765                                 break;
12766                         case GETOPT_VAL_CHECK_CSUM:
12767                                 check_data_csum = 1;
12768                                 break;
12769                         case GETOPT_VAL_MODE:
12770                                 check_mode = parse_check_mode(optarg);
12771                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12772                                         error("unknown mode: %s", optarg);
12773                                         exit(1);
12774                                 }
12775                                 break;
12776                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12777                                 if (strcmp(optarg, "v1") == 0) {
12778                                         clear_space_cache = 1;
12779                                 } else if (strcmp(optarg, "v2") == 0) {
12780                                         clear_space_cache = 2;
12781                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12782                                 } else {
12783                                         error(
12784                 "invalid argument to --clear-space-cache, must be v1 or v2");
12785                                         exit(1);
12786                                 }
12787                                 ctree_flags |= OPEN_CTREE_WRITES;
12788                                 break;
12789                 }
12790         }
12791
12792         if (check_argc_exact(argc - optind, 1))
12793                 usage(cmd_check_usage);
12794
12795         if (ctx.progress_enabled) {
12796                 ctx.tp = TASK_NOTHING;
12797                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12798         }
12799
12800         /* This check is the only reason for --readonly to exist */
12801         if (readonly && repair) {
12802                 error("repair options are not compatible with --readonly");
12803                 exit(1);
12804         }
12805
12806         /*
12807          * Not supported yet
12808          */
12809         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12810                 error("low memory mode doesn't support repair yet");
12811                 exit(1);
12812         }
12813
12814         radix_tree_init();
12815         cache_tree_init(&root_cache);
12816
12817         if((ret = check_mounted(argv[optind])) < 0) {
12818                 error("could not check mount status: %s", strerror(-ret));
12819                 err |= !!ret;
12820                 goto err_out;
12821         } else if(ret) {
12822                 error("%s is currently mounted, aborting", argv[optind]);
12823                 ret = -EBUSY;
12824                 err |= !!ret;
12825                 goto err_out;
12826         }
12827
12828         /* only allow partial opening under repair mode */
12829         if (repair)
12830                 ctree_flags |= OPEN_CTREE_PARTIAL;
12831
12832         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12833                                   chunk_root_bytenr, ctree_flags);
12834         if (!info) {
12835                 error("cannot open file system");
12836                 ret = -EIO;
12837                 err |= !!ret;
12838                 goto err_out;
12839         }
12840
12841         global_info = info;
12842         root = info->fs_root;
12843         if (clear_space_cache == 1) {
12844                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12845                         error(
12846                 "free space cache v2 detected, use --clear-space-cache v2");
12847                         ret = 1;
12848                         goto close_out;
12849                 }
12850                 printf("Clearing free space cache\n");
12851                 ret = clear_free_space_cache(info);
12852                 if (ret) {
12853                         error("failed to clear free space cache");
12854                         ret = 1;
12855                 } else {
12856                         printf("Free space cache cleared\n");
12857                 }
12858                 goto close_out;
12859         } else if (clear_space_cache == 2) {
12860                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12861                         printf("no free space cache v2 to clear\n");
12862                         ret = 0;
12863                         goto close_out;
12864                 }
12865                 printf("Clear free space cache v2\n");
12866                 ret = btrfs_clear_free_space_tree(info);
12867                 if (ret) {
12868                         error("failed to clear free space cache v2: %d", ret);
12869                         ret = 1;
12870                 } else {
12871                         printf("free space cache v2 cleared\n");
12872                 }
12873                 goto close_out;
12874         }
12875
12876         /*
12877          * repair mode will force us to commit transaction which
12878          * will make us fail to load log tree when mounting.
12879          */
12880         if (repair && btrfs_super_log_root(info->super_copy)) {
12881                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12882                 if (!ret) {
12883                         ret = 1;
12884                         err |= !!ret;
12885                         goto close_out;
12886                 }
12887                 ret = zero_log_tree(root);
12888                 err |= !!ret;
12889                 if (ret) {
12890                         error("failed to zero log tree: %d", ret);
12891                         goto close_out;
12892                 }
12893         }
12894
12895         uuid_unparse(info->super_copy->fsid, uuidbuf);
12896         if (qgroup_report) {
12897                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12898                        uuidbuf);
12899                 ret = qgroup_verify_all(info);
12900                 err |= !!ret;
12901                 if (ret == 0)
12902                         report_qgroups(1);
12903                 goto close_out;
12904         }
12905         if (subvolid) {
12906                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12907                        subvolid, argv[optind], uuidbuf);
12908                 ret = print_extent_state(info, subvolid);
12909                 err |= !!ret;
12910                 goto close_out;
12911         }
12912         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12913
12914         if (!extent_buffer_uptodate(info->tree_root->node) ||
12915             !extent_buffer_uptodate(info->dev_root->node) ||
12916             !extent_buffer_uptodate(info->chunk_root->node)) {
12917                 error("critical roots corrupted, unable to check the filesystem");
12918                 err |= !!ret;
12919                 ret = -EIO;
12920                 goto close_out;
12921         }
12922
12923         if (init_extent_tree || init_csum_tree) {
12924                 struct btrfs_trans_handle *trans;
12925
12926                 trans = btrfs_start_transaction(info->extent_root, 0);
12927                 if (IS_ERR(trans)) {
12928                         error("error starting transaction");
12929                         ret = PTR_ERR(trans);
12930                         err |= !!ret;
12931                         goto close_out;
12932                 }
12933
12934                 if (init_extent_tree) {
12935                         printf("Creating a new extent tree\n");
12936                         ret = reinit_extent_tree(trans, info);
12937                         err |= !!ret;
12938                         if (ret)
12939                                 goto close_out;
12940                 }
12941
12942                 if (init_csum_tree) {
12943                         printf("Reinitialize checksum tree\n");
12944                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12945                         if (ret) {
12946                                 error("checksum tree initialization failed: %d",
12947                                                 ret);
12948                                 ret = -EIO;
12949                                 err |= !!ret;
12950                                 goto close_out;
12951                         }
12952
12953                         ret = fill_csum_tree(trans, info->csum_root,
12954                                              init_extent_tree);
12955                         err |= !!ret;
12956                         if (ret) {
12957                                 error("checksum tree refilling failed: %d", ret);
12958                                 return -EIO;
12959                         }
12960                 }
12961                 /*
12962                  * Ok now we commit and run the normal fsck, which will add
12963                  * extent entries for all of the items it finds.
12964                  */
12965                 ret = btrfs_commit_transaction(trans, info->extent_root);
12966                 err |= !!ret;
12967                 if (ret)
12968                         goto close_out;
12969         }
12970         if (!extent_buffer_uptodate(info->extent_root->node)) {
12971                 error("critical: extent_root, unable to check the filesystem");
12972                 ret = -EIO;
12973                 err |= !!ret;
12974                 goto close_out;
12975         }
12976         if (!extent_buffer_uptodate(info->csum_root->node)) {
12977                 error("critical: csum_root, unable to check the filesystem");
12978                 ret = -EIO;
12979                 err |= !!ret;
12980                 goto close_out;
12981         }
12982
12983         if (!ctx.progress_enabled)
12984                 fprintf(stderr, "checking extents\n");
12985         if (check_mode == CHECK_MODE_LOWMEM)
12986                 ret = check_chunks_and_extents_v2(root);
12987         else
12988                 ret = check_chunks_and_extents(root);
12989         err |= !!ret;
12990         if (ret)
12991                 error(
12992                 "errors found in extent allocation tree or chunk allocation");
12993
12994         ret = repair_root_items(info);
12995         err |= !!ret;
12996         if (ret < 0) {
12997                 error("failed to repair root items: %s", strerror(-ret));
12998                 goto close_out;
12999         }
13000         if (repair) {
13001                 fprintf(stderr, "Fixed %d roots.\n", ret);
13002                 ret = 0;
13003         } else if (ret > 0) {
13004                 fprintf(stderr,
13005                        "Found %d roots with an outdated root item.\n",
13006                        ret);
13007                 fprintf(stderr,
13008                         "Please run a filesystem check with the option --repair to fix them.\n");
13009                 ret = 1;
13010                 err |= !!ret;
13011                 goto close_out;
13012         }
13013
13014         if (!ctx.progress_enabled) {
13015                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13016                         fprintf(stderr, "checking free space tree\n");
13017                 else
13018                         fprintf(stderr, "checking free space cache\n");
13019         }
13020         ret = check_space_cache(root);
13021         err |= !!ret;
13022         if (ret) {
13023                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13024                         error("errors found in free space tree");
13025                 else
13026                         error("errors found in free space cache");
13027                 goto out;
13028         }
13029
13030         /*
13031          * We used to have to have these hole extents in between our real
13032          * extents so if we don't have this flag set we need to make sure there
13033          * are no gaps in the file extents for inodes, otherwise we can just
13034          * ignore it when this happens.
13035          */
13036         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13037         if (!ctx.progress_enabled)
13038                 fprintf(stderr, "checking fs roots\n");
13039         if (check_mode == CHECK_MODE_LOWMEM)
13040                 ret = check_fs_roots_v2(root->fs_info);
13041         else
13042                 ret = check_fs_roots(root, &root_cache);
13043         err |= !!ret;
13044         if (ret) {
13045                 error("errors found in fs roots");
13046                 goto out;
13047         }
13048
13049         fprintf(stderr, "checking csums\n");
13050         ret = check_csums(root);
13051         err |= !!ret;
13052         if (ret) {
13053                 error("errors found in csum tree");
13054                 goto out;
13055         }
13056
13057         fprintf(stderr, "checking root refs\n");
13058         /* For low memory mode, check_fs_roots_v2 handles root refs */
13059         if (check_mode != CHECK_MODE_LOWMEM) {
13060                 ret = check_root_refs(root, &root_cache);
13061                 err |= !!ret;
13062                 if (ret) {
13063                         error("errors found in root refs");
13064                         goto out;
13065                 }
13066         }
13067
13068         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13069                 struct extent_buffer *eb;
13070
13071                 eb = list_first_entry(&root->fs_info->recow_ebs,
13072                                       struct extent_buffer, recow);
13073                 list_del_init(&eb->recow);
13074                 ret = recow_extent_buffer(root, eb);
13075                 err |= !!ret;
13076                 if (ret) {
13077                         error("fails to fix transid errors");
13078                         break;
13079                 }
13080         }
13081
13082         while (!list_empty(&delete_items)) {
13083                 struct bad_item *bad;
13084
13085                 bad = list_first_entry(&delete_items, struct bad_item, list);
13086                 list_del_init(&bad->list);
13087                 if (repair) {
13088                         ret = delete_bad_item(root, bad);
13089                         err |= !!ret;
13090                 }
13091                 free(bad);
13092         }
13093
13094         if (info->quota_enabled) {
13095                 fprintf(stderr, "checking quota groups\n");
13096                 ret = qgroup_verify_all(info);
13097                 err |= !!ret;
13098                 if (ret) {
13099                         error("failed to check quota groups");
13100                         goto out;
13101                 }
13102                 report_qgroups(0);
13103                 ret = repair_qgroups(info, &qgroups_repaired);
13104                 err |= !!ret;
13105                 if (err) {
13106                         error("failed to repair quota groups");
13107                         goto out;
13108                 }
13109                 ret = 0;
13110         }
13111
13112         if (!list_empty(&root->fs_info->recow_ebs)) {
13113                 error("transid errors in file system");
13114                 ret = 1;
13115                 err |= !!ret;
13116         }
13117 out:
13118         if (found_old_backref) { /*
13119                  * there was a disk format change when mixed
13120                  * backref was in testing tree. The old format
13121                  * existed about one week.
13122                  */
13123                 printf("\n * Found old mixed backref format. "
13124                        "The old format is not supported! *"
13125                        "\n * Please mount the FS in readonly mode, "
13126                        "backup data and re-format the FS. *\n\n");
13127                 err |= 1;
13128         }
13129         printf("found %llu bytes used, ",
13130                (unsigned long long)bytes_used);
13131         if (err)
13132                 printf("error(s) found\n");
13133         else
13134                 printf("no error found\n");
13135         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13136         printf("total tree bytes: %llu\n",
13137                (unsigned long long)total_btree_bytes);
13138         printf("total fs tree bytes: %llu\n",
13139                (unsigned long long)total_fs_tree_bytes);
13140         printf("total extent tree bytes: %llu\n",
13141                (unsigned long long)total_extent_tree_bytes);
13142         printf("btree space waste bytes: %llu\n",
13143                (unsigned long long)btree_space_waste);
13144         printf("file data blocks allocated: %llu\n referenced %llu\n",
13145                 (unsigned long long)data_bytes_allocated,
13146                 (unsigned long long)data_bytes_referenced);
13147
13148         free_qgroup_counts();
13149         free_root_recs_tree(&root_cache);
13150 close_out:
13151         close_ctree(root);
13152 err_out:
13153         if (ctx.progress_enabled)
13154                 task_deinit(ctx.info);
13155
13156         return err;
13157 }