btrfs-progs: check: introduce function to check inode_ref
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phase reported by the progress indicator.  The values are used as an
 * index into task_position_string[] in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel meaning "nothing to report"; must stay the last element. */
	TASK_NOTHING,
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
121 /*
122  * Much like data_backref, just removed the undetermined members
123  * and change it to use list_head.
124  * During extent scan, it is stored in root->orphan_data_extent.
125  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126  */
127 struct orphan_data_extent {
128         struct list_head list;
129         u64 root;
130         u64 objectid;
131         u64 offset;
132         u64 disk_bytenr;
133         u64 disk_len;
134 };
135
136 struct tree_backref {
137         struct extent_backref node;
138         union {
139                 u64 parent;
140                 u64 root;
141         };
142 };
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref,
 * which is a two-bit field.
 */
enum {
	FLAG_UNSET = 2
};
151
152 struct extent_record {
153         struct list_head backrefs;
154         struct list_head dups;
155         struct list_head list;
156         struct cache_extent cache;
157         struct btrfs_disk_key parent_key;
158         u64 start;
159         u64 max_size;
160         u64 nr;
161         u64 refs;
162         u64 extent_item_refs;
163         u64 generation;
164         u64 parent_generation;
165         u64 info_objectid;
166         u32 num_duplicates;
167         u8 info_level;
168         unsigned int flag_block_full_backref:2;
169         unsigned int found_rec:1;
170         unsigned int content_checked:1;
171         unsigned int owner_ref_checked:1;
172         unsigned int is_root:1;
173         unsigned int metadata:1;
174         unsigned int bad_full_backref:1;
175         unsigned int crossing_stripes:1;
176         unsigned int wrong_chunk_type:1;
177 };
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype;
190         u8 ref_type;
191         int errors;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
203 struct root_item_record {
204         struct list_head list;
205         u64 objectid;
206         u64 bytenr;
207         u64 last_snapshot;
208         u8 level;
209         u8 drop_level;
210         int level_size;
211         struct btrfs_key drop_key;
212 };
213
/*
 * Error bits for back references.  The trailing comment is the text that
 * print_ref_error() prints for the bit.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* no dir item */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* no dir index */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* no inode ref */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* dup dir item */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* dup dir index */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* dup inode ref */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* index mismatch */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* filetype mismatch */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* name too long; 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* no root ref */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* no root backref */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* dup root ref */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* dup root backref */
227
228 struct file_extent_hole {
229         struct rb_node node;
230         u64 start;
231         u64 len;
232 };
233
234 struct inode_record {
235         struct list_head backrefs;
236         unsigned int checked:1;
237         unsigned int merging:1;
238         unsigned int found_inode_item:1;
239         unsigned int found_dir_item:1;
240         unsigned int found_file_extent:1;
241         unsigned int found_csum_item:1;
242         unsigned int some_csum_missing:1;
243         unsigned int nodatasum:1;
244         int errors;
245
246         u64 ino;
247         u32 nlink;
248         u32 imode;
249         u64 isize;
250         u64 nbytes;
251
252         u32 found_link;
253         u64 found_size;
254         u64 extent_start;
255         u64 extent_end;
256         struct rb_root holes;
257         struct list_head orphan_extents;
258
259         u32 refs;
260 };
261
/*
 * Error bits for inode_record::errors.  The trailing comment is the text
 * that print_inode_error() prints for the bit.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* no inode item */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* no orphan item */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* dup inode item */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* dup dir index */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* odd dir item */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* odd file extent */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* bad file extent */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* file extent overlap */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* file extent discount; 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* dir isize wrong */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* nbytes wrong; 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* odd csum item */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* some csum missing */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* link count wrong */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* orphan file extent */
277
278 struct root_backref {
279         struct list_head list;
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0];
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
298 struct root_record {
299         struct list_head backrefs;
300         struct cache_extent cache;
301         unsigned int found_root_item:1;
302         u64 objectid;
303         u32 found_ref;
304 };
305
306 struct ptr_node {
307         struct cache_extent cache;
308         void *data;
309 };
310
311 struct shared_node {
312         struct cache_extent cache;
313         struct cache_tree root_cache;
314         struct cache_tree inode_cache;
315         struct inode_record *current;
316         u32 refs;
317 };
318
319 struct block_info {
320         u64 start;
321         u32 size;
322 };
323
324 struct walk_control {
325         struct cache_tree shared;
326         struct shared_node *nodes[BTRFS_MAX_LEVEL];
327         int active_node;
328         int root_level;
329 };
330
331 struct bad_item {
332         struct btrfs_key key;
333         u64 root_id;
334         struct list_head list;
335 };
336
337 struct extent_entry {
338         u64 bytenr;
339         u64 bytes;
340         int count;
341         int broken;
342         struct list_head list;
343 };
344
345 struct root_item_info {
346         /* level of the root */
347         u8 level;
348         /* number of nodes at this level, must be 1 for a root */
349         int node_count;
350         u64 bytenr;
351         u64 gen;
352         struct cache_extent cache_extent;
353 };
354
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally to classify errors.  Every flag must have its own bit:
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable, so it now uses the next
 * free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Terminate the progress line by printing a newline and flushing stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
1203 static int splice_shared_node(struct shared_node *src_node,
1204                               struct shared_node *dst_node)
1205 {
1206         struct cache_extent *cache;
1207         struct ptr_node *node, *ins;
1208         struct cache_tree *src, *dst;
1209         struct inode_record *rec, *conflict;
1210         u64 current_ino = 0;
1211         int splice = 0;
1212         int ret;
1213
1214         if (--src_node->refs == 0)
1215                 splice = 1;
1216         if (src_node->current)
1217                 current_ino = src_node->current->ino;
1218
1219         src = &src_node->root_cache;
1220         dst = &dst_node->root_cache;
1221 again:
1222         cache = search_cache_extent(src, 0);
1223         while (cache) {
1224                 node = container_of(cache, struct ptr_node, cache);
1225                 rec = node->data;
1226                 cache = next_cache_extent(cache);
1227
1228                 if (splice) {
1229                         remove_cache_extent(src, &node->cache);
1230                         ins = node;
1231                 } else {
1232                         ins = malloc(sizeof(*ins));
1233                         BUG_ON(!ins);
1234                         ins->cache.start = node->cache.start;
1235                         ins->cache.size = node->cache.size;
1236                         ins->data = rec;
1237                         rec->refs++;
1238                 }
1239                 ret = insert_cache_extent(dst, &ins->cache);
1240                 if (ret == -EEXIST) {
1241                         conflict = get_inode_rec(dst, rec->ino, 1);
1242                         BUG_ON(IS_ERR(conflict));
1243                         merge_inode_recs(rec, conflict, dst);
1244                         if (rec->checked) {
1245                                 conflict->checked = 1;
1246                                 if (dst_node->current == conflict)
1247                                         dst_node->current = NULL;
1248                         }
1249                         maybe_free_inode_rec(dst, conflict);
1250                         free_inode_rec(rec);
1251                         free(ins);
1252                 } else {
1253                         BUG_ON(ret);
1254                 }
1255         }
1256
1257         if (src == &src_node->root_cache) {
1258                 src = &src_node->inode_cache;
1259                 dst = &dst_node->inode_cache;
1260                 goto again;
1261         }
1262
1263         if (current_ino > 0 && (!dst_node->current ||
1264             current_ino > dst_node->current->ino)) {
1265                 if (dst_node->current) {
1266                         dst_node->current->checked = 1;
1267                         maybe_free_inode_rec(dst, dst_node->current);
1268                 }
1269                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270                 BUG_ON(IS_ERR(dst_node->current));
1271         }
1272         return 0;
1273 }
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
/*
 * Per-level cache of the most recent extent ref-count lookup done while
 * walking down a tree: bytenr[level] holds the start of the tree block that
 * was looked up and refs[level] the reference count found for it, so the
 * walk can skip the lookup when it meets the same block again.
 */
1915 struct node_refs {
1916         u64 bytenr[BTRFS_MAX_LEVEL];
1917         u64 refs[BTRFS_MAX_LEVEL];
1918 };
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
/* Result bits returned by find_dir_item() */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
3833
3834 /*
3835  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3836  * INODE_REF/INODE_EXTREF match.
3837  *
3838  * @root:       the root of the fs/file tree
3839  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3840  * @key:        the key of the DIR_ITEM/DIR_INDEX
3841  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3842  *              distinguish root_dir between normal dir/file
3843  * @name:       the name in the INODE_REF/INODE_EXTREF
3844  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3845  * @mode:       the st_mode of INODE_ITEM
3846  *
3847  * Return 0 if no error occurred.
3848  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3849  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3850  * dir/file.
3851  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3852  * not match for normal dir/file.
3853  */
3854 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3855                          struct btrfs_key *key, u64 index, char *name,
3856                          u32 namelen, u32 mode)
3857 {
3858         struct btrfs_path path;
3859         struct extent_buffer *node;
3860         struct btrfs_dir_item *di;
3861         struct btrfs_key location;
3862         char namebuf[BTRFS_NAME_LEN] = {0};
3863         u32 total;
3864         u32 cur = 0;
3865         u32 len;
3866         u32 name_len;
3867         u32 data_len;
3868         u8 filetype;
3869         int slot;
3870         int ret;
3871
3872         btrfs_init_path(&path);
3873         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3874         if (ret < 0) {
3875                 ret = DIR_ITEM_MISSING;
3876                 goto out;
3877         }
3878
3879         /* Process root dir and goto out*/
3880         if (index == 0) {
3881                 if (ret == 0) {
3882                         ret = ROOT_DIR_ERROR;
3883                         error(
3884                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3885                                 root->objectid,
3886                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3887                                         "REF" : "EXTREF",
3888                                 ref_key->objectid, ref_key->offset,
3889                                 key->type == BTRFS_DIR_ITEM_KEY ?
3890                                         "DIR_ITEM" : "DIR_INDEX");
3891                 } else {
3892                         ret = 0;
3893                 }
3894
3895                 goto out;
3896         }
3897
3898         /* Process normal file/dir */
3899         if (ret > 0) {
3900                 ret = DIR_ITEM_MISSING;
3901                 error(
3902                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3903                         root->objectid,
3904                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3905                         ref_key->objectid, ref_key->offset,
3906                         key->type == BTRFS_DIR_ITEM_KEY ?
3907                                 "DIR_ITEM" : "DIR_INDEX",
3908                         key->objectid, key->offset, namelen, name,
3909                         imode_to_type(mode));
3910                 goto out;
3911         }
3912
3913         /* Check whether inode_id/filetype/name match */
3914         node = path.nodes[0];
3915         slot = path.slots[0];
3916         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3917         total = btrfs_item_size_nr(node, slot);
3918         while (cur < total) {
3919                 ret = DIR_ITEM_MISMATCH;
3920                 name_len = btrfs_dir_name_len(node, di);
3921                 data_len = btrfs_dir_data_len(node, di);
3922
3923                 btrfs_dir_item_key_to_cpu(node, di, &location);
3924                 if (location.objectid != ref_key->objectid ||
3925                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3926                     location.offset != 0)
3927                         goto next;
3928
3929                 filetype = btrfs_dir_type(node, di);
3930                 if (imode_to_type(mode) != filetype)
3931                         goto next;
3932
3933                 if (name_len <= BTRFS_NAME_LEN) {
3934                         len = name_len;
3935                 } else {
3936                         len = BTRFS_NAME_LEN;
3937                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3938                         root->objectid,
3939                         key->type == BTRFS_DIR_ITEM_KEY ?
3940                         "DIR_ITEM" : "DIR_INDEX",
3941                         key->objectid, key->offset, name_len);
3942                 }
3943                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3944                 if (len != namelen || strncmp(namebuf, name, len))
3945                         goto next;
3946
3947                 ret = 0;
3948                 goto out;
3949 next:
3950                 len = sizeof(*di) + name_len + data_len;
3951                 di = (struct btrfs_dir_item *)((char *)di + len);
3952                 cur += len;
3953         }
3954         if (ret == DIR_ITEM_MISMATCH)
3955                 error(
3956                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3957                         root->objectid,
3958                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3959                         ref_key->objectid, ref_key->offset,
3960                         key->type == BTRFS_DIR_ITEM_KEY ?
3961                                 "DIR_ITEM" : "DIR_INDEX",
3962                         key->objectid, key->offset, namelen, name,
3963                         imode_to_type(mode));
3964 out:
3965         btrfs_release_path(&path);
3966         return ret;
3967 }
3968
3969 /*
3970  * Traverse the given INODE_REF and call find_dir_item() to find related
3971  * DIR_ITEM/DIR_INDEX.
3972  *
3973  * @root:       the root of the fs/file tree
3974  * @ref_key:    the key of the INODE_REF
3975  * @refs:       the count of INODE_REF
3976  * @mode:       the st_mode of INODE_ITEM
3977  *
3978  * Return 0 if no error occurred.
3979  */
3980 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3981                            struct extent_buffer *node, int slot, u64 *refs,
3982                            int mode)
3983 {
3984         struct btrfs_key key;
3985         struct btrfs_inode_ref *ref;
3986         char namebuf[BTRFS_NAME_LEN] = {0};
3987         u32 total;
3988         u32 cur = 0;
3989         u32 len;
3990         u32 name_len;
3991         u64 index;
3992         int ret, err = 0;
3993
3994         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
3995         total = btrfs_item_size_nr(node, slot);
3996
3997 next:
3998         /* Update inode ref count */
3999         (*refs)++;
4000
4001         index = btrfs_inode_ref_index(node, ref);
4002         name_len = btrfs_inode_ref_name_len(node, ref);
4003         if (name_len <= BTRFS_NAME_LEN) {
4004                 len = name_len;
4005         } else {
4006                 len = BTRFS_NAME_LEN;
4007                 warning("root %llu INODE_REF[%llu %llu] name too long",
4008                         root->objectid, ref_key->objectid, ref_key->offset);
4009         }
4010
4011         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4012
4013         /* Check root dir ref name */
4014         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4015                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4016                       root->objectid, ref_key->objectid, ref_key->offset,
4017                       namebuf);
4018                 err |= ROOT_DIR_ERROR;
4019         }
4020
4021         /* Find related DIR_INDEX */
4022         key.objectid = ref_key->offset;
4023         key.type = BTRFS_DIR_INDEX_KEY;
4024         key.offset = index;
4025         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4026         err |= ret;
4027
4028         /* Find related dir_item */
4029         key.objectid = ref_key->offset;
4030         key.type = BTRFS_DIR_ITEM_KEY;
4031         key.offset = btrfs_name_hash(namebuf, len);
4032         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4033         err |= ret;
4034
4035         len = sizeof(*ref) + name_len;
4036         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4037         cur += len;
4038         if (cur < total)
4039                 goto next;
4040
4041         return err;
4042 }
4043
4044 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4045 {
4046         struct list_head *cur = rec->backrefs.next;
4047         struct extent_backref *back;
4048         struct tree_backref *tback;
4049         struct data_backref *dback;
4050         u64 found = 0;
4051         int err = 0;
4052
4053         while(cur != &rec->backrefs) {
4054                 back = to_extent_backref(cur);
4055                 cur = cur->next;
4056                 if (!back->found_extent_tree) {
4057                         err = 1;
4058                         if (!print_errs)
4059                                 goto out;
4060                         if (back->is_data) {
4061                                 dback = to_data_backref(back);
4062                                 fprintf(stderr, "Backref %llu %s %llu"
4063                                         " owner %llu offset %llu num_refs %lu"
4064                                         " not found in extent tree\n",
4065                                         (unsigned long long)rec->start,
4066                                         back->full_backref ?
4067                                         "parent" : "root",
4068                                         back->full_backref ?
4069                                         (unsigned long long)dback->parent:
4070                                         (unsigned long long)dback->root,
4071                                         (unsigned long long)dback->owner,
4072                                         (unsigned long long)dback->offset,
4073                                         (unsigned long)dback->num_refs);
4074                         } else {
4075                                 tback = to_tree_backref(back);
4076                                 fprintf(stderr, "Backref %llu parent %llu"
4077                                         " root %llu not found in extent tree\n",
4078                                         (unsigned long long)rec->start,
4079                                         (unsigned long long)tback->parent,
4080                                         (unsigned long long)tback->root);
4081                         }
4082                 }
4083                 if (!back->is_data && !back->found_ref) {
4084                         err = 1;
4085                         if (!print_errs)
4086                                 goto out;
4087                         tback = to_tree_backref(back);
4088                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4089                                 (unsigned long long)rec->start,
4090                                 back->full_backref ? "parent" : "root",
4091                                 back->full_backref ?
4092                                 (unsigned long long)tback->parent :
4093                                 (unsigned long long)tback->root, back);
4094                 }
4095                 if (back->is_data) {
4096                         dback = to_data_backref(back);
4097                         if (dback->found_ref != dback->num_refs) {
4098                                 err = 1;
4099                                 if (!print_errs)
4100                                         goto out;
4101                                 fprintf(stderr, "Incorrect local backref count"
4102                                         " on %llu %s %llu owner %llu"
4103                                         " offset %llu found %u wanted %u back %p\n",
4104                                         (unsigned long long)rec->start,
4105                                         back->full_backref ?
4106                                         "parent" : "root",
4107                                         back->full_backref ?
4108                                         (unsigned long long)dback->parent:
4109                                         (unsigned long long)dback->root,
4110                                         (unsigned long long)dback->owner,
4111                                         (unsigned long long)dback->offset,
4112                                         dback->found_ref, dback->num_refs, back);
4113                         }
4114                         if (dback->disk_bytenr != rec->start) {
4115                                 err = 1;
4116                                 if (!print_errs)
4117                                         goto out;
4118                                 fprintf(stderr, "Backref disk bytenr does not"
4119                                         " match extent record, bytenr=%llu, "
4120                                         "ref bytenr=%llu\n",
4121                                         (unsigned long long)rec->start,
4122                                         (unsigned long long)dback->disk_bytenr);
4123                         }
4124
4125                         if (dback->bytes != rec->nr) {
4126                                 err = 1;
4127                                 if (!print_errs)
4128                                         goto out;
4129                                 fprintf(stderr, "Backref bytes do not match "
4130                                         "extent backref, bytenr=%llu, ref "
4131                                         "bytes=%llu, backref bytes=%llu\n",
4132                                         (unsigned long long)rec->start,
4133                                         (unsigned long long)rec->nr,
4134                                         (unsigned long long)dback->bytes);
4135                         }
4136                 }
4137                 if (!back->is_data) {
4138                         found += 1;
4139                 } else {
4140                         dback = to_data_backref(back);
4141                         found += dback->found_ref;
4142                 }
4143         }
4144         if (found != rec->refs) {
4145                 err = 1;
4146                 if (!print_errs)
4147                         goto out;
4148                 fprintf(stderr, "Incorrect global backref count "
4149                         "on %llu found %llu wanted %llu\n",
4150                         (unsigned long long)rec->start,
4151                         (unsigned long long)found,
4152                         (unsigned long long)rec->refs);
4153         }
4154 out:
4155         return err;
4156 }
4157
4158 static int free_all_extent_backrefs(struct extent_record *rec)
4159 {
4160         struct extent_backref *back;
4161         struct list_head *cur;
4162         while (!list_empty(&rec->backrefs)) {
4163                 cur = rec->backrefs.next;
4164                 back = to_extent_backref(cur);
4165                 list_del(cur);
4166                 free(back);
4167         }
4168         return 0;
4169 }
4170
4171 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4172                                      struct cache_tree *extent_cache)
4173 {
4174         struct cache_extent *cache;
4175         struct extent_record *rec;
4176
4177         while (1) {
4178                 cache = first_cache_extent(extent_cache);
4179                 if (!cache)
4180                         break;
4181                 rec = container_of(cache, struct extent_record, cache);
4182                 remove_cache_extent(extent_cache, cache);
4183                 free_all_extent_backrefs(rec);
4184                 free(rec);
4185         }
4186 }
4187
4188 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4189                                  struct extent_record *rec)
4190 {
4191         if (rec->content_checked && rec->owner_ref_checked &&
4192             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4193             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4194             !rec->bad_full_backref && !rec->crossing_stripes &&
4195             !rec->wrong_chunk_type) {
4196                 remove_cache_extent(extent_cache, &rec->cache);
4197                 free_all_extent_backrefs(rec);
4198                 list_del_init(&rec->list);
4199                 free(rec);
4200         }
4201         return 0;
4202 }
4203
4204 static int check_owner_ref(struct btrfs_root *root,
4205                             struct extent_record *rec,
4206                             struct extent_buffer *buf)
4207 {
4208         struct extent_backref *node;
4209         struct tree_backref *back;
4210         struct btrfs_root *ref_root;
4211         struct btrfs_key key;
4212         struct btrfs_path path;
4213         struct extent_buffer *parent;
4214         int level;
4215         int found = 0;
4216         int ret;
4217
4218         list_for_each_entry(node, &rec->backrefs, list) {
4219                 if (node->is_data)
4220                         continue;
4221                 if (!node->found_ref)
4222                         continue;
4223                 if (node->full_backref)
4224                         continue;
4225                 back = to_tree_backref(node);
4226                 if (btrfs_header_owner(buf) == back->root)
4227                         return 0;
4228         }
4229         BUG_ON(rec->is_root);
4230
4231         /* try to find the block by search corresponding fs tree */
4232         key.objectid = btrfs_header_owner(buf);
4233         key.type = BTRFS_ROOT_ITEM_KEY;
4234         key.offset = (u64)-1;
4235
4236         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4237         if (IS_ERR(ref_root))
4238                 return 1;
4239
4240         level = btrfs_header_level(buf);
4241         if (level == 0)
4242                 btrfs_item_key_to_cpu(buf, &key, 0);
4243         else
4244                 btrfs_node_key_to_cpu(buf, &key, 0);
4245
4246         btrfs_init_path(&path);
4247         path.lowest_level = level + 1;
4248         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4249         if (ret < 0)
4250                 return 0;
4251
4252         parent = path.nodes[level + 1];
4253         if (parent && buf->start == btrfs_node_blockptr(parent,
4254                                                         path.slots[level + 1]))
4255                 found = 1;
4256
4257         btrfs_release_path(&path);
4258         return found ? 0 : 1;
4259 }
4260
4261 static int is_extent_tree_record(struct extent_record *rec)
4262 {
4263         struct list_head *cur = rec->backrefs.next;
4264         struct extent_backref *node;
4265         struct tree_backref *back;
4266         int is_extent = 0;
4267
4268         while(cur != &rec->backrefs) {
4269                 node = to_extent_backref(cur);
4270                 cur = cur->next;
4271                 if (node->is_data)
4272                         return 0;
4273                 back = to_tree_backref(node);
4274                 if (node->full_backref)
4275                         return 0;
4276                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4277                         is_extent = 1;
4278         }
4279         return is_extent;
4280 }
4281
4282
4283 static int record_bad_block_io(struct btrfs_fs_info *info,
4284                                struct cache_tree *extent_cache,
4285                                u64 start, u64 len)
4286 {
4287         struct extent_record *rec;
4288         struct cache_extent *cache;
4289         struct btrfs_key key;
4290
4291         cache = lookup_cache_extent(extent_cache, start, len);
4292         if (!cache)
4293                 return 0;
4294
4295         rec = container_of(cache, struct extent_record, cache);
4296         if (!is_extent_tree_record(rec))
4297                 return 0;
4298
4299         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4300         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4301 }
4302
4303 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4304                        struct extent_buffer *buf, int slot)
4305 {
4306         if (btrfs_header_level(buf)) {
4307                 struct btrfs_key_ptr ptr1, ptr2;
4308
4309                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4310                                    sizeof(struct btrfs_key_ptr));
4311                 read_extent_buffer(buf, &ptr2,
4312                                    btrfs_node_key_ptr_offset(slot + 1),
4313                                    sizeof(struct btrfs_key_ptr));
4314                 write_extent_buffer(buf, &ptr1,
4315                                     btrfs_node_key_ptr_offset(slot + 1),
4316                                     sizeof(struct btrfs_key_ptr));
4317                 write_extent_buffer(buf, &ptr2,
4318                                     btrfs_node_key_ptr_offset(slot),
4319                                     sizeof(struct btrfs_key_ptr));
4320                 if (slot == 0) {
4321                         struct btrfs_disk_key key;
4322                         btrfs_node_key(buf, &key, 0);
4323                         btrfs_fixup_low_keys(root, path, &key,
4324                                              btrfs_header_level(buf) + 1);
4325                 }
4326         } else {
4327                 struct btrfs_item *item1, *item2;
4328                 struct btrfs_key k1, k2;
4329                 char *item1_data, *item2_data;
4330                 u32 item1_offset, item2_offset, item1_size, item2_size;
4331
4332                 item1 = btrfs_item_nr(slot);
4333                 item2 = btrfs_item_nr(slot + 1);
4334                 btrfs_item_key_to_cpu(buf, &k1, slot);
4335                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4336                 item1_offset = btrfs_item_offset(buf, item1);
4337                 item2_offset = btrfs_item_offset(buf, item2);
4338                 item1_size = btrfs_item_size(buf, item1);
4339                 item2_size = btrfs_item_size(buf, item2);
4340
4341                 item1_data = malloc(item1_size);
4342                 if (!item1_data)
4343                         return -ENOMEM;
4344                 item2_data = malloc(item2_size);
4345                 if (!item2_data) {
4346                         free(item1_data);
4347                         return -ENOMEM;
4348                 }
4349
4350                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4351                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4352
4353                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4354                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4355                 free(item1_data);
4356                 free(item2_data);
4357
4358                 btrfs_set_item_offset(buf, item1, item2_offset);
4359                 btrfs_set_item_offset(buf, item2, item1_offset);
4360                 btrfs_set_item_size(buf, item1, item2_size);
4361                 btrfs_set_item_size(buf, item2, item1_size);
4362
4363                 path->slots[0] = slot;
4364                 btrfs_set_item_key_unsafe(root, path, &k2);
4365                 path->slots[0] = slot + 1;
4366                 btrfs_set_item_key_unsafe(root, path, &k1);
4367         }
4368         return 0;
4369 }
4370
4371 static int fix_key_order(struct btrfs_trans_handle *trans,
4372                          struct btrfs_root *root,
4373                          struct btrfs_path *path)
4374 {
4375         struct extent_buffer *buf;
4376         struct btrfs_key k1, k2;
4377         int i;
4378         int level = path->lowest_level;
4379         int ret = -EIO;
4380
4381         buf = path->nodes[level];
4382         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4383                 if (level) {
4384                         btrfs_node_key_to_cpu(buf, &k1, i);
4385                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4386                 } else {
4387                         btrfs_item_key_to_cpu(buf, &k1, i);
4388                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4389                 }
4390                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4391                         continue;
4392                 ret = swap_values(root, path, buf, i);
4393                 if (ret)
4394                         break;
4395                 btrfs_mark_buffer_dirty(buf);
4396                 i = 0;
4397         }
4398         return ret;
4399 }
4400
4401 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4402                              struct btrfs_root *root,
4403                              struct btrfs_path *path,
4404                              struct extent_buffer *buf, int slot)
4405 {
4406         struct btrfs_key key;
4407         int nritems = btrfs_header_nritems(buf);
4408
4409         btrfs_item_key_to_cpu(buf, &key, slot);
4410
4411         /* These are all the keys we can deal with missing. */
4412         if (key.type != BTRFS_DIR_INDEX_KEY &&
4413             key.type != BTRFS_EXTENT_ITEM_KEY &&
4414             key.type != BTRFS_METADATA_ITEM_KEY &&
4415             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4416             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4417                 return -1;
4418
4419         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4420                (unsigned long long)key.objectid, key.type,
4421                (unsigned long long)key.offset, slot, buf->start);
4422         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4423                               btrfs_item_nr_offset(slot + 1),
4424                               sizeof(struct btrfs_item) *
4425                               (nritems - slot - 1));
4426         btrfs_set_header_nritems(buf, nritems - 1);
4427         if (slot == 0) {
4428                 struct btrfs_disk_key disk_key;
4429
4430                 btrfs_item_key(buf, &disk_key, 0);
4431                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4432         }
4433         btrfs_mark_buffer_dirty(buf);
4434         return 0;
4435 }
4436
4437 static int fix_item_offset(struct btrfs_trans_handle *trans,
4438                            struct btrfs_root *root,
4439                            struct btrfs_path *path)
4440 {
4441         struct extent_buffer *buf;
4442         int i;
4443         int ret = 0;
4444
4445         /* We should only get this for leaves */
4446         BUG_ON(path->lowest_level);
4447         buf = path->nodes[0];
4448 again:
4449         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4450                 unsigned int shift = 0, offset;
4451
4452                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4453                     BTRFS_LEAF_DATA_SIZE(root)) {
4454                         if (btrfs_item_end_nr(buf, i) >
4455                             BTRFS_LEAF_DATA_SIZE(root)) {
4456                                 ret = delete_bogus_item(trans, root, path,
4457                                                         buf, i);
4458                                 if (!ret)
4459                                         goto again;
4460                                 fprintf(stderr, "item is off the end of the "
4461                                         "leaf, can't fix\n");
4462                                 ret = -EIO;
4463                                 break;
4464                         }
4465                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4466                                 btrfs_item_end_nr(buf, i);
4467                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4468                            btrfs_item_offset_nr(buf, i - 1)) {
4469                         if (btrfs_item_end_nr(buf, i) >
4470                             btrfs_item_offset_nr(buf, i - 1)) {
4471                                 ret = delete_bogus_item(trans, root, path,
4472                                                         buf, i);
4473                                 if (!ret)
4474                                         goto again;
4475                                 fprintf(stderr, "items overlap, can't fix\n");
4476                                 ret = -EIO;
4477                                 break;
4478                         }
4479                         shift = btrfs_item_offset_nr(buf, i - 1) -
4480                                 btrfs_item_end_nr(buf, i);
4481                 }
4482                 if (!shift)
4483                         continue;
4484
4485                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4486                        i, shift, (unsigned long long)buf->start);
4487                 offset = btrfs_item_offset_nr(buf, i);
4488                 memmove_extent_buffer(buf,
4489                                       btrfs_leaf_data(buf) + offset + shift,
4490                                       btrfs_leaf_data(buf) + offset,
4491                                       btrfs_item_size_nr(buf, i));
4492                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4493                                       offset + shift);
4494                 btrfs_mark_buffer_dirty(buf);
4495         }
4496
4497         /*
4498          * We may have moved things, in which case we want to exit so we don't
4499          * write those changes out.  Once we have proper abort functionality in
4500          * progs this can be changed to something nicer.
4501          */
4502         BUG_ON(ret);
4503         return ret;
4504 }
4505
4506 /*
4507  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4508  * then just return -EIO.
4509  */
4510 static int try_to_fix_bad_block(struct btrfs_root *root,
4511                                 struct extent_buffer *buf,
4512                                 enum btrfs_tree_block_status status)
4513 {
4514         struct btrfs_trans_handle *trans;
4515         struct ulist *roots;
4516         struct ulist_node *node;
4517         struct btrfs_root *search_root;
4518         struct btrfs_path path;
4519         struct ulist_iterator iter;
4520         struct btrfs_key root_key, key;
4521         int ret;
4522
4523         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4524             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4525                 return -EIO;
4526
4527         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4528         if (ret)
4529                 return -EIO;
4530
4531         btrfs_init_path(&path);
4532         ULIST_ITER_INIT(&iter);
4533         while ((node = ulist_next(roots, &iter))) {
4534                 root_key.objectid = node->val;
4535                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4536                 root_key.offset = (u64)-1;
4537
4538                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4539                 if (IS_ERR(root)) {
4540                         ret = -EIO;
4541                         break;
4542                 }
4543
4544
4545                 trans = btrfs_start_transaction(search_root, 0);
4546                 if (IS_ERR(trans)) {
4547                         ret = PTR_ERR(trans);
4548                         break;
4549                 }
4550
4551                 path.lowest_level = btrfs_header_level(buf);
4552                 path.skip_check_block = 1;
4553                 if (path.lowest_level)
4554                         btrfs_node_key_to_cpu(buf, &key, 0);
4555                 else
4556                         btrfs_item_key_to_cpu(buf, &key, 0);
4557                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4558                 if (ret) {
4559                         ret = -EIO;
4560                         btrfs_commit_transaction(trans, search_root);
4561                         break;
4562                 }
4563                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4564                         ret = fix_key_order(trans, search_root, &path);
4565                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4566                         ret = fix_item_offset(trans, search_root, &path);
4567                 if (ret) {
4568                         btrfs_commit_transaction(trans, search_root);
4569                         break;
4570                 }
4571                 btrfs_release_path(&path);
4572                 btrfs_commit_transaction(trans, search_root);
4573         }
4574         ulist_free(roots);
4575         btrfs_release_path(&path);
4576         return ret;
4577 }
4578
4579 static int check_block(struct btrfs_root *root,
4580                        struct cache_tree *extent_cache,
4581                        struct extent_buffer *buf, u64 flags)
4582 {
4583         struct extent_record *rec;
4584         struct cache_extent *cache;
4585         struct btrfs_key key;
4586         enum btrfs_tree_block_status status;
4587         int ret = 0;
4588         int level;
4589
4590         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4591         if (!cache)
4592                 return 1;
4593         rec = container_of(cache, struct extent_record, cache);
4594         rec->generation = btrfs_header_generation(buf);
4595
4596         level = btrfs_header_level(buf);
4597         if (btrfs_header_nritems(buf) > 0) {
4598
4599                 if (level == 0)
4600                         btrfs_item_key_to_cpu(buf, &key, 0);
4601                 else
4602                         btrfs_node_key_to_cpu(buf, &key, 0);
4603
4604                 rec->info_objectid = key.objectid;
4605         }
4606         rec->info_level = level;
4607
4608         if (btrfs_is_leaf(buf))
4609                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4610         else
4611                 status = btrfs_check_node(root, &rec->parent_key, buf);
4612
4613         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4614                 if (repair)
4615                         status = try_to_fix_bad_block(root, buf, status);
4616                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4617                         ret = -EIO;
4618                         fprintf(stderr, "bad block %llu\n",
4619                                 (unsigned long long)buf->start);
4620                 } else {
4621                         /*
4622                          * Signal to callers we need to start the scan over
4623                          * again since we'll have cowed blocks.
4624                          */
4625                         ret = -EAGAIN;
4626                 }
4627         } else {
4628                 rec->content_checked = 1;
4629                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4630                         rec->owner_ref_checked = 1;
4631                 else {
4632                         ret = check_owner_ref(root, rec, buf);
4633                         if (!ret)
4634                                 rec->owner_ref_checked = 1;
4635                 }
4636         }
4637         if (!ret)
4638                 maybe_free_extent_rec(extent_cache, rec);
4639         return ret;
4640 }
4641
4642 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4643                                                 u64 parent, u64 root)
4644 {
4645         struct list_head *cur = rec->backrefs.next;
4646         struct extent_backref *node;
4647         struct tree_backref *back;
4648
4649         while(cur != &rec->backrefs) {
4650                 node = to_extent_backref(cur);
4651                 cur = cur->next;
4652                 if (node->is_data)
4653                         continue;
4654                 back = to_tree_backref(node);
4655                 if (parent > 0) {
4656                         if (!node->full_backref)
4657                                 continue;
4658                         if (parent == back->parent)
4659                                 return back;
4660                 } else {
4661                         if (node->full_backref)
4662                                 continue;
4663                         if (back->root == root)
4664                                 return back;
4665                 }
4666         }
4667         return NULL;
4668 }
4669
4670 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4671                                                 u64 parent, u64 root)
4672 {
4673         struct tree_backref *ref = malloc(sizeof(*ref));
4674
4675         if (!ref)
4676                 return NULL;
4677         memset(&ref->node, 0, sizeof(ref->node));
4678         if (parent > 0) {
4679                 ref->parent = parent;
4680                 ref->node.full_backref = 1;
4681         } else {
4682                 ref->root = root;
4683                 ref->node.full_backref = 0;
4684         }
4685         list_add_tail(&ref->node.list, &rec->backrefs);
4686
4687         return ref;
4688 }
4689
4690 static struct data_backref *find_data_backref(struct extent_record *rec,
4691                                                 u64 parent, u64 root,
4692                                                 u64 owner, u64 offset,
4693                                                 int found_ref,
4694                                                 u64 disk_bytenr, u64 bytes)
4695 {
4696         struct list_head *cur = rec->backrefs.next;
4697         struct extent_backref *node;
4698         struct data_backref *back;
4699
4700         while(cur != &rec->backrefs) {
4701                 node = to_extent_backref(cur);
4702                 cur = cur->next;
4703                 if (!node->is_data)
4704                         continue;
4705                 back = to_data_backref(node);
4706                 if (parent > 0) {
4707                         if (!node->full_backref)
4708                                 continue;
4709                         if (parent == back->parent)
4710                                 return back;
4711                 } else {
4712                         if (node->full_backref)
4713                                 continue;
4714                         if (back->root == root && back->owner == owner &&
4715                             back->offset == offset) {
4716                                 if (found_ref && node->found_ref &&
4717                                     (back->bytes != bytes ||
4718                                     back->disk_bytenr != disk_bytenr))
4719                                         continue;
4720                                 return back;
4721                         }
4722                 }
4723         }
4724         return NULL;
4725 }
4726
4727 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4728                                                 u64 parent, u64 root,
4729                                                 u64 owner, u64 offset,
4730                                                 u64 max_size)
4731 {
4732         struct data_backref *ref = malloc(sizeof(*ref));
4733
4734         if (!ref)
4735                 return NULL;
4736         memset(&ref->node, 0, sizeof(ref->node));
4737         ref->node.is_data = 1;
4738
4739         if (parent > 0) {
4740                 ref->parent = parent;
4741                 ref->owner = 0;
4742                 ref->offset = 0;
4743                 ref->node.full_backref = 1;
4744         } else {
4745                 ref->root = root;
4746                 ref->owner = owner;
4747                 ref->offset = offset;
4748                 ref->node.full_backref = 0;
4749         }
4750         ref->bytes = max_size;
4751         ref->found_ref = 0;
4752         ref->num_refs = 0;
4753         list_add_tail(&ref->node.list, &rec->backrefs);
4754         if (max_size > rec->max_size)
4755                 rec->max_size = max_size;
4756         return ref;
4757 }
4758
4759 /* Check if the type of extent matches with its chunk */
4760 static void check_extent_type(struct extent_record *rec)
4761 {
4762         struct btrfs_block_group_cache *bg_cache;
4763
4764         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4765         if (!bg_cache)
4766                 return;
4767
4768         /* data extent, check chunk directly*/
4769         if (!rec->metadata) {
4770                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4771                         rec->wrong_chunk_type = 1;
4772                 return;
4773         }
4774
4775         /* metadata extent, check the obvious case first */
4776         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4777                                  BTRFS_BLOCK_GROUP_METADATA))) {
4778                 rec->wrong_chunk_type = 1;
4779                 return;
4780         }
4781
4782         /*
4783          * Check SYSTEM extent, as it's also marked as metadata, we can only
4784          * make sure it's a SYSTEM extent by its backref
4785          */
4786         if (!list_empty(&rec->backrefs)) {
4787                 struct extent_backref *node;
4788                 struct tree_backref *tback;
4789                 u64 bg_type;
4790
4791                 node = to_extent_backref(rec->backrefs.next);
4792                 if (node->is_data) {
4793                         /* tree block shouldn't have data backref */
4794                         rec->wrong_chunk_type = 1;
4795                         return;
4796                 }
4797                 tback = container_of(node, struct tree_backref, node);
4798
4799                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4800                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4801                 else
4802                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4803                 if (!(bg_cache->flags & bg_type))
4804                         rec->wrong_chunk_type = 1;
4805         }
4806 }
4807
4808 /*
4809  * Allocate a new extent record, fill default values from @tmpl and insert int
4810  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4811  * the cache, otherwise it fails.
4812  */
4813 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4814                 struct extent_record *tmpl)
4815 {
4816         struct extent_record *rec;
4817         int ret = 0;
4818
4819         rec = malloc(sizeof(*rec));
4820         if (!rec)
4821                 return -ENOMEM;
4822         rec->start = tmpl->start;
4823         rec->max_size = tmpl->max_size;
4824         rec->nr = max(tmpl->nr, tmpl->max_size);
4825         rec->found_rec = tmpl->found_rec;
4826         rec->content_checked = tmpl->content_checked;
4827         rec->owner_ref_checked = tmpl->owner_ref_checked;
4828         rec->num_duplicates = 0;
4829         rec->metadata = tmpl->metadata;
4830         rec->flag_block_full_backref = FLAG_UNSET;
4831         rec->bad_full_backref = 0;
4832         rec->crossing_stripes = 0;
4833         rec->wrong_chunk_type = 0;
4834         rec->is_root = tmpl->is_root;
4835         rec->refs = tmpl->refs;
4836         rec->extent_item_refs = tmpl->extent_item_refs;
4837         rec->parent_generation = tmpl->parent_generation;
4838         INIT_LIST_HEAD(&rec->backrefs);
4839         INIT_LIST_HEAD(&rec->dups);
4840         INIT_LIST_HEAD(&rec->list);
4841         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4842         rec->cache.start = tmpl->start;
4843         rec->cache.size = tmpl->nr;
4844         ret = insert_cache_extent(extent_cache, &rec->cache);
4845         if (ret) {
4846                 free(rec);
4847                 return ret;
4848         }
4849         bytes_used += rec->nr;
4850
4851         if (tmpl->metadata)
4852                 rec->crossing_stripes = check_crossing_stripes(global_info,
4853                                 rec->start, global_info->tree_root->nodesize);
4854         check_extent_type(rec);
4855         return ret;
4856 }
4857
4858 /*
4859  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4860  * some are hints:
4861  * - refs              - if found, increase refs
4862  * - is_root           - if found, set
4863  * - content_checked   - if found, set
4864  * - owner_ref_checked - if found, set
4865  *
4866  * If not found, create a new one, initialize and insert.
4867  */
4868 static int add_extent_rec(struct cache_tree *extent_cache,
4869                 struct extent_record *tmpl)
4870 {
4871         struct extent_record *rec;
4872         struct cache_extent *cache;
4873         int ret = 0;
4874         int dup = 0;
4875
4876         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4877         if (cache) {
4878                 rec = container_of(cache, struct extent_record, cache);
4879                 if (tmpl->refs)
4880                         rec->refs++;
4881                 if (rec->nr == 1)
4882                         rec->nr = max(tmpl->nr, tmpl->max_size);
4883
4884                 /*
4885                  * We need to make sure to reset nr to whatever the extent
4886                  * record says was the real size, this way we can compare it to
4887                  * the backrefs.
4888                  */
4889                 if (tmpl->found_rec) {
4890                         if (tmpl->start != rec->start || rec->found_rec) {
4891                                 struct extent_record *tmp;
4892
4893                                 dup = 1;
4894                                 if (list_empty(&rec->list))
4895                                         list_add_tail(&rec->list,
4896                                                       &duplicate_extents);
4897
4898                                 /*
4899                                  * We have to do this song and dance in case we
4900                                  * find an extent record that falls inside of
4901                                  * our current extent record but does not have
4902                                  * the same objectid.
4903                                  */
4904                                 tmp = malloc(sizeof(*tmp));
4905                                 if (!tmp)
4906                                         return -ENOMEM;
4907                                 tmp->start = tmpl->start;
4908                                 tmp->max_size = tmpl->max_size;
4909                                 tmp->nr = tmpl->nr;
4910                                 tmp->found_rec = 1;
4911                                 tmp->metadata = tmpl->metadata;
4912                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4913                                 INIT_LIST_HEAD(&tmp->list);
4914                                 list_add_tail(&tmp->list, &rec->dups);
4915                                 rec->num_duplicates++;
4916                         } else {
4917                                 rec->nr = tmpl->nr;
4918                                 rec->found_rec = 1;
4919                         }
4920                 }
4921
4922                 if (tmpl->extent_item_refs && !dup) {
4923                         if (rec->extent_item_refs) {
4924                                 fprintf(stderr, "block %llu rec "
4925                                         "extent_item_refs %llu, passed %llu\n",
4926                                         (unsigned long long)tmpl->start,
4927                                         (unsigned long long)
4928                                                         rec->extent_item_refs,
4929                                         (unsigned long long)tmpl->extent_item_refs);
4930                         }
4931                         rec->extent_item_refs = tmpl->extent_item_refs;
4932                 }
4933                 if (tmpl->is_root)
4934                         rec->is_root = 1;
4935                 if (tmpl->content_checked)
4936                         rec->content_checked = 1;
4937                 if (tmpl->owner_ref_checked)
4938                         rec->owner_ref_checked = 1;
4939                 memcpy(&rec->parent_key, &tmpl->parent_key,
4940                                 sizeof(tmpl->parent_key));
4941                 if (tmpl->parent_generation)
4942                         rec->parent_generation = tmpl->parent_generation;
4943                 if (rec->max_size < tmpl->max_size)
4944                         rec->max_size = tmpl->max_size;
4945
4946                 /*
4947                  * A metadata extent can't cross stripe_len boundary, otherwise
4948                  * kernel scrub won't be able to handle it.
4949                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4950                  * it.
4951                  */
4952                 if (tmpl->metadata)
4953                         rec->crossing_stripes = check_crossing_stripes(
4954                                         global_info, rec->start,
4955                                         global_info->tree_root->nodesize);
4956                 check_extent_type(rec);
4957                 maybe_free_extent_rec(extent_cache, rec);
4958                 return ret;
4959         }
4960
4961         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4962
4963         return ret;
4964 }
4965
4966 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4967                             u64 parent, u64 root, int found_ref)
4968 {
4969         struct extent_record *rec;
4970         struct tree_backref *back;
4971         struct cache_extent *cache;
4972         int ret;
4973
4974         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4975         if (!cache) {
4976                 struct extent_record tmpl;
4977
4978                 memset(&tmpl, 0, sizeof(tmpl));
4979                 tmpl.start = bytenr;
4980                 tmpl.nr = 1;
4981                 tmpl.metadata = 1;
4982
4983                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4984                 if (ret)
4985                         return ret;
4986
4987                 /* really a bug in cache_extent implement now */
4988                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4989                 if (!cache)
4990                         return -ENOENT;
4991         }
4992
4993         rec = container_of(cache, struct extent_record, cache);
4994         if (rec->start != bytenr) {
4995                 /*
4996                  * Several cause, from unaligned bytenr to over lapping extents
4997                  */
4998                 return -EEXIST;
4999         }
5000
5001         back = find_tree_backref(rec, parent, root);
5002         if (!back) {
5003                 back = alloc_tree_backref(rec, parent, root);
5004                 if (!back)
5005                         return -ENOMEM;
5006         }
5007
5008         if (found_ref) {
5009                 if (back->node.found_ref) {
5010                         fprintf(stderr, "Extent back ref already exists "
5011                                 "for %llu parent %llu root %llu \n",
5012                                 (unsigned long long)bytenr,
5013                                 (unsigned long long)parent,
5014                                 (unsigned long long)root);
5015                 }
5016                 back->node.found_ref = 1;
5017         } else {
5018                 if (back->node.found_extent_tree) {
5019                         fprintf(stderr, "Extent back ref already exists "
5020                                 "for %llu parent %llu root %llu \n",
5021                                 (unsigned long long)bytenr,
5022                                 (unsigned long long)parent,
5023                                 (unsigned long long)root);
5024                 }
5025                 back->node.found_extent_tree = 1;
5026         }
5027         check_extent_type(rec);
5028         maybe_free_extent_rec(extent_cache, rec);
5029         return 0;
5030 }
5031
5032 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5033                             u64 parent, u64 root, u64 owner, u64 offset,
5034                             u32 num_refs, int found_ref, u64 max_size)
5035 {
5036         struct extent_record *rec;
5037         struct data_backref *back;
5038         struct cache_extent *cache;
5039         int ret;
5040
5041         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5042         if (!cache) {
5043                 struct extent_record tmpl;
5044
5045                 memset(&tmpl, 0, sizeof(tmpl));
5046                 tmpl.start = bytenr;
5047                 tmpl.nr = 1;
5048                 tmpl.max_size = max_size;
5049
5050                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5051                 if (ret)
5052                         return ret;
5053
5054                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5055                 if (!cache)
5056                         abort();
5057         }
5058
5059         rec = container_of(cache, struct extent_record, cache);
5060         if (rec->max_size < max_size)
5061                 rec->max_size = max_size;
5062
5063         /*
5064          * If found_ref is set then max_size is the real size and must match the
5065          * existing refs.  So if we have already found a ref then we need to
5066          * make sure that this ref matches the existing one, otherwise we need
5067          * to add a new backref so we can notice that the backrefs don't match
5068          * and we need to figure out who is telling the truth.  This is to
5069          * account for that awful fsync bug I introduced where we'd end up with
5070          * a btrfs_file_extent_item that would have its length include multiple
5071          * prealloc extents or point inside of a prealloc extent.
5072          */
5073         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5074                                  bytenr, max_size);
5075         if (!back) {
5076                 back = alloc_data_backref(rec, parent, root, owner, offset,
5077                                           max_size);
5078                 BUG_ON(!back);
5079         }
5080
5081         if (found_ref) {
5082                 BUG_ON(num_refs != 1);
5083                 if (back->node.found_ref)
5084                         BUG_ON(back->bytes != max_size);
5085                 back->node.found_ref = 1;
5086                 back->found_ref += 1;
5087                 back->bytes = max_size;
5088                 back->disk_bytenr = bytenr;
5089                 rec->refs += 1;
5090                 rec->content_checked = 1;
5091                 rec->owner_ref_checked = 1;
5092         } else {
5093                 if (back->node.found_extent_tree) {
5094                         fprintf(stderr, "Extent back ref already exists "
5095                                 "for %llu parent %llu root %llu "
5096                                 "owner %llu offset %llu num_refs %lu\n",
5097                                 (unsigned long long)bytenr,
5098                                 (unsigned long long)parent,
5099                                 (unsigned long long)root,
5100                                 (unsigned long long)owner,
5101                                 (unsigned long long)offset,
5102                                 (unsigned long)num_refs);
5103                 }
5104                 back->num_refs = num_refs;
5105                 back->node.found_extent_tree = 1;
5106         }
5107         maybe_free_extent_rec(extent_cache, rec);
5108         return 0;
5109 }
5110
5111 static int add_pending(struct cache_tree *pending,
5112                        struct cache_tree *seen, u64 bytenr, u32 size)
5113 {
5114         int ret;
5115         ret = add_cache_extent(seen, bytenr, size);
5116         if (ret)
5117                 return ret;
5118         add_cache_extent(pending, bytenr, size);
5119         return 0;
5120 }
5121
5122 static int pick_next_pending(struct cache_tree *pending,
5123                         struct cache_tree *reada,
5124                         struct cache_tree *nodes,
5125                         u64 last, struct block_info *bits, int bits_nr,
5126                         int *reada_bits)
5127 {
5128         unsigned long node_start = last;
5129         struct cache_extent *cache;
5130         int ret;
5131
5132         cache = search_cache_extent(reada, 0);
5133         if (cache) {
5134                 bits[0].start = cache->start;
5135                 bits[0].size = cache->size;
5136                 *reada_bits = 1;
5137                 return 1;
5138         }
5139         *reada_bits = 0;
5140         if (node_start > 32768)
5141                 node_start -= 32768;
5142
5143         cache = search_cache_extent(nodes, node_start);
5144         if (!cache)
5145                 cache = search_cache_extent(nodes, 0);
5146
5147         if (!cache) {
5148                  cache = search_cache_extent(pending, 0);
5149                  if (!cache)
5150                          return 0;
5151                  ret = 0;
5152                  do {
5153                          bits[ret].start = cache->start;
5154                          bits[ret].size = cache->size;
5155                          cache = next_cache_extent(cache);
5156                          ret++;
5157                  } while (cache && ret < bits_nr);
5158                  return ret;
5159         }
5160
5161         ret = 0;
5162         do {
5163                 bits[ret].start = cache->start;
5164                 bits[ret].size = cache->size;
5165                 cache = next_cache_extent(cache);
5166                 ret++;
5167         } while (cache && ret < bits_nr);
5168
5169         if (bits_nr - ret > 8) {
5170                 u64 lookup = bits[0].start + bits[0].size;
5171                 struct cache_extent *next;
5172                 next = search_cache_extent(pending, lookup);
5173                 while(next) {
5174                         if (next->start - lookup > 32768)
5175                                 break;
5176                         bits[ret].start = next->start;
5177                         bits[ret].size = next->size;
5178                         lookup = next->start + next->size;
5179                         ret++;
5180                         if (ret == bits_nr)
5181                                 break;
5182                         next = next_cache_extent(next);
5183                         if (!next)
5184                                 break;
5185                 }
5186         }
5187         return ret;
5188 }
5189
5190 static void free_chunk_record(struct cache_extent *cache)
5191 {
5192         struct chunk_record *rec;
5193
5194         rec = container_of(cache, struct chunk_record, cache);
5195         list_del_init(&rec->list);
5196         list_del_init(&rec->dextents);
5197         free(rec);
5198 }
5199
5200 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5201 {
5202         cache_tree_free_extents(chunk_cache, free_chunk_record);
5203 }
5204
5205 static void free_device_record(struct rb_node *node)
5206 {
5207         struct device_record *rec;
5208
5209         rec = container_of(node, struct device_record, node);
5210         free(rec);
5211 }
5212
5213 FREE_RB_BASED_TREE(device_cache, free_device_record);
5214
5215 int insert_block_group_record(struct block_group_tree *tree,
5216                               struct block_group_record *bg_rec)
5217 {
5218         int ret;
5219
5220         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5221         if (ret)
5222                 return ret;
5223
5224         list_add_tail(&bg_rec->list, &tree->block_groups);
5225         return 0;
5226 }
5227
5228 static void free_block_group_record(struct cache_extent *cache)
5229 {
5230         struct block_group_record *rec;
5231
5232         rec = container_of(cache, struct block_group_record, cache);
5233         list_del_init(&rec->list);
5234         free(rec);
5235 }
5236
5237 void free_block_group_tree(struct block_group_tree *tree)
5238 {
5239         cache_tree_free_extents(&tree->tree, free_block_group_record);
5240 }
5241
5242 int insert_device_extent_record(struct device_extent_tree *tree,
5243                                 struct device_extent_record *de_rec)
5244 {
5245         int ret;
5246
5247         /*
5248          * Device extent is a bit different from the other extents, because
5249          * the extents which belong to the different devices may have the
5250          * same start and size, so we need use the special extent cache
5251          * search/insert functions.
5252          */
5253         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5254         if (ret)
5255                 return ret;
5256
5257         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5258         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5259         return 0;
5260 }
5261
5262 static void free_device_extent_record(struct cache_extent *cache)
5263 {
5264         struct device_extent_record *rec;
5265
5266         rec = container_of(cache, struct device_extent_record, cache);
5267         if (!list_empty(&rec->chunk_list))
5268                 list_del_init(&rec->chunk_list);
5269         if (!list_empty(&rec->device_list))
5270                 list_del_init(&rec->device_list);
5271         free(rec);
5272 }
5273
5274 void free_device_extent_tree(struct device_extent_tree *tree)
5275 {
5276         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5277 }
5278
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the v0 extent ref stored in @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  Ref objectids below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, everything else as data backrefs carrying the v0 ref
 * count.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
5299
5300 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5301                                             struct btrfs_key *key,
5302                                             int slot)
5303 {
5304         struct btrfs_chunk *ptr;
5305         struct chunk_record *rec;
5306         int num_stripes, i;
5307
5308         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5309         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5310
5311         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5312         if (!rec) {
5313                 fprintf(stderr, "memory allocation failed\n");
5314                 exit(-1);
5315         }
5316
5317         INIT_LIST_HEAD(&rec->list);
5318         INIT_LIST_HEAD(&rec->dextents);
5319         rec->bg_rec = NULL;
5320
5321         rec->cache.start = key->offset;
5322         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5323
5324         rec->generation = btrfs_header_generation(leaf);
5325
5326         rec->objectid = key->objectid;
5327         rec->type = key->type;
5328         rec->offset = key->offset;
5329
5330         rec->length = rec->cache.size;
5331         rec->owner = btrfs_chunk_owner(leaf, ptr);
5332         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5333         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5334         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5335         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5336         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5337         rec->num_stripes = num_stripes;
5338         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5339
5340         for (i = 0; i < rec->num_stripes; ++i) {
5341                 rec->stripes[i].devid =
5342                         btrfs_stripe_devid_nr(leaf, ptr, i);
5343                 rec->stripes[i].offset =
5344                         btrfs_stripe_offset_nr(leaf, ptr, i);
5345                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5346                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5347                                 BTRFS_UUID_SIZE);
5348         }
5349
5350         return rec;
5351 }
5352
5353 static int process_chunk_item(struct cache_tree *chunk_cache,
5354                               struct btrfs_key *key, struct extent_buffer *eb,
5355                               int slot)
5356 {
5357         struct chunk_record *rec;
5358         struct btrfs_chunk *chunk;
5359         int ret = 0;
5360
5361         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5362         /*
5363          * Do extra check for this chunk item,
5364          *
5365          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5366          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5367          * and owner<->key_type check.
5368          */
5369         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5370                                       key->offset);
5371         if (ret < 0) {
5372                 error("chunk(%llu, %llu) is not valid, ignore it",
5373                       key->offset, btrfs_chunk_length(eb, chunk));
5374                 return 0;
5375         }
5376         rec = btrfs_new_chunk_record(eb, key, slot);
5377         ret = insert_cache_extent(chunk_cache, &rec->cache);
5378         if (ret) {
5379                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5380                         rec->offset, rec->length);
5381                 free(rec);
5382         }
5383
5384         return ret;
5385 }
5386
5387 static int process_device_item(struct rb_root *dev_cache,
5388                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5389 {
5390         struct btrfs_dev_item *ptr;
5391         struct device_record *rec;
5392         int ret = 0;
5393
5394         ptr = btrfs_item_ptr(eb,
5395                 slot, struct btrfs_dev_item);
5396
5397         rec = malloc(sizeof(*rec));
5398         if (!rec) {
5399                 fprintf(stderr, "memory allocation failed\n");
5400                 return -ENOMEM;
5401         }
5402
5403         rec->devid = key->offset;
5404         rec->generation = btrfs_header_generation(eb);
5405
5406         rec->objectid = key->objectid;
5407         rec->type = key->type;
5408         rec->offset = key->offset;
5409
5410         rec->devid = btrfs_device_id(eb, ptr);
5411         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5412         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5413
5414         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5415         if (ret) {
5416                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5417                 free(rec);
5418         }
5419
5420         return ret;
5421 }
5422
5423 struct block_group_record *
5424 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5425                              int slot)
5426 {
5427         struct btrfs_block_group_item *ptr;
5428         struct block_group_record *rec;
5429
5430         rec = calloc(1, sizeof(*rec));
5431         if (!rec) {
5432                 fprintf(stderr, "memory allocation failed\n");
5433                 exit(-1);
5434         }
5435
5436         rec->cache.start = key->objectid;
5437         rec->cache.size = key->offset;
5438
5439         rec->generation = btrfs_header_generation(leaf);
5440
5441         rec->objectid = key->objectid;
5442         rec->type = key->type;
5443         rec->offset = key->offset;
5444
5445         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5446         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5447
5448         INIT_LIST_HEAD(&rec->list);
5449
5450         return rec;
5451 }
5452
5453 static int process_block_group_item(struct block_group_tree *block_group_cache,
5454                                     struct btrfs_key *key,
5455                                     struct extent_buffer *eb, int slot)
5456 {
5457         struct block_group_record *rec;
5458         int ret = 0;
5459
5460         rec = btrfs_new_block_group_record(eb, key, slot);
5461         ret = insert_block_group_record(block_group_cache, rec);
5462         if (ret) {
5463                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5464                         rec->objectid, rec->offset);
5465                 free(rec);
5466         }
5467
5468         return ret;
5469 }
5470
5471 struct device_extent_record *
5472 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5473                                struct btrfs_key *key, int slot)
5474 {
5475         struct device_extent_record *rec;
5476         struct btrfs_dev_extent *ptr;
5477
5478         rec = calloc(1, sizeof(*rec));
5479         if (!rec) {
5480                 fprintf(stderr, "memory allocation failed\n");
5481                 exit(-1);
5482         }
5483
5484         rec->cache.objectid = key->objectid;
5485         rec->cache.start = key->offset;
5486
5487         rec->generation = btrfs_header_generation(leaf);
5488
5489         rec->objectid = key->objectid;
5490         rec->type = key->type;
5491         rec->offset = key->offset;
5492
5493         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5494         rec->chunk_objecteid =
5495                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5496         rec->chunk_offset =
5497                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5498         rec->length = btrfs_dev_extent_length(leaf, ptr);
5499         rec->cache.size = rec->length;
5500
5501         INIT_LIST_HEAD(&rec->chunk_list);
5502         INIT_LIST_HEAD(&rec->device_list);
5503
5504         return rec;
5505 }
5506
5507 static int
5508 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5509                            struct btrfs_key *key, struct extent_buffer *eb,
5510                            int slot)
5511 {
5512         struct device_extent_record *rec;
5513         int ret;
5514
5515         rec = btrfs_new_device_extent_record(eb, key, slot);
5516         ret = insert_device_extent_record(dev_extent_cache, rec);
5517         if (ret) {
5518                 fprintf(stderr,
5519                         "Device extent[%llu, %llu, %llu] existed.\n",
5520                         rec->objectid, rec->offset, rec->length);
5521                 free(rec);
5522         }
5523
5524         return ret;
5525 }
5526
5527 static int process_extent_item(struct btrfs_root *root,
5528                                struct cache_tree *extent_cache,
5529                                struct extent_buffer *eb, int slot)
5530 {
5531         struct btrfs_extent_item *ei;
5532         struct btrfs_extent_inline_ref *iref;
5533         struct btrfs_extent_data_ref *dref;
5534         struct btrfs_shared_data_ref *sref;
5535         struct btrfs_key key;
5536         struct extent_record tmpl;
5537         unsigned long end;
5538         unsigned long ptr;
5539         int ret;
5540         int type;
5541         u32 item_size = btrfs_item_size_nr(eb, slot);
5542         u64 refs = 0;
5543         u64 offset;
5544         u64 num_bytes;
5545         int metadata = 0;
5546
5547         btrfs_item_key_to_cpu(eb, &key, slot);
5548
5549         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5550                 metadata = 1;
5551                 num_bytes = root->nodesize;
5552         } else {
5553                 num_bytes = key.offset;
5554         }
5555
5556         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5557                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5558                       key.objectid, root->sectorsize);
5559                 return -EIO;
5560         }
5561         if (item_size < sizeof(*ei)) {
5562 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5563                 struct btrfs_extent_item_v0 *ei0;
5564                 BUG_ON(item_size != sizeof(*ei0));
5565                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5566                 refs = btrfs_extent_refs_v0(eb, ei0);
5567 #else
5568                 BUG();
5569 #endif
5570                 memset(&tmpl, 0, sizeof(tmpl));
5571                 tmpl.start = key.objectid;
5572                 tmpl.nr = num_bytes;
5573                 tmpl.extent_item_refs = refs;
5574                 tmpl.metadata = metadata;
5575                 tmpl.found_rec = 1;
5576                 tmpl.max_size = num_bytes;
5577
5578                 return add_extent_rec(extent_cache, &tmpl);
5579         }
5580
5581         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5582         refs = btrfs_extent_refs(eb, ei);
5583         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5584                 metadata = 1;
5585         else
5586                 metadata = 0;
5587         if (metadata && num_bytes != root->nodesize) {
5588                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5589                       num_bytes, root->nodesize);
5590                 return -EIO;
5591         }
5592         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5593                 error("ignore invalid data extent, length %llu is not aligned to %u",
5594                       num_bytes, root->sectorsize);
5595                 return -EIO;
5596         }
5597
5598         memset(&tmpl, 0, sizeof(tmpl));
5599         tmpl.start = key.objectid;
5600         tmpl.nr = num_bytes;
5601         tmpl.extent_item_refs = refs;
5602         tmpl.metadata = metadata;
5603         tmpl.found_rec = 1;
5604         tmpl.max_size = num_bytes;
5605         add_extent_rec(extent_cache, &tmpl);
5606
5607         ptr = (unsigned long)(ei + 1);
5608         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5609             key.type == BTRFS_EXTENT_ITEM_KEY)
5610                 ptr += sizeof(struct btrfs_tree_block_info);
5611
5612         end = (unsigned long)ei + item_size;
5613         while (ptr < end) {
5614                 iref = (struct btrfs_extent_inline_ref *)ptr;
5615                 type = btrfs_extent_inline_ref_type(eb, iref);
5616                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5617                 switch (type) {
5618                 case BTRFS_TREE_BLOCK_REF_KEY:
5619                         ret = add_tree_backref(extent_cache, key.objectid,
5620                                         0, offset, 0);
5621                         if (ret < 0)
5622                                 error("add_tree_backref failed: %s",
5623                                       strerror(-ret));
5624                         break;
5625                 case BTRFS_SHARED_BLOCK_REF_KEY:
5626                         ret = add_tree_backref(extent_cache, key.objectid,
5627                                         offset, 0, 0);
5628                         if (ret < 0)
5629                                 error("add_tree_backref failed: %s",
5630                                       strerror(-ret));
5631                         break;
5632                 case BTRFS_EXTENT_DATA_REF_KEY:
5633                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5634                         add_data_backref(extent_cache, key.objectid, 0,
5635                                         btrfs_extent_data_ref_root(eb, dref),
5636                                         btrfs_extent_data_ref_objectid(eb,
5637                                                                        dref),
5638                                         btrfs_extent_data_ref_offset(eb, dref),
5639                                         btrfs_extent_data_ref_count(eb, dref),
5640                                         0, num_bytes);
5641                         break;
5642                 case BTRFS_SHARED_DATA_REF_KEY:
5643                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5644                         add_data_backref(extent_cache, key.objectid, offset,
5645                                         0, 0, 0,
5646                                         btrfs_shared_data_ref_count(eb, sref),
5647                                         0, num_bytes);
5648                         break;
5649                 default:
5650                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5651                                 key.objectid, key.type, num_bytes);
5652                         goto out;
5653                 }
5654                 ptr += btrfs_extent_inline_ref_size(type);
5655         }
5656         WARN_ON(ptr > end);
5657 out:
5658         return 0;
5659 }
5660
/*
 * Check that the range [offset, offset + bytes) of block group @cache is
 * described by exactly one entry in cache->free_space_ctl.
 *
 * Before the lookup, the range is trimmed by every logical range that
 * btrfs_rmap_block() reports for the super block mirror offsets
 * (btrfs_sb_offset()).  On a match the entry is unlinked from the free
 * space ctl and freed.
 *
 * Returns 0 when the range matched an entry or was completely covered by
 * one of the mapped ranges, -EINVAL when no entry exists or the entry's
 * offset or size differs, or the error returned by btrfs_rmap_block() or by
 * the recursive check of a left-hand part.
 */
5661 static int check_cache_range(struct btrfs_root *root,
5662                              struct btrfs_block_group_cache *cache,
5663                              u64 offset, u64 bytes)
5664 {
5665         struct btrfs_free_space *entry;
5666         u64 *logical;
5667         u64 bytenr;
5668         int stripe_len;
5669         int i, nr, ret;
5670 
5671         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5672                 bytenr = btrfs_sb_offset(i);
5673                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5674                                        cache->key.objectid, bytenr, 0,
5675                                        &logical, &nr, &stripe_len);
                     /*
                      * NOTE(review): logical is not freed on this path;
                      * presumably btrfs_rmap_block() allocates nothing
                      * when it fails -- confirm.
                      */
5676                 if (ret)
5677                         return ret;
5678 
5679                 while (nr--) {
                             /* Mapped ranges outside our range are skipped. */
5680                         if (logical[nr] + stripe_len <= offset)
5681                                 continue;
5682                         if (offset + bytes <= logical[nr])
5683                                 continue;
                             /*
                              * The mapped range starts exactly at offset:
                              * either it covers everything that is left,
                              * or its length is cut off the front.
                              */
5684                         if (logical[nr] == offset) {
5685                                 if (stripe_len >= bytes) {
5686                                         free(logical);
5687                                         return 0;
5688                                 }
5689                                 bytes -= stripe_len;
5690                                 offset += stripe_len;
                             /*
                              * The mapped range starts before offset: same
                              * idea, keep only what lies past its end.
                              */
5691                         } else if (logical[nr] < offset) {
5692                                 if (logical[nr] + stripe_len >=
5693                                     offset + bytes) {
5694                                         free(logical);
5695                                         return 0;
5696                                 }
5697                                 bytes = (offset + bytes) -
5698                                         (logical[nr] + stripe_len);
5699                                 offset = logical[nr] + stripe_len;
5700                         } else {
5701                                 /*
5702                                  * Could be tricky, the super may land in the
5703                                  * middle of the area we're checking.  First
5704                                  * check the easiest case, it's at the end.
5705                                  */
5706                                 if (logical[nr] + stripe_len >=
5707                                     bytes + offset) {
5708                                         bytes = logical[nr] - offset;
5709                                         continue;
5710                                 }
5711 
5712                                 /* Check the left side */
5713                                 ret = check_cache_range(root, cache,
5714                                                         offset,
5715                                                         logical[nr] - offset);
5716                                 if (ret) {
5717                                         free(logical);
5718                                         return ret;
5719                                 }
5720 
5721                                 /* Now we continue with the right side */
5722                                 bytes = (offset + bytes) -
5723                                         (logical[nr] + stripe_len);
5724                                 offset = logical[nr] + stripe_len;
5725                         }
5726                 }
5727 
5728                 free(logical);
5729         }
5730 
             /*
              * What is left of the range must match one free space entry
              * exactly, both in offset and in size.
              */
5731         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5732         if (!entry) {
5733                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5734                         offset, offset+bytes);
5735                 return -EINVAL;
5736         }
5737 
5738         if (entry->offset != offset) {
5739                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5740                         entry->offset);
5741                 return -EINVAL;
5742         }
5743 
5744         if (entry->bytes != bytes) {
5745                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5746                         bytes, entry->bytes, offset);
5747                 return -EINVAL;
5748         }
5749 
             /*
              * Drop the verified entry; verify_space_cache() later treats
              * entries still left in the ctl as an error.
              */
5750         unlink_free_space(cache->free_space_ctl, entry);
5751         free(entry);
5752         return 0;
5753 }
5754
5755 static int verify_space_cache(struct btrfs_root *root,
5756                               struct btrfs_block_group_cache *cache)
5757 {
5758         struct btrfs_path path;
5759         struct extent_buffer *leaf;
5760         struct btrfs_key key;
5761         u64 last;
5762         int ret = 0;
5763
5764         root = root->fs_info->extent_root;
5765
5766         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5767
5768         btrfs_init_path(&path);
5769         key.objectid = last;
5770         key.offset = 0;
5771         key.type = BTRFS_EXTENT_ITEM_KEY;
5772         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5773         if (ret < 0)
5774                 goto out;
5775         ret = 0;
5776         while (1) {
5777                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5778                         ret = btrfs_next_leaf(root, &path);
5779                         if (ret < 0)
5780                                 goto out;
5781                         if (ret > 0) {
5782                                 ret = 0;
5783                                 break;
5784                         }
5785                 }
5786                 leaf = path.nodes[0];
5787                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5788                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5789                         break;
5790                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5791                     key.type != BTRFS_METADATA_ITEM_KEY) {
5792                         path.slots[0]++;
5793                         continue;
5794                 }
5795
5796                 if (last == key.objectid) {
5797                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5798                                 last = key.objectid + key.offset;
5799                         else
5800                                 last = key.objectid + root->nodesize;
5801                         path.slots[0]++;
5802                         continue;
5803                 }
5804
5805                 ret = check_cache_range(root, cache, last,
5806                                         key.objectid - last);
5807                 if (ret)
5808                         break;
5809                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5810                         last = key.objectid + key.offset;
5811                 else
5812                         last = key.objectid + root->nodesize;
5813                 path.slots[0]++;
5814         }
5815
5816         if (last < cache->key.objectid + cache->key.offset)
5817                 ret = check_cache_range(root, cache, last,
5818                                         cache->key.objectid +
5819                                         cache->key.offset - last);
5820
5821 out:
5822         btrfs_release_path(&path);
5823
5824         if (!ret &&
5825             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5826                 fprintf(stderr, "There are still entries left in the space "
5827                         "cache\n");
5828                 ret = -EINVAL;
5829         }
5830
5831         return ret;
5832 }
5833
5834 static int check_space_cache(struct btrfs_root *root)
5835 {
5836         struct btrfs_block_group_cache *cache;
5837         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5838         int ret;
5839         int error = 0;
5840
5841         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5842             btrfs_super_generation(root->fs_info->super_copy) !=
5843             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5844                 printf("cache and super generation don't match, space cache "
5845                        "will be invalidated\n");
5846                 return 0;
5847         }
5848
5849         if (ctx.progress_enabled) {
5850                 ctx.tp = TASK_FREE_SPACE;
5851                 task_start(ctx.info);
5852         }
5853
5854         while (1) {
5855                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5856                 if (!cache)
5857                         break;
5858
5859                 start = cache->key.objectid + cache->key.offset;
5860                 if (!cache->free_space_ctl) {
5861                         if (btrfs_init_free_space_ctl(cache,
5862                                                       root->sectorsize)) {
5863                                 ret = -ENOMEM;
5864                                 break;
5865                         }
5866                 } else {
5867                         btrfs_remove_free_space_cache(cache);
5868                 }
5869
5870                 if (btrfs_fs_compat_ro(root->fs_info,
5871                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5872                         ret = exclude_super_stripes(root, cache);
5873                         if (ret) {
5874                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5875                                         strerror(-ret));
5876                                 error++;
5877                                 continue;
5878                         }
5879                         ret = load_free_space_tree(root->fs_info, cache);
5880                         free_excluded_extents(root, cache);
5881                         if (ret < 0) {
5882                                 fprintf(stderr, "could not load free space tree: %s\n",
5883                                         strerror(-ret));
5884                                 error++;
5885                                 continue;
5886                         }
5887                         error += ret;
5888                 } else {
5889                         ret = load_free_space_cache(root->fs_info, cache);
5890                         if (!ret)
5891                                 continue;
5892                 }
5893
5894                 ret = verify_space_cache(root, cache);
5895                 if (ret) {
5896                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5897                                 cache->key.objectid);
5898                         error++;
5899                 }
5900         }
5901
5902         task_stop(ctx.info);
5903
5904         return error ? -EINVAL : 0;
5905 }
5906
5907 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5908                         u64 num_bytes, unsigned long leaf_offset,
5909                         struct extent_buffer *eb) {
5910
5911         u64 offset = 0;
5912         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5913         char *data;
5914         unsigned long csum_offset;
5915         u32 csum;
5916         u32 csum_expected;
5917         u64 read_len;
5918         u64 data_checked = 0;
5919         u64 tmp;
5920         int ret = 0;
5921         int mirror;
5922         int num_copies;
5923
5924         if (num_bytes % root->sectorsize)
5925                 return -EINVAL;
5926
5927         data = malloc(num_bytes);
5928         if (!data)
5929                 return -ENOMEM;
5930
5931         while (offset < num_bytes) {
5932                 mirror = 0;
5933 again:
5934                 read_len = num_bytes - offset;
5935                 /* read as much space once a time */
5936                 ret = read_extent_data(root, data + offset,
5937                                 bytenr + offset, &read_len, mirror);
5938                 if (ret)
5939                         goto out;
5940                 data_checked = 0;
5941                 /* verify every 4k data's checksum */
5942                 while (data_checked < read_len) {
5943                         csum = ~(u32)0;
5944                         tmp = offset + data_checked;
5945
5946                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5947                                                csum, root->sectorsize);
5948                         btrfs_csum_final(csum, (u8 *)&csum);
5949
5950                         csum_offset = leaf_offset +
5951                                  tmp / root->sectorsize * csum_size;
5952                         read_extent_buffer(eb, (char *)&csum_expected,
5953                                            csum_offset, csum_size);
5954                         /* try another mirror */
5955                         if (csum != csum_expected) {
5956                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5957                                                 mirror, bytenr + tmp,
5958                                                 csum, csum_expected);
5959                                 num_copies = btrfs_num_copies(
5960                                                 &root->fs_info->mapping_tree,
5961                                                 bytenr, num_bytes);
5962                                 if (mirror < num_copies - 1) {
5963                                         mirror += 1;
5964                                         goto again;
5965                                 }
5966                         }
5967                         data_checked += root->sectorsize;
5968                 }
5969                 offset += read_len;
5970         }
5971 out:
5972         free(data);
5973         return ret;
5974 }
5975
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEM keys in the extent tree.
 *
 * The search starts at the last item at or before
 * (bytenr, EXTENT_ITEM, -1) and walks forward, shrinking the range by every
 * extent item that overlaps it.  Items of any other key type are skipped.
 *
 * Returns 0 when the whole range is covered, 1 when some part of it is not
 * (a message is printed), or a negative error from the tree search or leaf
 * walk.
 */
5976 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5977                                u64 num_bytes)
5978 {
5979         struct btrfs_path path;
5980         struct extent_buffer *leaf;
5981         struct btrfs_key key;
5982         int ret;
5983 
5984         btrfs_init_path(&path);
5985         key.objectid = bytenr;
5986         key.type = BTRFS_EXTENT_ITEM_KEY;
5987         key.offset = (u64)-1;
5988 
     /*
      * NOTE(review): when re-entered through "goto again", key holds the
      * last key decoded from a leaf, not the initial search key set above.
      */
5989 again:
5990         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5991                                 0, 0);
5992         if (ret < 0) {
5993                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5994                 btrfs_release_path(&path);
5995                 return ret;
5996         } else if (ret) {
                     /* No exact match: step back to the previous item. */
5997                 if (path.slots[0] > 0) {
5998                         path.slots[0]--;
5999                 } else {
6000                         ret = btrfs_prev_leaf(root, &path);
6001                         if (ret < 0) {
6002                                 goto out;
6003                         } else if (ret > 0) {
6004                                 ret = 0;
6005                                 goto out;
6006                         }
6007                 }
6008         }
6009 
6010         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6011 
6012         /*
6013          * Block group items come before extent items if they have the same
6014          * bytenr, so walk back one more just in case.  Dear future traveller,
6015          * first congrats on mastering time travel.  Now if it's not too much
6016          * trouble could you go back to 2006 and tell Chris to make the
6017          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6018          * EXTENT_ITEM_KEY please?
6019          */
6020         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6021                 if (path.slots[0] > 0) {
6022                         path.slots[0]--;
6023                 } else {
6024                         ret = btrfs_prev_leaf(root, &path);
6025                         if (ret < 0) {
6026                                 goto out;
6027                         } else if (ret > 0) {
6028                                 ret = 0;
6029                                 goto out;
6030                         }
6031                 }
6032                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6033         }
6034 
             /* Walk forward until the whole range has been accounted for. */
6035         while (num_bytes) {
6036                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6037                         ret = btrfs_next_leaf(root, &path);
6038                         if (ret < 0) {
6039                                 fprintf(stderr, "Error going to next leaf "
6040                                         "%d\n", ret);
6041                                 btrfs_release_path(&path);
6042                                 return ret;
6043                         } else if (ret) {
6044                                 break;
6045                         }
6046                 }
6047                 leaf = path.nodes[0];
6048                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6049                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6050                         path.slots[0]++;
6051                         continue;
6052                 }
                     /* Extent ends before our range starts: not relevant. */
6053                 if (key.objectid + key.offset < bytenr) {
6054                         path.slots[0]++;
6055                         continue;
6056                 }
                     /* Extent starts past our range: nothing more to find. */
6057                 if (key.objectid > bytenr + num_bytes)
6058                         break;
6059 
6060                 if (key.objectid == bytenr) {
                             /* Extent starts at bytenr: consume the front. */
6061                         if (key.offset >= num_bytes) {
6062                                 num_bytes = 0;
6063                                 break;
6064                         }
6065                         num_bytes -= key.offset;
6066                         bytenr += key.offset;
6067                 } else if (key.objectid < bytenr) {
                             /* Extent starts earlier: keep what lies past its end. */
6068                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6069                                 num_bytes = 0;
6070                                 break;
6071                         }
6072                         num_bytes = (bytenr + num_bytes) -
6073                                 (key.objectid + key.offset);
6074                         bytenr = key.objectid + key.offset;
6075                 } else {
6076                         if (key.objectid + key.offset < bytenr + num_bytes) {
6077                                 u64 new_start = key.objectid + key.offset;
6078                                 u64 new_bytes = bytenr + num_bytes - new_start;
6079 
6080                                 /*
6081                                  * Weird case, the extent is in the middle of
6082                                  * our range, we'll have to search one side
6083                                  * and then the other.  Not sure if this happens
6084                                  * in real life, but no harm in coding it up
6085                                  * anyway just in case.
6086                                  */
6087                                 btrfs_release_path(&path);
6088                                 ret = check_extent_exists(root, new_start,
6089                                                           new_bytes);
                                     /*
                                      * ret is reset to 0 after the loop;
                                      * the failure is then reported as 1
                                      * because num_bytes is still non-zero.
                                      */
6090                                 if (ret) {
6091                                         fprintf(stderr, "Right section didn't "
6092                                                 "have a record\n");
6093                                         break;
6094                                 }
6095                                 num_bytes = key.objectid - bytenr;
6096                                 goto again;
6097                         }
                             /* Extent covers the tail: only the left part remains. */
6098                         num_bytes = key.objectid - bytenr;
6099                 }
6100                 path.slots[0]++;
6101         }
6102         ret = 0;
6103 
6104 out:
             /* Bytes still unaccounted for mean part of the range has no extent. */
6105         if (num_bytes && !ret) {
6106                 fprintf(stderr, "There are no extents for csum range "
6107                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6108                 ret = 1;
6109         }
6110 
6111         btrfs_release_path(&path);
6112         return ret;
6113 }
6114
6115 static int check_csums(struct btrfs_root *root)
6116 {
6117         struct btrfs_path path;
6118         struct extent_buffer *leaf;
6119         struct btrfs_key key;
6120         u64 offset = 0, num_bytes = 0;
6121         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6122         int errors = 0;
6123         int ret;
6124         u64 data_len;
6125         unsigned long leaf_offset;
6126
6127         root = root->fs_info->csum_root;
6128         if (!extent_buffer_uptodate(root->node)) {
6129                 fprintf(stderr, "No valid csum tree found\n");
6130                 return -ENOENT;
6131         }
6132
6133         btrfs_init_path(&path);
6134         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6135         key.type = BTRFS_EXTENT_CSUM_KEY;
6136         key.offset = 0;
6137         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6138         if (ret < 0) {
6139                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6140                 btrfs_release_path(&path);
6141                 return ret;
6142         }
6143
6144         if (ret > 0 && path.slots[0])
6145                 path.slots[0]--;
6146         ret = 0;
6147
6148         while (1) {
6149                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6150                         ret = btrfs_next_leaf(root, &path);
6151                         if (ret < 0) {
6152                                 fprintf(stderr, "Error going to next leaf "
6153                                         "%d\n", ret);
6154                                 break;
6155                         }
6156                         if (ret)
6157                                 break;
6158                 }
6159                 leaf = path.nodes[0];
6160
6161                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6162                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6163                         path.slots[0]++;
6164                         continue;
6165                 }
6166
6167                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6168                               csum_size) * root->sectorsize;
6169                 if (!check_data_csum)
6170                         goto skip_csum_check;
6171                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6172                 ret = check_extent_csums(root, key.offset, data_len,
6173                                          leaf_offset, leaf);
6174                 if (ret)
6175                         break;
6176 skip_csum_check:
6177                 if (!num_bytes) {
6178                         offset = key.offset;
6179                 } else if (key.offset != offset + num_bytes) {
6180                         ret = check_extent_exists(root, offset, num_bytes);
6181                         if (ret) {
6182                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6183                                         "there is no extent record\n",
6184                                         offset, offset+num_bytes);
6185                                 errors++;
6186                         }
6187                         offset = key.offset;
6188                         num_bytes = 0;
6189                 }
6190                 num_bytes += data_len;
6191                 path.slots[0]++;
6192         }
6193
6194         btrfs_release_path(&path);
6195         return errors;
6196 }
6197
6198 static int is_dropped_key(struct btrfs_key *key,
6199                           struct btrfs_key *drop_key) {
6200         if (key->objectid < drop_key->objectid)
6201                 return 1;
6202         else if (key->objectid == drop_key->objectid) {
6203                 if (key->type < drop_key->type)
6204                         return 1;
6205                 else if (key->type == drop_key->type) {
6206                         if (key->offset < drop_key->offset)
6207                                 return 1;
6208                 }
6209         }
6210         return 0;
6211 }
6212
6213 /*
6214  * Here are the rules for FULL_BACKREF.
6215  *
6216  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6217  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6218  *      FULL_BACKREF set.
6219  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6220  *    if it happened after the relocation occurred since we'll have dropped the
6221  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6222  *    have no real way to know for sure.
6223  *
6224  * We process the blocks one root at a time, and we start from the lowest root
6225  * objectid and go to the highest.  So we can just lookup the owner backref for
6226  * the record and if we don't find it then we know it doesn't exist and we have
6227  * a FULL BACKREF.
6228  *
6229  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6230  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6231  * be set or not and then we can check later once we've gathered all the refs.
6232  */
6233 static int calc_extent_flag(struct btrfs_root *root,
6234                            struct cache_tree *extent_cache,
6235                            struct extent_buffer *buf,
6236                            struct root_item_record *ri,
6237                            u64 *flags)
6238 {
6239         struct extent_record *rec;
6240         struct cache_extent *cache;
6241         struct tree_backref *tback;
6242         u64 owner = 0;
6243
6244         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6245         /* we have added this extent before */
6246         if (!cache)
6247                 return -ENOENT;
6248
6249         rec = container_of(cache, struct extent_record, cache);
6250
6251         /*
6252          * Except file/reloc tree, we can not have
6253          * FULL BACKREF MODE
6254          */
6255         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6256                 goto normal;
6257         /*
6258          * root node
6259          */
6260         if (buf->start == ri->bytenr)
6261                 goto normal;
6262
6263         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6264                 goto full_backref;
6265
6266         owner = btrfs_header_owner(buf);
6267         if (owner == ri->objectid)
6268                 goto normal;
6269
6270         tback = find_tree_backref(rec, 0, owner);
6271         if (!tback)
6272                 goto full_backref;
6273 normal:
6274         *flags = 0;
6275         if (rec->flag_block_full_backref != FLAG_UNSET &&
6276             rec->flag_block_full_backref != 0)
6277                 rec->bad_full_backref = 1;
6278         return 0;
6279 full_backref:
6280         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6281         if (rec->flag_block_full_backref != FLAG_UNSET &&
6282             rec->flag_block_full_backref != 1)
6283                 rec->bad_full_backref = 1;
6284         return 0;
6285 }
6286
6287 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6288 {
6289         fprintf(stderr, "Invalid key type(");
6290         print_key_type(stderr, 0, key_type);
6291         fprintf(stderr, ") found in root(");
6292         print_objectid(stderr, rootid, 0);
6293         fprintf(stderr, ")\n");
6294 }
6295
6296 /*
6297  * Check if the key is valid with its extent buffer.
6298  *
6299  * This is a early check in case invalid key exists in a extent buffer
6300  * This is not comprehensive yet, but should prevent wrong key/item passed
6301  * further
6302  */
6303 static int check_type_with_root(u64 rootid, u8 key_type)
6304 {
6305         switch (key_type) {
6306         /* Only valid in chunk tree */
6307         case BTRFS_DEV_ITEM_KEY:
6308         case BTRFS_CHUNK_ITEM_KEY:
6309                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6310                         goto err;
6311                 break;
6312         /* valid in csum and log tree */
6313         case BTRFS_CSUM_TREE_OBJECTID:
6314                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6315                       is_fstree(rootid)))
6316                         goto err;
6317                 break;
6318         case BTRFS_EXTENT_ITEM_KEY:
6319         case BTRFS_METADATA_ITEM_KEY:
6320         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6321                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6322                         goto err;
6323                 break;
6324         case BTRFS_ROOT_ITEM_KEY:
6325                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6326                         goto err;
6327                 break;
6328         case BTRFS_DEV_EXTENT_KEY:
6329                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6330                         goto err;
6331                 break;
6332         }
6333         return 0;
6334 err:
6335         report_mismatch_key_root(key_type, rootid);
6336         return -EINVAL;
6337 }
6338
6339 static int run_next_block(struct btrfs_root *root,
6340                           struct block_info *bits,
6341                           int bits_nr,
6342                           u64 *last,
6343                           struct cache_tree *pending,
6344                           struct cache_tree *seen,
6345                           struct cache_tree *reada,
6346                           struct cache_tree *nodes,
6347                           struct cache_tree *extent_cache,
6348                           struct cache_tree *chunk_cache,
6349                           struct rb_root *dev_cache,
6350                           struct block_group_tree *block_group_cache,
6351                           struct device_extent_tree *dev_extent_cache,
6352                           struct root_item_record *ri)
6353 {
6354         struct extent_buffer *buf;
6355         struct extent_record *rec = NULL;
6356         u64 bytenr;
6357         u32 size;
6358         u64 parent;
6359         u64 owner;
6360         u64 flags;
6361         u64 ptr;
6362         u64 gen = 0;
6363         int ret = 0;
6364         int i;
6365         int nritems;
6366         struct btrfs_key key;
6367         struct cache_extent *cache;
6368         int reada_bits;
6369
6370         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6371                                     bits_nr, &reada_bits);
6372         if (nritems == 0)
6373                 return 1;
6374
6375         if (!reada_bits) {
6376                 for(i = 0; i < nritems; i++) {
6377                         ret = add_cache_extent(reada, bits[i].start,
6378                                                bits[i].size);
6379                         if (ret == -EEXIST)
6380                                 continue;
6381
6382                         /* fixme, get the parent transid */
6383                         readahead_tree_block(root, bits[i].start,
6384                                              bits[i].size, 0);
6385                 }
6386         }
6387         *last = bits[0].start;
6388         bytenr = bits[0].start;
6389         size = bits[0].size;
6390
6391         cache = lookup_cache_extent(pending, bytenr, size);
6392         if (cache) {
6393                 remove_cache_extent(pending, cache);
6394                 free(cache);
6395         }
6396         cache = lookup_cache_extent(reada, bytenr, size);
6397         if (cache) {
6398                 remove_cache_extent(reada, cache);
6399                 free(cache);
6400         }
6401         cache = lookup_cache_extent(nodes, bytenr, size);
6402         if (cache) {
6403                 remove_cache_extent(nodes, cache);
6404                 free(cache);
6405         }
6406         cache = lookup_cache_extent(extent_cache, bytenr, size);
6407         if (cache) {
6408                 rec = container_of(cache, struct extent_record, cache);
6409                 gen = rec->parent_generation;
6410         }
6411
6412         /* fixme, get the real parent transid */
6413         buf = read_tree_block(root, bytenr, size, gen);
6414         if (!extent_buffer_uptodate(buf)) {
6415                 record_bad_block_io(root->fs_info,
6416                                     extent_cache, bytenr, size);
6417                 goto out;
6418         }
6419
6420         nritems = btrfs_header_nritems(buf);
6421
6422         flags = 0;
6423         if (!init_extent_tree) {
6424                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6425                                        btrfs_header_level(buf), 1, NULL,
6426                                        &flags);
6427                 if (ret < 0) {
6428                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6429                         if (ret < 0) {
6430                                 fprintf(stderr, "Couldn't calc extent flags\n");
6431                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6432                         }
6433                 }
6434         } else {
6435                 flags = 0;
6436                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6437                 if (ret < 0) {
6438                         fprintf(stderr, "Couldn't calc extent flags\n");
6439                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6440                 }
6441         }
6442
6443         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6444                 if (ri != NULL &&
6445                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6446                     ri->objectid == btrfs_header_owner(buf)) {
6447                         /*
6448                          * Ok we got to this block from it's original owner and
6449                          * we have FULL_BACKREF set.  Relocation can leave
6450                          * converted blocks over so this is altogether possible,
6451                          * however it's not possible if the generation > the
6452                          * last snapshot, so check for this case.
6453                          */
6454                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6455                             btrfs_header_generation(buf) > ri->last_snapshot) {
6456                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6457                                 rec->bad_full_backref = 1;
6458                         }
6459                 }
6460         } else {
6461                 if (ri != NULL &&
6462                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6463                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6464                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6465                         rec->bad_full_backref = 1;
6466                 }
6467         }
6468
6469         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6470                 rec->flag_block_full_backref = 1;
6471                 parent = bytenr;
6472                 owner = 0;
6473         } else {
6474                 rec->flag_block_full_backref = 0;
6475                 parent = 0;
6476                 owner = btrfs_header_owner(buf);
6477         }
6478
6479         ret = check_block(root, extent_cache, buf, flags);
6480         if (ret)
6481                 goto out;
6482
6483         if (btrfs_is_leaf(buf)) {
6484                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6485                 for (i = 0; i < nritems; i++) {
6486                         struct btrfs_file_extent_item *fi;
6487                         btrfs_item_key_to_cpu(buf, &key, i);
6488                         /*
6489                          * Check key type against the leaf owner.
6490                          * Could filter quite a lot of early error if
6491                          * owner is correct
6492                          */
6493                         if (check_type_with_root(btrfs_header_owner(buf),
6494                                                  key.type)) {
6495                                 fprintf(stderr, "ignoring invalid key\n");
6496                                 continue;
6497                         }
6498                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6499                                 process_extent_item(root, extent_cache, buf,
6500                                                     i);
6501                                 continue;
6502                         }
6503                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6504                                 process_extent_item(root, extent_cache, buf,
6505                                                     i);
6506                                 continue;
6507                         }
6508                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6509                                 total_csum_bytes +=
6510                                         btrfs_item_size_nr(buf, i);
6511                                 continue;
6512                         }
6513                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6514                                 process_chunk_item(chunk_cache, &key, buf, i);
6515                                 continue;
6516                         }
6517                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6518                                 process_device_item(dev_cache, &key, buf, i);
6519                                 continue;
6520                         }
6521                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6522                                 process_block_group_item(block_group_cache,
6523                                         &key, buf, i);
6524                                 continue;
6525                         }
6526                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6527                                 process_device_extent_item(dev_extent_cache,
6528                                         &key, buf, i);
6529                                 continue;
6530
6531                         }
6532                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6533 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6534                                 process_extent_ref_v0(extent_cache, buf, i);
6535 #else
6536                                 BUG();
6537 #endif
6538                                 continue;
6539                         }
6540
6541                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6542                                 ret = add_tree_backref(extent_cache,
6543                                                 key.objectid, 0, key.offset, 0);
6544                                 if (ret < 0)
6545                                         error("add_tree_backref failed: %s",
6546                                               strerror(-ret));
6547                                 continue;
6548                         }
6549                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6550                                 ret = add_tree_backref(extent_cache,
6551                                                 key.objectid, key.offset, 0, 0);
6552                                 if (ret < 0)
6553                                         error("add_tree_backref failed: %s",
6554                                               strerror(-ret));
6555                                 continue;
6556                         }
6557                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6558                                 struct btrfs_extent_data_ref *ref;
6559                                 ref = btrfs_item_ptr(buf, i,
6560                                                 struct btrfs_extent_data_ref);
6561                                 add_data_backref(extent_cache,
6562                                         key.objectid, 0,
6563                                         btrfs_extent_data_ref_root(buf, ref),
6564                                         btrfs_extent_data_ref_objectid(buf,
6565                                                                        ref),
6566                                         btrfs_extent_data_ref_offset(buf, ref),
6567                                         btrfs_extent_data_ref_count(buf, ref),
6568                                         0, root->sectorsize);
6569                                 continue;
6570                         }
6571                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6572                                 struct btrfs_shared_data_ref *ref;
6573                                 ref = btrfs_item_ptr(buf, i,
6574                                                 struct btrfs_shared_data_ref);
6575                                 add_data_backref(extent_cache,
6576                                         key.objectid, key.offset, 0, 0, 0,
6577                                         btrfs_shared_data_ref_count(buf, ref),
6578                                         0, root->sectorsize);
6579                                 continue;
6580                         }
6581                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6582                                 struct bad_item *bad;
6583
6584                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6585                                         continue;
6586                                 if (!owner)
6587                                         continue;
6588                                 bad = malloc(sizeof(struct bad_item));
6589                                 if (!bad)
6590                                         continue;
6591                                 INIT_LIST_HEAD(&bad->list);
6592                                 memcpy(&bad->key, &key,
6593                                        sizeof(struct btrfs_key));
6594                                 bad->root_id = owner;
6595                                 list_add_tail(&bad->list, &delete_items);
6596                                 continue;
6597                         }
6598                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6599                                 continue;
6600                         fi = btrfs_item_ptr(buf, i,
6601                                             struct btrfs_file_extent_item);
6602                         if (btrfs_file_extent_type(buf, fi) ==
6603                             BTRFS_FILE_EXTENT_INLINE)
6604                                 continue;
6605                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6606                                 continue;
6607
6608                         data_bytes_allocated +=
6609                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6610                         if (data_bytes_allocated < root->sectorsize) {
6611                                 abort();
6612                         }
6613                         data_bytes_referenced +=
6614                                 btrfs_file_extent_num_bytes(buf, fi);
6615                         add_data_backref(extent_cache,
6616                                 btrfs_file_extent_disk_bytenr(buf, fi),
6617                                 parent, owner, key.objectid, key.offset -
6618                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6619                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6620                 }
6621         } else {
6622                 int level;
6623                 struct btrfs_key first_key;
6624
6625                 first_key.objectid = 0;
6626
6627                 if (nritems > 0)
6628                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6629                 level = btrfs_header_level(buf);
6630                 for (i = 0; i < nritems; i++) {
6631                         struct extent_record tmpl;
6632
6633                         ptr = btrfs_node_blockptr(buf, i);
6634                         size = root->nodesize;
6635                         btrfs_node_key_to_cpu(buf, &key, i);
6636                         if (ri != NULL) {
6637                                 if ((level == ri->drop_level)
6638                                     && is_dropped_key(&key, &ri->drop_key)) {
6639                                         continue;
6640                                 }
6641                         }
6642
6643                         memset(&tmpl, 0, sizeof(tmpl));
6644                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6645                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6646                         tmpl.start = ptr;
6647                         tmpl.nr = size;
6648                         tmpl.refs = 1;
6649                         tmpl.metadata = 1;
6650                         tmpl.max_size = size;
6651                         ret = add_extent_rec(extent_cache, &tmpl);
6652                         if (ret < 0)
6653                                 goto out;
6654
6655                         ret = add_tree_backref(extent_cache, ptr, parent,
6656                                         owner, 1);
6657                         if (ret < 0) {
6658                                 error("add_tree_backref failed: %s",
6659                                       strerror(-ret));
6660                                 continue;
6661                         }
6662
6663                         if (level > 1) {
6664                                 add_pending(nodes, seen, ptr, size);
6665                         } else {
6666                                 add_pending(pending, seen, ptr, size);
6667                         }
6668                 }
6669                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6670                                       nritems) * sizeof(struct btrfs_key_ptr);
6671         }
6672         total_btree_bytes += buf->len;
6673         if (fs_root_objectid(btrfs_header_owner(buf)))
6674                 total_fs_tree_bytes += buf->len;
6675         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6676                 total_extent_tree_bytes += buf->len;
6677         if (!found_old_backref &&
6678             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6679             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6680             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6681                 found_old_backref = 1;
6682 out:
6683         free_extent_buffer(buf);
6684         return ret;
6685 }
6686
6687 static int add_root_to_pending(struct extent_buffer *buf,
6688                                struct cache_tree *extent_cache,
6689                                struct cache_tree *pending,
6690                                struct cache_tree *seen,
6691                                struct cache_tree *nodes,
6692                                u64 objectid)
6693 {
6694         struct extent_record tmpl;
6695         int ret;
6696
6697         if (btrfs_header_level(buf) > 0)
6698                 add_pending(nodes, seen, buf->start, buf->len);
6699         else
6700                 add_pending(pending, seen, buf->start, buf->len);
6701
6702         memset(&tmpl, 0, sizeof(tmpl));
6703         tmpl.start = buf->start;
6704         tmpl.nr = buf->len;
6705         tmpl.is_root = 1;
6706         tmpl.refs = 1;
6707         tmpl.metadata = 1;
6708         tmpl.max_size = buf->len;
6709         add_extent_rec(extent_cache, &tmpl);
6710
6711         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6712             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6713                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6714                                 0, 1);
6715         else
6716                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6717                                 1);
6718         return ret;
6719 }
6720
6721 /* as we fix the tree, we might be deleting blocks that
6722  * we're tracking for repair.  This hook makes sure we
6723  * remove any backrefs for blocks as we are fixing them.
6724  */
6725 static int free_extent_hook(struct btrfs_trans_handle *trans,
6726                             struct btrfs_root *root,
6727                             u64 bytenr, u64 num_bytes, u64 parent,
6728                             u64 root_objectid, u64 owner, u64 offset,
6729                             int refs_to_drop)
6730 {
6731         struct extent_record *rec;
6732         struct cache_extent *cache;
6733         int is_data;
6734         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6735
6736         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6737         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6738         if (!cache)
6739                 return 0;
6740
6741         rec = container_of(cache, struct extent_record, cache);
6742         if (is_data) {
6743                 struct data_backref *back;
6744                 back = find_data_backref(rec, parent, root_objectid, owner,
6745                                          offset, 1, bytenr, num_bytes);
6746                 if (!back)
6747                         goto out;
6748                 if (back->node.found_ref) {
6749                         back->found_ref -= refs_to_drop;
6750                         if (rec->refs)
6751                                 rec->refs -= refs_to_drop;
6752                 }
6753                 if (back->node.found_extent_tree) {
6754                         back->num_refs -= refs_to_drop;
6755                         if (rec->extent_item_refs)
6756                                 rec->extent_item_refs -= refs_to_drop;
6757                 }
6758                 if (back->found_ref == 0)
6759                         back->node.found_ref = 0;
6760                 if (back->num_refs == 0)
6761                         back->node.found_extent_tree = 0;
6762
6763                 if (!back->node.found_extent_tree && back->node.found_ref) {
6764                         list_del(&back->node.list);
6765                         free(back);
6766                 }
6767         } else {
6768                 struct tree_backref *back;
6769                 back = find_tree_backref(rec, parent, root_objectid);
6770                 if (!back)
6771                         goto out;
6772                 if (back->node.found_ref) {
6773                         if (rec->refs)
6774                                 rec->refs--;
6775                         back->node.found_ref = 0;
6776                 }
6777                 if (back->node.found_extent_tree) {
6778                         if (rec->extent_item_refs)
6779                                 rec->extent_item_refs--;
6780                         back->node.found_extent_tree = 0;
6781                 }
6782                 if (!back->node.found_extent_tree && back->node.found_ref) {
6783                         list_del(&back->node.list);
6784                         free(back);
6785                 }
6786         }
6787         maybe_free_extent_rec(extent_cache, rec);
6788 out:
6789         return 0;
6790 }
6791
6792 static int delete_extent_records(struct btrfs_trans_handle *trans,
6793                                  struct btrfs_root *root,
6794                                  struct btrfs_path *path,
6795                                  u64 bytenr, u64 new_len)
6796 {
6797         struct btrfs_key key;
6798         struct btrfs_key found_key;
6799         struct extent_buffer *leaf;
6800         int ret;
6801         int slot;
6802
6803
6804         key.objectid = bytenr;
6805         key.type = (u8)-1;
6806         key.offset = (u64)-1;
6807
6808         while(1) {
6809                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6810                                         &key, path, 0, 1);
6811                 if (ret < 0)
6812                         break;
6813
6814                 if (ret > 0) {
6815                         ret = 0;
6816                         if (path->slots[0] == 0)
6817                                 break;
6818                         path->slots[0]--;
6819                 }
6820                 ret = 0;
6821
6822                 leaf = path->nodes[0];
6823                 slot = path->slots[0];
6824
6825                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6826                 if (found_key.objectid != bytenr)
6827                         break;
6828
6829                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6830                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6831                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6832                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6833                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6834                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6835                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6836                         btrfs_release_path(path);
6837                         if (found_key.type == 0) {
6838                                 if (found_key.offset == 0)
6839                                         break;
6840                                 key.offset = found_key.offset - 1;
6841                                 key.type = found_key.type;
6842                         }
6843                         key.type = found_key.type - 1;
6844                         key.offset = (u64)-1;
6845                         continue;
6846                 }
6847
6848                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6849                         found_key.objectid, found_key.type, found_key.offset);
6850
6851                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6852                 if (ret)
6853                         break;
6854                 btrfs_release_path(path);
6855
6856                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6857                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6858                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6859                                 found_key.offset : root->nodesize;
6860
6861                         ret = btrfs_update_block_group(trans, root, bytenr,
6862                                                        bytes, 0, 0);
6863                         if (ret)
6864                                 break;
6865                 }
6866         }
6867
6868         btrfs_release_path(path);
6869         return ret;
6870 }
6871
6872 /*
6873  * for a single backref, this will allocate a new extent
6874  * and add the backref to it.
6875  */
6876 static int record_extent(struct btrfs_trans_handle *trans,
6877                          struct btrfs_fs_info *info,
6878                          struct btrfs_path *path,
6879                          struct extent_record *rec,
6880                          struct extent_backref *back,
6881                          int allocated, u64 flags)
6882 {
6883         int ret;
6884         struct btrfs_root *extent_root = info->extent_root;
6885         struct extent_buffer *leaf;
6886         struct btrfs_key ins_key;
6887         struct btrfs_extent_item *ei;
6888         struct data_backref *dback;
6889         struct btrfs_tree_block_info *bi;
6890
6891         if (!back->is_data)
6892                 rec->max_size = max_t(u64, rec->max_size,
6893                                     info->extent_root->nodesize);
6894
6895         if (!allocated) {
6896                 u32 item_size = sizeof(*ei);
6897
6898                 if (!back->is_data)
6899                         item_size += sizeof(*bi);
6900
6901                 ins_key.objectid = rec->start;
6902                 ins_key.offset = rec->max_size;
6903                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6904
6905                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6906                                         &ins_key, item_size);
6907                 if (ret)
6908                         goto fail;
6909
6910                 leaf = path->nodes[0];
6911                 ei = btrfs_item_ptr(leaf, path->slots[0],
6912                                     struct btrfs_extent_item);
6913
6914                 btrfs_set_extent_refs(leaf, ei, 0);
6915                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6916
6917                 if (back->is_data) {
6918                         btrfs_set_extent_flags(leaf, ei,
6919                                                BTRFS_EXTENT_FLAG_DATA);
6920                 } else {
6921                         struct btrfs_disk_key copy_key;;
6922
6923                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6924                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6925                                              sizeof(*bi));
6926
6927                         btrfs_set_disk_key_objectid(&copy_key,
6928                                                     rec->info_objectid);
6929                         btrfs_set_disk_key_type(&copy_key, 0);
6930                         btrfs_set_disk_key_offset(&copy_key, 0);
6931
6932                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6933                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6934
6935                         btrfs_set_extent_flags(leaf, ei,
6936                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6937                 }
6938
6939                 btrfs_mark_buffer_dirty(leaf);
6940                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6941                                                rec->max_size, 1, 0);
6942                 if (ret)
6943                         goto fail;
6944                 btrfs_release_path(path);
6945         }
6946
6947         if (back->is_data) {
6948                 u64 parent;
6949                 int i;
6950
6951                 dback = to_data_backref(back);
6952                 if (back->full_backref)
6953                         parent = dback->parent;
6954                 else
6955                         parent = 0;
6956
6957                 for (i = 0; i < dback->found_ref; i++) {
6958                         /* if parent != 0, we're doing a full backref
6959                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6960                          * just makes the backref allocator create a data
6961                          * backref
6962                          */
6963                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6964                                                    rec->start, rec->max_size,
6965                                                    parent,
6966                                                    dback->root,
6967                                                    parent ?
6968                                                    BTRFS_FIRST_FREE_OBJECTID :
6969                                                    dback->owner,
6970                                                    dback->offset);
6971                         if (ret)
6972                                 break;
6973                 }
6974                 fprintf(stderr, "adding new data backref"
6975                                 " on %llu %s %llu owner %llu"
6976                                 " offset %llu found %d\n",
6977                                 (unsigned long long)rec->start,
6978                                 back->full_backref ?
6979                                 "parent" : "root",
6980                                 back->full_backref ?
6981                                 (unsigned long long)parent :
6982                                 (unsigned long long)dback->root,
6983                                 (unsigned long long)dback->owner,
6984                                 (unsigned long long)dback->offset,
6985                                 dback->found_ref);
6986         } else {
6987                 u64 parent;
6988                 struct tree_backref *tback;
6989
6990                 tback = to_tree_backref(back);
6991                 if (back->full_backref)
6992                         parent = tback->parent;
6993                 else
6994                         parent = 0;
6995
6996                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6997                                            rec->start, rec->max_size,
6998                                            parent, tback->root, 0, 0);
6999                 fprintf(stderr, "adding new tree backref on "
7000                         "start %llu len %llu parent %llu root %llu\n",
7001                         rec->start, rec->max_size, parent, tback->root);
7002         }
7003 fail:
7004         btrfs_release_path(path);
7005         return ret;
7006 }
7007
7008 static struct extent_entry *find_entry(struct list_head *entries,
7009                                        u64 bytenr, u64 bytes)
7010 {
7011         struct extent_entry *entry = NULL;
7012
7013         list_for_each_entry(entry, entries, list) {
7014                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7015                         return entry;
7016         }
7017
7018         return NULL;
7019 }
7020
7021 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7022 {
7023         struct extent_entry *entry, *best = NULL, *prev = NULL;
7024
7025         list_for_each_entry(entry, entries, list) {
7026                 /*
7027                  * If there are as many broken entries as entries then we know
7028                  * not to trust this particular entry.
7029                  */
7030                 if (entry->broken == entry->count)
7031                         continue;
7032
7033                 /*
7034                  * Special case, when there are only two entries and 'best' is
7035                  * the first one
7036                  */
7037                 if (!prev) {
7038                         best = entry;
7039                         prev = entry;
7040                         continue;
7041                 }
7042
7043                 /*
7044                  * If our current entry == best then we can't be sure our best
7045                  * is really the best, so we need to keep searching.
7046                  */
7047                 if (best && best->count == entry->count) {
7048                         prev = entry;
7049                         best = NULL;
7050                         continue;
7051                 }
7052
7053                 /* Prev == entry, not good enough, have to keep searching */
7054                 if (!prev->broken && prev->count == entry->count)
7055                         continue;
7056
7057                 if (!best)
7058                         best = (prev->count > entry->count) ? prev : entry;
7059                 else if (best->count < entry->count)
7060                         best = entry;
7061                 prev = entry;
7062         }
7063
7064         return best;
7065 }
7066
7067 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7068                       struct data_backref *dback, struct extent_entry *entry)
7069 {
7070         struct btrfs_trans_handle *trans;
7071         struct btrfs_root *root;
7072         struct btrfs_file_extent_item *fi;
7073         struct extent_buffer *leaf;
7074         struct btrfs_key key;
7075         u64 bytenr, bytes;
7076         int ret, err;
7077
7078         key.objectid = dback->root;
7079         key.type = BTRFS_ROOT_ITEM_KEY;
7080         key.offset = (u64)-1;
7081         root = btrfs_read_fs_root(info, &key);
7082         if (IS_ERR(root)) {
7083                 fprintf(stderr, "Couldn't find root for our ref\n");
7084                 return -EINVAL;
7085         }
7086
7087         /*
7088          * The backref points to the original offset of the extent if it was
7089          * split, so we need to search down to the offset we have and then walk
7090          * forward until we find the backref we're looking for.
7091          */
7092         key.objectid = dback->owner;
7093         key.type = BTRFS_EXTENT_DATA_KEY;
7094         key.offset = dback->offset;
7095         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7096         if (ret < 0) {
7097                 fprintf(stderr, "Error looking up ref %d\n", ret);
7098                 return ret;
7099         }
7100
7101         while (1) {
7102                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7103                         ret = btrfs_next_leaf(root, path);
7104                         if (ret) {
7105                                 fprintf(stderr, "Couldn't find our ref, next\n");
7106                                 return -EINVAL;
7107                         }
7108                 }
7109                 leaf = path->nodes[0];
7110                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7111                 if (key.objectid != dback->owner ||
7112                     key.type != BTRFS_EXTENT_DATA_KEY) {
7113                         fprintf(stderr, "Couldn't find our ref, search\n");
7114                         return -EINVAL;
7115                 }
7116                 fi = btrfs_item_ptr(leaf, path->slots[0],
7117                                     struct btrfs_file_extent_item);
7118                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7119                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7120
7121                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7122                         break;
7123                 path->slots[0]++;
7124         }
7125
7126         btrfs_release_path(path);
7127
7128         trans = btrfs_start_transaction(root, 1);
7129         if (IS_ERR(trans))
7130                 return PTR_ERR(trans);
7131
7132         /*
7133          * Ok we have the key of the file extent we want to fix, now we can cow
7134          * down to the thing and fix it.
7135          */
7136         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7137         if (ret < 0) {
7138                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7139                         key.objectid, key.type, key.offset, ret);
7140                 goto out;
7141         }
7142         if (ret > 0) {
7143                 fprintf(stderr, "Well that's odd, we just found this key "
7144                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7145                         key.offset);
7146                 ret = -EINVAL;
7147                 goto out;
7148         }
7149         leaf = path->nodes[0];
7150         fi = btrfs_item_ptr(leaf, path->slots[0],
7151                             struct btrfs_file_extent_item);
7152
7153         if (btrfs_file_extent_compression(leaf, fi) &&
7154             dback->disk_bytenr != entry->bytenr) {
7155                 fprintf(stderr, "Ref doesn't match the record start and is "
7156                         "compressed, please take a btrfs-image of this file "
7157                         "system and send it to a btrfs developer so they can "
7158                         "complete this functionality for bytenr %Lu\n",
7159                         dback->disk_bytenr);
7160                 ret = -EINVAL;
7161                 goto out;
7162         }
7163
7164         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7165                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7166         } else if (dback->disk_bytenr > entry->bytenr) {
7167                 u64 off_diff, offset;
7168
7169                 off_diff = dback->disk_bytenr - entry->bytenr;
7170                 offset = btrfs_file_extent_offset(leaf, fi);
7171                 if (dback->disk_bytenr + offset +
7172                     btrfs_file_extent_num_bytes(leaf, fi) >
7173                     entry->bytenr + entry->bytes) {
7174                         fprintf(stderr, "Ref is past the entry end, please "
7175                                 "take a btrfs-image of this file system and "
7176                                 "send it to a btrfs developer, ref %Lu\n",
7177                                 dback->disk_bytenr);
7178                         ret = -EINVAL;
7179                         goto out;
7180                 }
7181                 offset += off_diff;
7182                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7183                 btrfs_set_file_extent_offset(leaf, fi, offset);
7184         } else if (dback->disk_bytenr < entry->bytenr) {
7185                 u64 offset;
7186
7187                 offset = btrfs_file_extent_offset(leaf, fi);
7188                 if (dback->disk_bytenr + offset < entry->bytenr) {
7189                         fprintf(stderr, "Ref is before the entry start, please"
7190                                 " take a btrfs-image of this file system and "
7191                                 "send it to a btrfs developer, ref %Lu\n",
7192                                 dback->disk_bytenr);
7193                         ret = -EINVAL;
7194                         goto out;
7195                 }
7196
7197                 offset += dback->disk_bytenr;
7198                 offset -= entry->bytenr;
7199                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7200                 btrfs_set_file_extent_offset(leaf, fi, offset);
7201         }
7202
7203         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7204
7205         /*
7206          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7207          * only do this if we aren't using compression, otherwise it's a
7208          * trickier case.
7209          */
7210         if (!btrfs_file_extent_compression(leaf, fi))
7211                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7212         else
7213                 printf("ram bytes may be wrong?\n");
7214         btrfs_mark_buffer_dirty(leaf);
7215 out:
7216         err = btrfs_commit_transaction(trans, root);
7217         btrfs_release_path(path);
7218         return ret ? ret : err;
7219 }
7220
7221 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7222                            struct extent_record *rec)
7223 {
7224         struct extent_backref *back;
7225         struct data_backref *dback;
7226         struct extent_entry *entry, *best = NULL;
7227         LIST_HEAD(entries);
7228         int nr_entries = 0;
7229         int broken_entries = 0;
7230         int ret = 0;
7231         short mismatch = 0;
7232
7233         /*
7234          * Metadata is easy and the backrefs should always agree on bytenr and
7235          * size, if not we've got bigger issues.
7236          */
7237         if (rec->metadata)
7238                 return 0;
7239
7240         list_for_each_entry(back, &rec->backrefs, list) {
7241                 if (back->full_backref || !back->is_data)
7242                         continue;
7243
7244                 dback = to_data_backref(back);
7245
7246                 /*
7247                  * We only pay attention to backrefs that we found a real
7248                  * backref for.
7249                  */
7250                 if (dback->found_ref == 0)
7251                         continue;
7252
7253                 /*
7254                  * For now we only catch when the bytes don't match, not the
7255                  * bytenr.  We can easily do this at the same time, but I want
7256                  * to have a fs image to test on before we just add repair
7257                  * functionality willy-nilly so we know we won't screw up the
7258                  * repair.
7259                  */
7260
7261                 entry = find_entry(&entries, dback->disk_bytenr,
7262                                    dback->bytes);
7263                 if (!entry) {
7264                         entry = malloc(sizeof(struct extent_entry));
7265                         if (!entry) {
7266                                 ret = -ENOMEM;
7267                                 goto out;
7268                         }
7269                         memset(entry, 0, sizeof(*entry));
7270                         entry->bytenr = dback->disk_bytenr;
7271                         entry->bytes = dback->bytes;
7272                         list_add_tail(&entry->list, &entries);
7273                         nr_entries++;
7274                 }
7275
7276                 /*
7277                  * If we only have on entry we may think the entries agree when
7278                  * in reality they don't so we have to do some extra checking.
7279                  */
7280                 if (dback->disk_bytenr != rec->start ||
7281                     dback->bytes != rec->nr || back->broken)
7282                         mismatch = 1;
7283
7284                 if (back->broken) {
7285                         entry->broken++;
7286                         broken_entries++;
7287                 }
7288
7289                 entry->count++;
7290         }
7291
7292         /* Yay all the backrefs agree, carry on good sir */
7293         if (nr_entries <= 1 && !mismatch)
7294                 goto out;
7295
7296         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7297                 "%Lu\n", rec->start);
7298
7299         /*
7300          * First we want to see if the backrefs can agree amongst themselves who
7301          * is right, so figure out which one of the entries has the highest
7302          * count.
7303          */
7304         best = find_most_right_entry(&entries);
7305
7306         /*
7307          * Ok so we may have an even split between what the backrefs think, so
7308          * this is where we use the extent ref to see what it thinks.
7309          */
7310         if (!best) {
7311                 entry = find_entry(&entries, rec->start, rec->nr);
7312                 if (!entry && (!broken_entries || !rec->found_rec)) {
7313                         fprintf(stderr, "Backrefs don't agree with each other "
7314                                 "and extent record doesn't agree with anybody,"
7315                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7316                                 rec->start, rec->nr);
7317                         ret = -EINVAL;
7318                         goto out;
7319                 } else if (!entry) {
7320                         /*
7321                          * Ok our backrefs were broken, we'll assume this is the
7322                          * correct value and add an entry for this range.
7323                          */
7324                         entry = malloc(sizeof(struct extent_entry));
7325                         if (!entry) {
7326                                 ret = -ENOMEM;
7327                                 goto out;
7328                         }
7329                         memset(entry, 0, sizeof(*entry));
7330                         entry->bytenr = rec->start;
7331                         entry->bytes = rec->nr;
7332                         list_add_tail(&entry->list, &entries);
7333                         nr_entries++;
7334                 }
7335                 entry->count++;
7336                 best = find_most_right_entry(&entries);
7337                 if (!best) {
7338                         fprintf(stderr, "Backrefs and extent record evenly "
7339                                 "split on who is right, this is going to "
7340                                 "require user input to fix bytenr %Lu bytes "
7341                                 "%Lu\n", rec->start, rec->nr);
7342                         ret = -EINVAL;
7343                         goto out;
7344                 }
7345         }
7346
7347         /*
7348          * I don't think this can happen currently as we'll abort() if we catch
7349          * this case higher up, but in case somebody removes that we still can't
7350          * deal with it properly here yet, so just bail out of that's the case.
7351          */
7352         if (best->bytenr != rec->start) {
7353                 fprintf(stderr, "Extent start and backref starts don't match, "
7354                         "please use btrfs-image on this file system and send "
7355                         "it to a btrfs developer so they can make fsck fix "
7356                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7357                         rec->start, rec->nr);
7358                 ret = -EINVAL;
7359                 goto out;
7360         }
7361
7362         /*
7363          * Ok great we all agreed on an extent record, let's go find the real
7364          * references and fix up the ones that don't match.
7365          */
7366         list_for_each_entry(back, &rec->backrefs, list) {
7367                 if (back->full_backref || !back->is_data)
7368                         continue;
7369
7370                 dback = to_data_backref(back);
7371
7372                 /*
7373                  * Still ignoring backrefs that don't have a real ref attached
7374                  * to them.
7375                  */
7376                 if (dback->found_ref == 0)
7377                         continue;
7378
7379                 if (dback->bytes == best->bytes &&
7380                     dback->disk_bytenr == best->bytenr)
7381                         continue;
7382
7383                 ret = repair_ref(info, path, dback, best);
7384                 if (ret)
7385                         goto out;
7386         }
7387
7388         /*
7389          * Ok we messed with the actual refs, which means we need to drop our
7390          * entire cache and go back and rescan.  I know this is a huge pain and
7391          * adds a lot of extra work, but it's the only way to be safe.  Once all
7392          * the backrefs agree we may not need to do anything to the extent
7393          * record itself.
7394          */
7395         ret = -EAGAIN;
7396 out:
7397         while (!list_empty(&entries)) {
7398                 entry = list_entry(entries.next, struct extent_entry, list);
7399                 list_del_init(&entry->list);
7400                 free(entry);
7401         }
7402         return ret;
7403 }
7404
7405 static int process_duplicates(struct btrfs_root *root,
7406                               struct cache_tree *extent_cache,
7407                               struct extent_record *rec)
7408 {
7409         struct extent_record *good, *tmp;
7410         struct cache_extent *cache;
7411         int ret;
7412
7413         /*
7414          * If we found a extent record for this extent then return, or if we
7415          * have more than one duplicate we are likely going to need to delete
7416          * something.
7417          */
7418         if (rec->found_rec || rec->num_duplicates > 1)
7419                 return 0;
7420
7421         /* Shouldn't happen but just in case */
7422         BUG_ON(!rec->num_duplicates);
7423
7424         /*
7425          * So this happens if we end up with a backref that doesn't match the
7426          * actual extent entry.  So either the backref is bad or the extent
7427          * entry is bad.  Either way we want to have the extent_record actually
7428          * reflect what we found in the extent_tree, so we need to take the
7429          * duplicate out and use that as the extent_record since the only way we
7430          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7431          */
7432         remove_cache_extent(extent_cache, &rec->cache);
7433
7434         good = to_extent_record(rec->dups.next);
7435         list_del_init(&good->list);
7436         INIT_LIST_HEAD(&good->backrefs);
7437         INIT_LIST_HEAD(&good->dups);
7438         good->cache.start = good->start;
7439         good->cache.size = good->nr;
7440         good->content_checked = 0;
7441         good->owner_ref_checked = 0;
7442         good->num_duplicates = 0;
7443         good->refs = rec->refs;
7444         list_splice_init(&rec->backrefs, &good->backrefs);
7445         while (1) {
7446                 cache = lookup_cache_extent(extent_cache, good->start,
7447                                             good->nr);
7448                 if (!cache)
7449                         break;
7450                 tmp = container_of(cache, struct extent_record, cache);
7451
7452                 /*
7453                  * If we find another overlapping extent and it's found_rec is
7454                  * set then it's a duplicate and we need to try and delete
7455                  * something.
7456                  */
7457                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7458                         if (list_empty(&good->list))
7459                                 list_add_tail(&good->list,
7460                                               &duplicate_extents);
7461                         good->num_duplicates += tmp->num_duplicates + 1;
7462                         list_splice_init(&tmp->dups, &good->dups);
7463                         list_del_init(&tmp->list);
7464                         list_add_tail(&tmp->list, &good->dups);
7465                         remove_cache_extent(extent_cache, &tmp->cache);
7466                         continue;
7467                 }
7468
7469                 /*
7470                  * Ok we have another non extent item backed extent rec, so lets
7471                  * just add it to this extent and carry on like we did above.
7472                  */
7473                 good->refs += tmp->refs;
7474                 list_splice_init(&tmp->backrefs, &good->backrefs);
7475                 remove_cache_extent(extent_cache, &tmp->cache);
7476                 free(tmp);
7477         }
7478         ret = insert_cache_extent(extent_cache, &good->cache);
7479         BUG_ON(ret);
7480         free(rec);
7481         return good->num_duplicates ? 0 : 1;
7482 }
7483
7484 static int delete_duplicate_records(struct btrfs_root *root,
7485                                     struct extent_record *rec)
7486 {
7487         struct btrfs_trans_handle *trans;
7488         LIST_HEAD(delete_list);
7489         struct btrfs_path path;
7490         struct extent_record *tmp, *good, *n;
7491         int nr_del = 0;
7492         int ret = 0, err;
7493         struct btrfs_key key;
7494
7495         btrfs_init_path(&path);
7496
7497         good = rec;
7498         /* Find the record that covers all of the duplicates. */
7499         list_for_each_entry(tmp, &rec->dups, list) {
7500                 if (good->start < tmp->start)
7501                         continue;
7502                 if (good->nr > tmp->nr)
7503                         continue;
7504
7505                 if (tmp->start + tmp->nr < good->start + good->nr) {
7506                         fprintf(stderr, "Ok we have overlapping extents that "
7507                                 "aren't completely covered by each other, this "
7508                                 "is going to require more careful thought.  "
7509                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7510                                 tmp->start, tmp->nr, good->start, good->nr);
7511                         abort();
7512                 }
7513                 good = tmp;
7514         }
7515
7516         if (good != rec)
7517                 list_add_tail(&rec->list, &delete_list);
7518
7519         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7520                 if (tmp == good)
7521                         continue;
7522                 list_move_tail(&tmp->list, &delete_list);
7523         }
7524
7525         root = root->fs_info->extent_root;
7526         trans = btrfs_start_transaction(root, 1);
7527         if (IS_ERR(trans)) {
7528                 ret = PTR_ERR(trans);
7529                 goto out;
7530         }
7531
7532         list_for_each_entry(tmp, &delete_list, list) {
7533                 if (tmp->found_rec == 0)
7534                         continue;
7535                 key.objectid = tmp->start;
7536                 key.type = BTRFS_EXTENT_ITEM_KEY;
7537                 key.offset = tmp->nr;
7538
7539                 /* Shouldn't happen but just in case */
7540                 if (tmp->metadata) {
7541                         fprintf(stderr, "Well this shouldn't happen, extent "
7542                                 "record overlaps but is metadata? "
7543                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7544                         abort();
7545                 }
7546
7547                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7548                 if (ret) {
7549                         if (ret > 0)
7550                                 ret = -EINVAL;
7551                         break;
7552                 }
7553                 ret = btrfs_del_item(trans, root, &path);
7554                 if (ret)
7555                         break;
7556                 btrfs_release_path(&path);
7557                 nr_del++;
7558         }
7559         err = btrfs_commit_transaction(trans, root);
7560         if (err && !ret)
7561                 ret = err;
7562 out:
7563         while (!list_empty(&delete_list)) {
7564                 tmp = to_extent_record(delete_list.next);
7565                 list_del_init(&tmp->list);
7566                 if (tmp == rec)
7567                         continue;
7568                 free(tmp);
7569         }
7570
7571         while (!list_empty(&rec->dups)) {
7572                 tmp = to_extent_record(rec->dups.next);
7573                 list_del_init(&tmp->list);
7574                 free(tmp);
7575         }
7576
7577         btrfs_release_path(&path);
7578
7579         if (!ret && !nr_del)
7580                 rec->num_duplicates = 0;
7581
7582         return ret ? ret : nr_del;
7583 }
7584
7585 static int find_possible_backrefs(struct btrfs_fs_info *info,
7586                                   struct btrfs_path *path,
7587                                   struct cache_tree *extent_cache,
7588                                   struct extent_record *rec)
7589 {
7590         struct btrfs_root *root;
7591         struct extent_backref *back;
7592         struct data_backref *dback;
7593         struct cache_extent *cache;
7594         struct btrfs_file_extent_item *fi;
7595         struct btrfs_key key;
7596         u64 bytenr, bytes;
7597         int ret;
7598
7599         list_for_each_entry(back, &rec->backrefs, list) {
7600                 /* Don't care about full backrefs (poor unloved backrefs) */
7601                 if (back->full_backref || !back->is_data)
7602                         continue;
7603
7604                 dback = to_data_backref(back);
7605
7606                 /* We found this one, we don't need to do a lookup */
7607                 if (dback->found_ref)
7608                         continue;
7609
7610                 key.objectid = dback->root;
7611                 key.type = BTRFS_ROOT_ITEM_KEY;
7612                 key.offset = (u64)-1;
7613
7614                 root = btrfs_read_fs_root(info, &key);
7615
7616                 /* No root, definitely a bad ref, skip */
7617                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7618                         continue;
7619                 /* Other err, exit */
7620                 if (IS_ERR(root))
7621                         return PTR_ERR(root);
7622
7623                 key.objectid = dback->owner;
7624                 key.type = BTRFS_EXTENT_DATA_KEY;
7625                 key.offset = dback->offset;
7626                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7627                 if (ret) {
7628                         btrfs_release_path(path);
7629                         if (ret < 0)
7630                                 return ret;
7631                         /* Didn't find it, we can carry on */
7632                         ret = 0;
7633                         continue;
7634                 }
7635
7636                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7637                                     struct btrfs_file_extent_item);
7638                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7639                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7640                 btrfs_release_path(path);
7641                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7642                 if (cache) {
7643                         struct extent_record *tmp;
7644                         tmp = container_of(cache, struct extent_record, cache);
7645
7646                         /*
7647                          * If we found an extent record for the bytenr for this
7648                          * particular backref then we can't add it to our
7649                          * current extent record.  We only want to add backrefs
7650                          * that don't have a corresponding extent item in the
7651                          * extent tree since they likely belong to this record
7652                          * and we need to fix it if it doesn't match bytenrs.
7653                          */
7654                         if  (tmp->found_rec)
7655                                 continue;
7656                 }
7657
7658                 dback->found_ref += 1;
7659                 dback->disk_bytenr = bytenr;
7660                 dback->bytes = bytes;
7661
7662                 /*
7663                  * Set this so the verify backref code knows not to trust the
7664                  * values in this backref.
7665                  */
7666                 back->broken = 1;
7667         }
7668
7669         return 0;
7670 }
7671
7672 /*
7673  * Record orphan data ref into corresponding root.
7674  *
7675  * Return 0 if the extent item contains data ref and recorded.
7676  * Return 1 if the extent item contains no useful data ref
7677  *   On that case, it may contains only shared_dataref or metadata backref
7678  *   or the file extent exists(this should be handled by the extent bytenr
7679  *   recovery routine)
7680  * Return <0 if something goes wrong.
7681  */
7682 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7683                                       struct extent_record *rec)
7684 {
7685         struct btrfs_key key;
7686         struct btrfs_root *dest_root;
7687         struct extent_backref *back;
7688         struct data_backref *dback;
7689         struct orphan_data_extent *orphan;
7690         struct btrfs_path path;
7691         int recorded_data_ref = 0;
7692         int ret = 0;
7693
7694         if (rec->metadata)
7695                 return 1;
7696         btrfs_init_path(&path);
7697         list_for_each_entry(back, &rec->backrefs, list) {
7698                 if (back->full_backref || !back->is_data ||
7699                     !back->found_extent_tree)
7700                         continue;
7701                 dback = to_data_backref(back);
7702                 if (dback->found_ref)
7703                         continue;
7704                 key.objectid = dback->root;
7705                 key.type = BTRFS_ROOT_ITEM_KEY;
7706                 key.offset = (u64)-1;
7707
7708                 dest_root = btrfs_read_fs_root(fs_info, &key);
7709
7710                 /* For non-exist root we just skip it */
7711                 if (IS_ERR(dest_root) || !dest_root)
7712                         continue;
7713
7714                 key.objectid = dback->owner;
7715                 key.type = BTRFS_EXTENT_DATA_KEY;
7716                 key.offset = dback->offset;
7717
7718                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7719                 btrfs_release_path(&path);
7720                 /*
7721                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7722                  * we need to record it for inode/file extent rebuild.
7723                  * For ret > 0, we record it only for file extent rebuild.
7724                  * For ret == 0, the file extent exists but only bytenr
7725                  * mismatch, let the original bytenr fix routine to handle,
7726                  * don't record it.
7727                  */
7728                 if (ret == 0)
7729                         continue;
7730                 ret = 0;
7731                 orphan = malloc(sizeof(*orphan));
7732                 if (!orphan) {
7733                         ret = -ENOMEM;
7734                         goto out;
7735                 }
7736                 INIT_LIST_HEAD(&orphan->list);
7737                 orphan->root = dback->root;
7738                 orphan->objectid = dback->owner;
7739                 orphan->offset = dback->offset;
7740                 orphan->disk_bytenr = rec->cache.start;
7741                 orphan->disk_len = rec->cache.size;
7742                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7743                 recorded_data_ref = 1;
7744         }
7745 out:
7746         btrfs_release_path(&path);
7747         if (!ret)
7748                 return !recorded_data_ref;
7749         else
7750                 return ret;
7751 }
7752
7753 /*
7754  * when an incorrect extent item is found, this will delete
7755  * all of the existing entries for it and recreate them
7756  * based on what the tree scan found.
7757  */
7758 static int fixup_extent_refs(struct btrfs_fs_info *info,
7759                              struct cache_tree *extent_cache,
7760                              struct extent_record *rec)
7761 {
7762         struct btrfs_trans_handle *trans = NULL;
7763         int ret;
7764         struct btrfs_path path;
7765         struct list_head *cur = rec->backrefs.next;
7766         struct cache_extent *cache;
7767         struct extent_backref *back;
7768         int allocated = 0;
7769         u64 flags = 0;
7770
7771         if (rec->flag_block_full_backref)
7772                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7773
7774         btrfs_init_path(&path);
7775         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7776                 /*
7777                  * Sometimes the backrefs themselves are so broken they don't
7778                  * get attached to any meaningful rec, so first go back and
7779                  * check any of our backrefs that we couldn't find and throw
7780                  * them into the list if we find the backref so that
7781                  * verify_backrefs can figure out what to do.
7782                  */
7783                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7784                 if (ret < 0)
7785                         goto out;
7786         }
7787
7788         /* step one, make sure all of the backrefs agree */
7789         ret = verify_backrefs(info, &path, rec);
7790         if (ret < 0)
7791                 goto out;
7792
7793         trans = btrfs_start_transaction(info->extent_root, 1);
7794         if (IS_ERR(trans)) {
7795                 ret = PTR_ERR(trans);
7796                 goto out;
7797         }
7798
7799         /* step two, delete all the existing records */
7800         ret = delete_extent_records(trans, info->extent_root, &path,
7801                                     rec->start, rec->max_size);
7802
7803         if (ret < 0)
7804                 goto out;
7805
7806         /* was this block corrupt?  If so, don't add references to it */
7807         cache = lookup_cache_extent(info->corrupt_blocks,
7808                                     rec->start, rec->max_size);
7809         if (cache) {
7810                 ret = 0;
7811                 goto out;
7812         }
7813
7814         /* step three, recreate all the refs we did find */
7815         while(cur != &rec->backrefs) {
7816                 back = to_extent_backref(cur);
7817                 cur = cur->next;
7818
7819                 /*
7820                  * if we didn't find any references, don't create a
7821                  * new extent record
7822                  */
7823                 if (!back->found_ref)
7824                         continue;
7825
7826                 rec->bad_full_backref = 0;
7827                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7828                 allocated = 1;
7829
7830                 if (ret)
7831                         goto out;
7832         }
7833 out:
7834         if (trans) {
7835                 int err = btrfs_commit_transaction(trans, info->extent_root);
7836                 if (!ret)
7837                         ret = err;
7838         }
7839
7840         btrfs_release_path(&path);
7841         return ret;
7842 }
7843
7844 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7845                               struct extent_record *rec)
7846 {
7847         struct btrfs_trans_handle *trans;
7848         struct btrfs_root *root = fs_info->extent_root;
7849         struct btrfs_path path;
7850         struct btrfs_extent_item *ei;
7851         struct btrfs_key key;
7852         u64 flags;
7853         int ret = 0;
7854
7855         key.objectid = rec->start;
7856         if (rec->metadata) {
7857                 key.type = BTRFS_METADATA_ITEM_KEY;
7858                 key.offset = rec->info_level;
7859         } else {
7860                 key.type = BTRFS_EXTENT_ITEM_KEY;
7861                 key.offset = rec->max_size;
7862         }
7863
7864         trans = btrfs_start_transaction(root, 0);
7865         if (IS_ERR(trans))
7866                 return PTR_ERR(trans);
7867
7868         btrfs_init_path(&path);
7869         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7870         if (ret < 0) {
7871                 btrfs_release_path(&path);
7872                 btrfs_commit_transaction(trans, root);
7873                 return ret;
7874         } else if (ret) {
7875                 fprintf(stderr, "Didn't find extent for %llu\n",
7876                         (unsigned long long)rec->start);
7877                 btrfs_release_path(&path);
7878                 btrfs_commit_transaction(trans, root);
7879                 return -ENOENT;
7880         }
7881
7882         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7883                             struct btrfs_extent_item);
7884         flags = btrfs_extent_flags(path.nodes[0], ei);
7885         if (rec->flag_block_full_backref) {
7886                 fprintf(stderr, "setting full backref on %llu\n",
7887                         (unsigned long long)key.objectid);
7888                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7889         } else {
7890                 fprintf(stderr, "clearing full backref on %llu\n",
7891                         (unsigned long long)key.objectid);
7892                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7893         }
7894         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7895         btrfs_mark_buffer_dirty(path.nodes[0]);
7896         btrfs_release_path(&path);
7897         return btrfs_commit_transaction(trans, root);
7898 }
7899
7900 /* right now we only prune from the extent allocation tree */
7901 static int prune_one_block(struct btrfs_trans_handle *trans,
7902                            struct btrfs_fs_info *info,
7903                            struct btrfs_corrupt_block *corrupt)
7904 {
7905         int ret;
7906         struct btrfs_path path;
7907         struct extent_buffer *eb;
7908         u64 found;
7909         int slot;
7910         int nritems;
7911         int level = corrupt->level + 1;
7912
7913         btrfs_init_path(&path);
7914 again:
7915         /* we want to stop at the parent to our busted block */
7916         path.lowest_level = level;
7917
7918         ret = btrfs_search_slot(trans, info->extent_root,
7919                                 &corrupt->key, &path, -1, 1);
7920
7921         if (ret < 0)
7922                 goto out;
7923
7924         eb = path.nodes[level];
7925         if (!eb) {
7926                 ret = -ENOENT;
7927                 goto out;
7928         }
7929
7930         /*
7931          * hopefully the search gave us the block we want to prune,
7932          * lets try that first
7933          */
7934         slot = path.slots[level];
7935         found =  btrfs_node_blockptr(eb, slot);
7936         if (found == corrupt->cache.start)
7937                 goto del_ptr;
7938
7939         nritems = btrfs_header_nritems(eb);
7940
7941         /* the search failed, lets scan this node and hope we find it */
7942         for (slot = 0; slot < nritems; slot++) {
7943                 found =  btrfs_node_blockptr(eb, slot);
7944                 if (found == corrupt->cache.start)
7945                         goto del_ptr;
7946         }
7947         /*
7948          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7949          * to this block
7950          */
7951         if (eb == info->extent_root->node) {
7952                 ret = -ENOENT;
7953                 goto out;
7954         } else {
7955                 level++;
7956                 btrfs_release_path(&path);
7957                 goto again;
7958         }
7959
7960 del_ptr:
7961         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7962         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7963
7964 out:
7965         btrfs_release_path(&path);
7966         return ret;
7967 }
7968
7969 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7970 {
7971         struct btrfs_trans_handle *trans = NULL;
7972         struct cache_extent *cache;
7973         struct btrfs_corrupt_block *corrupt;
7974
7975         while (1) {
7976                 cache = search_cache_extent(info->corrupt_blocks, 0);
7977                 if (!cache)
7978                         break;
7979                 if (!trans) {
7980                         trans = btrfs_start_transaction(info->extent_root, 1);
7981                         if (IS_ERR(trans))
7982                                 return PTR_ERR(trans);
7983                 }
7984                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7985                 prune_one_block(trans, info, corrupt);
7986                 remove_cache_extent(info->corrupt_blocks, cache);
7987         }
7988         if (trans)
7989                 return btrfs_commit_transaction(trans, info->extent_root);
7990         return 0;
7991 }
7992
7993 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7994 {
7995         struct btrfs_block_group_cache *cache;
7996         u64 start, end;
7997         int ret;
7998
7999         while (1) {
8000                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8001                                             &start, &end, EXTENT_DIRTY);
8002                 if (ret)
8003                         break;
8004                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8005                                    GFP_NOFS);
8006         }
8007
8008         start = 0;
8009         while (1) {
8010                 cache = btrfs_lookup_first_block_group(fs_info, start);
8011                 if (!cache)
8012                         break;
8013                 if (cache->cached)
8014                         cache->cached = 0;
8015                 start = cache->key.objectid + cache->key.offset;
8016         }
8017 }
8018
/*
 * Walk every extent record in @extent_cache, report inconsistencies on
 * stderr and, when the global 'repair' flag is set, try to fix them.
 *
 * Each record is removed from the cache and freed once it has been
 * processed.
 *
 * Return value:
 *  - without repair: 'err', which is 1 if any problem was reported and 0
 *    otherwise;
 *  - with repair: 0 on success, -EAGAIN when duplicate extent items were
 *    deleted, or a negative errno from deleting duplicates.  Any other
 *    repair failure prints a message and calls exit(1).
 *
 * NOTE(review): without repair, a negative return from
 * record_orphan_data_extents() jumps to repair_abort and the function then
 * returns 'err', which may still be 0 at that point - confirm this is
 * intended.
 */
static int check_extent_refs(struct btrfs_root *root,
                             struct cache_tree *extent_cache)
{
        struct extent_record *rec;
        struct cache_extent *cache;
        int err = 0;
        int ret = 0;
        int fixed = 0;
        int had_dups = 0;
        int recorded = 0;

        if (repair) {
                /*
                 * if we're doing a repair, we have to make sure
                 * we don't allocate from the problem extents.
                 * In the worst case, this will be all the
                 * extents in the FS
                 */
                cache = search_cache_extent(extent_cache, 0);
                while(cache) {
                        rec = container_of(cache, struct extent_record, cache);
                        set_extent_dirty(root->fs_info->excluded_extents,
                                         rec->start,
                                         rec->start + rec->max_size - 1,
                                         GFP_NOFS);
                        cache = next_cache_extent(cache);
                }

                /* pin down all the corrupted blocks too */
                cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
                while(cache) {
                        set_extent_dirty(root->fs_info->excluded_extents,
                                         cache->start,
                                         cache->start + cache->size - 1,
                                         GFP_NOFS);
                        cache = next_cache_extent(cache);
                }
                prune_corrupt_blocks(root->fs_info);
                reset_cached_block_groups(root->fs_info);
        }

        reset_cached_block_groups(root->fs_info);

        /*
         * We need to delete any duplicate entries we find first otherwise we
         * could mess up the extent tree when we have backrefs that actually
         * belong to a different extent item and not the weird duplicate one.
         */
        while (repair && !list_empty(&duplicate_extents)) {
                rec = to_extent_record(duplicate_extents.next);
                list_del_init(&rec->list);

                /* Sometimes we can find a backref before we find an actual
                 * extent, so we need to process it a little bit to see if there
                 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
                 * if this is a backref screwup.  If we need to delete stuff
                 * process_duplicates() will return 0, otherwise it will return
                 * 1 and we skip this record.
                 */
                if (process_duplicates(root, extent_cache, rec))
                        continue;
                ret = delete_duplicate_records(root, rec);
                if (ret < 0)
                        return ret;
                /*
                 * delete_duplicate_records will return the number of entries
                 * deleted, so if it's greater than 0 then we know we actually
                 * did something and we need to remove.
                 */
                if (ret)
                        had_dups = 1;
        }

        /* Extent items were deleted above; tell the caller to try again */
        if (had_dups)
                return -EAGAIN;

        while(1) {
                /* Set when any problem is reported for the current record */
                int cur_err = 0;

                /* Per-record state: repaired / recorded as orphan data */
                fixed = 0;
                recorded = 0;
                cache = search_cache_extent(extent_cache, 0);
                if (!cache)
                        break;
                rec = container_of(cache, struct extent_record, cache);
                if (rec->num_duplicates) {
                        fprintf(stderr, "extent item %llu has multiple extent "
                                "items\n", (unsigned long long)rec->start);
                        err = 1;
                        cur_err = 1;
                }

                if (rec->refs != rec->extent_item_refs) {
                        fprintf(stderr, "ref mismatch on [%llu %llu] ",
                                (unsigned long long)rec->start,
                                (unsigned long long)rec->nr);
                        fprintf(stderr, "extent item %llu, found %llu\n",
                                (unsigned long long)rec->extent_item_refs,
                                (unsigned long long)rec->refs);
                        ret = record_orphan_data_extents(root->fs_info, rec);
                        if (ret < 0)
                                goto repair_abort;
                        if (ret == 0) {
                                recorded = 1;
                        } else {
                                /*
                                 * we can't use the extent to repair file
                                 * extent, let the fallback method handle it.
                                 */
                                if (!fixed && repair) {
                                        ret = fixup_extent_refs(
                                                        root->fs_info,
                                                        extent_cache, rec);
                                        if (ret)
                                                goto repair_abort;
                                        fixed = 1;
                                }
                        }
                        err = 1;
                        cur_err = 1;
                }
                /* A non-zero result is reported as a backpointer mismatch */
                if (all_backpointers_checked(rec, 1)) {
                        fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
                                (unsigned long long)rec->start,
                                (unsigned long long)rec->nr);

                        /* Rebuild the refs at most once per record */
                        if (!fixed && !recorded && repair) {
                                ret = fixup_extent_refs(root->fs_info,
                                                        extent_cache, rec);
                                if (ret)
                                        goto repair_abort;
                                fixed = 1;
                        }
                        cur_err = 1;
                        err = 1;
                }
                if (!rec->owner_ref_checked) {
                        fprintf(stderr, "owner ref check failed [%llu %llu]\n",
                                (unsigned long long)rec->start,
                                (unsigned long long)rec->nr);
                        if (!fixed && !recorded && repair) {
                                ret = fixup_extent_refs(root->fs_info,
                                                        extent_cache, rec);
                                if (ret)
                                        goto repair_abort;
                                fixed = 1;
                        }
                        err = 1;
                        cur_err = 1;
                }
                if (rec->bad_full_backref) {
                        fprintf(stderr, "bad full backref, on [%llu]\n",
                                (unsigned long long)rec->start);
                        if (repair) {
                                ret = fixup_extent_flags(root->fs_info, rec);
                                if (ret)
                                        goto repair_abort;
                                fixed = 1;
                        }
                        err = 1;
                        cur_err = 1;
                }
                /*
                 * Although it's not a extent ref's problem, we reuse this
                 * routine for error reporting.
                 * No repair function yet.
                 */
                if (rec->crossing_stripes) {
                        fprintf(stderr,
                                "bad metadata [%llu, %llu) crossing stripe boundary\n",
                                rec->start, rec->start + rec->max_size);
                        err = 1;
                        cur_err = 1;
                }

                if (rec->wrong_chunk_type) {
                        fprintf(stderr,
                                "bad extent [%llu, %llu), type mismatch with chunk\n",
                                rec->start, rec->start + rec->max_size);
                        err = 1;
                        cur_err = 1;
                }

                remove_cache_extent(extent_cache, cache);
                free_all_extent_backrefs(rec);
                /*
                 * Un-exclude the range only when the record was clean or
                 * got fixed; unrepaired ranges stay excluded.
                 */
                if (!init_extent_tree && repair && (!cur_err || fixed))
                        clear_extent_dirty(root->fs_info->excluded_extents,
                                           rec->start,
                                           rec->start + rec->max_size - 1,
                                           GFP_NOFS);
                free(rec);
        }
repair_abort:
        if (repair) {
                if (ret && ret != -EAGAIN) {
                        fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
                        exit(1);
                } else if (!ret) {
                        struct btrfs_trans_handle *trans;

                        root = root->fs_info->extent_root;
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
                                /* re-enters the label with ret set -> exit(1) */
                                goto repair_abort;
                        }

                        btrfs_fix_block_accounting(trans, root);
                        ret = btrfs_commit_transaction(trans, root);
                        if (ret)
                                goto repair_abort;
                }
                if (err)
                        fprintf(stderr, "repaired damaged extent references\n");
                return ret;
        }
        return err;
}
8237
8238 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8239 {
8240         u64 stripe_size;
8241
8242         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8243                 stripe_size = length;
8244                 stripe_size /= num_stripes;
8245         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8246                 stripe_size = length * 2;
8247                 stripe_size /= num_stripes;
8248         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8249                 stripe_size = length;
8250                 stripe_size /= (num_stripes - 1);
8251         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8252                 stripe_size = length;
8253                 stripe_size /= (num_stripes - 2);
8254         } else {
8255                 stripe_size = length;
8256         }
8257         return stripe_size;
8258 }
8259
8260 /*
8261  * Check the chunk with its block group/dev list ref:
8262  * Return 0 if all refs seems valid.
8263  * Return 1 if part of refs seems valid, need later check for rebuild ref
8264  * like missing block group and needs to search extent tree to rebuild them.
8265  * Return -1 if essential refs are missing and unable to rebuild.
8266  */
8267 static int check_chunk_refs(struct chunk_record *chunk_rec,
8268                             struct block_group_tree *block_group_cache,
8269                             struct device_extent_tree *dev_extent_cache,
8270                             int silent)
8271 {
8272         struct cache_extent *block_group_item;
8273         struct block_group_record *block_group_rec;
8274         struct cache_extent *dev_extent_item;
8275         struct device_extent_record *dev_extent_rec;
8276         u64 devid;
8277         u64 offset;
8278         u64 length;
8279         int metadump_v2 = 0;
8280         int i;
8281         int ret = 0;
8282
8283         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8284                                                chunk_rec->offset,
8285                                                chunk_rec->length);
8286         if (block_group_item) {
8287                 block_group_rec = container_of(block_group_item,
8288                                                struct block_group_record,
8289                                                cache);
8290                 if (chunk_rec->length != block_group_rec->offset ||
8291                     chunk_rec->offset != block_group_rec->objectid ||
8292                     (!metadump_v2 &&
8293                      chunk_rec->type_flags != block_group_rec->flags)) {
8294                         if (!silent)
8295                                 fprintf(stderr,
8296                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8297                                         chunk_rec->objectid,
8298                                         chunk_rec->type,
8299                                         chunk_rec->offset,
8300                                         chunk_rec->length,
8301                                         chunk_rec->offset,
8302                                         chunk_rec->type_flags,
8303                                         block_group_rec->objectid,
8304                                         block_group_rec->type,
8305                                         block_group_rec->offset,
8306                                         block_group_rec->offset,
8307                                         block_group_rec->objectid,
8308                                         block_group_rec->flags);
8309                         ret = -1;
8310                 } else {
8311                         list_del_init(&block_group_rec->list);
8312                         chunk_rec->bg_rec = block_group_rec;
8313                 }
8314         } else {
8315                 if (!silent)
8316                         fprintf(stderr,
8317                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8318                                 chunk_rec->objectid,
8319                                 chunk_rec->type,
8320                                 chunk_rec->offset,
8321                                 chunk_rec->length,
8322                                 chunk_rec->offset,
8323                                 chunk_rec->type_flags);
8324                 ret = 1;
8325         }
8326
8327         if (metadump_v2)
8328                 return ret;
8329
8330         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8331                                     chunk_rec->num_stripes);
8332         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8333                 devid = chunk_rec->stripes[i].devid;
8334                 offset = chunk_rec->stripes[i].offset;
8335                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8336                                                        devid, offset, length);
8337                 if (dev_extent_item) {
8338                         dev_extent_rec = container_of(dev_extent_item,
8339                                                 struct device_extent_record,
8340                                                 cache);
8341                         if (dev_extent_rec->objectid != devid ||
8342                             dev_extent_rec->offset != offset ||
8343                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8344                             dev_extent_rec->length != length) {
8345                                 if (!silent)
8346                                         fprintf(stderr,
8347                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8348                                                 chunk_rec->objectid,
8349                                                 chunk_rec->type,
8350                                                 chunk_rec->offset,
8351                                                 chunk_rec->stripes[i].devid,
8352                                                 chunk_rec->stripes[i].offset,
8353                                                 dev_extent_rec->objectid,
8354                                                 dev_extent_rec->offset,
8355                                                 dev_extent_rec->length);
8356                                 ret = -1;
8357                         } else {
8358                                 list_move(&dev_extent_rec->chunk_list,
8359                                           &chunk_rec->dextents);
8360                         }
8361                 } else {
8362                         if (!silent)
8363                                 fprintf(stderr,
8364                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8365                                         chunk_rec->objectid,
8366                                         chunk_rec->type,
8367                                         chunk_rec->offset,
8368                                         chunk_rec->stripes[i].devid,
8369                                         chunk_rec->stripes[i].offset);
8370                         ret = -1;
8371                 }
8372         }
8373         return ret;
8374 }
8375
8376 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8377 int check_chunks(struct cache_tree *chunk_cache,
8378                  struct block_group_tree *block_group_cache,
8379                  struct device_extent_tree *dev_extent_cache,
8380                  struct list_head *good, struct list_head *bad,
8381                  struct list_head *rebuild, int silent)
8382 {
8383         struct cache_extent *chunk_item;
8384         struct chunk_record *chunk_rec;
8385         struct block_group_record *bg_rec;
8386         struct device_extent_record *dext_rec;
8387         int err;
8388         int ret = 0;
8389
8390         chunk_item = first_cache_extent(chunk_cache);
8391         while (chunk_item) {
8392                 chunk_rec = container_of(chunk_item, struct chunk_record,
8393                                          cache);
8394                 err = check_chunk_refs(chunk_rec, block_group_cache,
8395                                        dev_extent_cache, silent);
8396                 if (err < 0)
8397                         ret = err;
8398                 if (err == 0 && good)
8399                         list_add_tail(&chunk_rec->list, good);
8400                 if (err > 0 && rebuild)
8401                         list_add_tail(&chunk_rec->list, rebuild);
8402                 if (err < 0 && bad)
8403                         list_add_tail(&chunk_rec->list, bad);
8404                 chunk_item = next_cache_extent(chunk_item);
8405         }
8406
8407         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8408                 if (!silent)
8409                         fprintf(stderr,
8410                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8411                                 bg_rec->objectid,
8412                                 bg_rec->offset,
8413                                 bg_rec->flags);
8414                 if (!ret)
8415                         ret = 1;
8416         }
8417
8418         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8419                             chunk_list) {
8420                 if (!silent)
8421                         fprintf(stderr,
8422                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8423                                 dext_rec->objectid,
8424                                 dext_rec->offset,
8425                                 dext_rec->length);
8426                 if (!ret)
8427                         ret = 1;
8428         }
8429         return ret;
8430 }
8431
8432
8433 static int check_device_used(struct device_record *dev_rec,
8434                              struct device_extent_tree *dext_cache)
8435 {
8436         struct cache_extent *cache;
8437         struct device_extent_record *dev_extent_rec;
8438         u64 total_byte = 0;
8439
8440         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8441         while (cache) {
8442                 dev_extent_rec = container_of(cache,
8443                                               struct device_extent_record,
8444                                               cache);
8445                 if (dev_extent_rec->objectid != dev_rec->devid)
8446                         break;
8447
8448                 list_del_init(&dev_extent_rec->device_list);
8449                 total_byte += dev_extent_rec->length;
8450                 cache = next_cache_extent(cache);
8451         }
8452
8453         if (total_byte != dev_rec->byte_used) {
8454                 fprintf(stderr,
8455                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8456                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8457                         dev_rec->type, dev_rec->offset);
8458                 return -1;
8459         } else {
8460                 return 0;
8461         }
8462 }
8463
8464 /* check btrfs_dev_item -> btrfs_dev_extent */
8465 static int check_devices(struct rb_root *dev_cache,
8466                          struct device_extent_tree *dev_extent_cache)
8467 {
8468         struct rb_node *dev_node;
8469         struct device_record *dev_rec;
8470         struct device_extent_record *dext_rec;
8471         int err;
8472         int ret = 0;
8473
8474         dev_node = rb_first(dev_cache);
8475         while (dev_node) {
8476                 dev_rec = container_of(dev_node, struct device_record, node);
8477                 err = check_device_used(dev_rec, dev_extent_cache);
8478                 if (err)
8479                         ret = err;
8480
8481                 dev_node = rb_next(dev_node);
8482         }
8483         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8484                             device_list) {
8485                 fprintf(stderr,
8486                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8487                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8488                 if (!ret)
8489                         ret = 1;
8490         }
8491         return ret;
8492 }
8493
8494 static int add_root_item_to_list(struct list_head *head,
8495                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8496                                   u8 level, u8 drop_level,
8497                                   int level_size, struct btrfs_key *drop_key)
8498 {
8499
8500         struct root_item_record *ri_rec;
8501         ri_rec = malloc(sizeof(*ri_rec));
8502         if (!ri_rec)
8503                 return -ENOMEM;
8504         ri_rec->bytenr = bytenr;
8505         ri_rec->objectid = objectid;
8506         ri_rec->level = level;
8507         ri_rec->level_size = level_size;
8508         ri_rec->drop_level = drop_level;
8509         ri_rec->last_snapshot = last_snapshot;
8510         if (drop_key)
8511                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8512         list_add_tail(&ri_rec->list, head);
8513
8514         return 0;
8515 }
8516
8517 static void free_root_item_list(struct list_head *list)
8518 {
8519         struct root_item_record *ri_rec;
8520
8521         while (!list_empty(list)) {
8522                 ri_rec = list_first_entry(list, struct root_item_record,
8523                                           list);
8524                 list_del_init(&ri_rec->list);
8525                 free(ri_rec);
8526         }
8527 }
8528
8529 static int deal_root_from_list(struct list_head *list,
8530                                struct btrfs_root *root,
8531                                struct block_info *bits,
8532                                int bits_nr,
8533                                struct cache_tree *pending,
8534                                struct cache_tree *seen,
8535                                struct cache_tree *reada,
8536                                struct cache_tree *nodes,
8537                                struct cache_tree *extent_cache,
8538                                struct cache_tree *chunk_cache,
8539                                struct rb_root *dev_cache,
8540                                struct block_group_tree *block_group_cache,
8541                                struct device_extent_tree *dev_extent_cache)
8542 {
8543         int ret = 0;
8544         u64 last;
8545
8546         while (!list_empty(list)) {
8547                 struct root_item_record *rec;
8548                 struct extent_buffer *buf;
8549                 rec = list_entry(list->next,
8550                                  struct root_item_record, list);
8551                 last = 0;
8552                 buf = read_tree_block(root->fs_info->tree_root,
8553                                       rec->bytenr, rec->level_size, 0);
8554                 if (!extent_buffer_uptodate(buf)) {
8555                         free_extent_buffer(buf);
8556                         ret = -EIO;
8557                         break;
8558                 }
8559                 ret = add_root_to_pending(buf, extent_cache, pending,
8560                                     seen, nodes, rec->objectid);
8561                 if (ret < 0)
8562                         break;
8563                 /*
8564                  * To rebuild extent tree, we need deal with snapshot
8565                  * one by one, otherwise we deal with node firstly which
8566                  * can maximize readahead.
8567                  */
8568                 while (1) {
8569                         ret = run_next_block(root, bits, bits_nr, &last,
8570                                              pending, seen, reada, nodes,
8571                                              extent_cache, chunk_cache,
8572                                              dev_cache, block_group_cache,
8573                                              dev_extent_cache, rec);
8574                         if (ret != 0)
8575                                 break;
8576                 }
8577                 free_extent_buffer(buf);
8578                 list_del(&rec->list);
8579                 free(rec);
8580                 if (ret < 0)
8581                         break;
8582         }
8583         while (ret >= 0) {
8584                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8585                                      reada, nodes, extent_cache, chunk_cache,
8586                                      dev_cache, block_group_cache,
8587                                      dev_extent_cache, NULL);
8588                 if (ret != 0) {
8589                         if (ret > 0)
8590                                 ret = 0;
8591                         break;
8592                 }
8593         }
8594         return ret;
8595 }
8596
8597 static int check_chunks_and_extents(struct btrfs_root *root)
8598 {
8599         struct rb_root dev_cache;
8600         struct cache_tree chunk_cache;
8601         struct block_group_tree block_group_cache;
8602         struct device_extent_tree dev_extent_cache;
8603         struct cache_tree extent_cache;
8604         struct cache_tree seen;
8605         struct cache_tree pending;
8606         struct cache_tree reada;
8607         struct cache_tree nodes;
8608         struct extent_io_tree excluded_extents;
8609         struct cache_tree corrupt_blocks;
8610         struct btrfs_path path;
8611         struct btrfs_key key;
8612         struct btrfs_key found_key;
8613         int ret, err = 0;
8614         struct block_info *bits;
8615         int bits_nr;
8616         struct extent_buffer *leaf;
8617         int slot;
8618         struct btrfs_root_item ri;
8619         struct list_head dropping_trees;
8620         struct list_head normal_trees;
8621         struct btrfs_root *root1;
8622         u64 objectid;
8623         u32 level_size;
8624         u8 level;
8625
8626         dev_cache = RB_ROOT;
8627         cache_tree_init(&chunk_cache);
8628         block_group_tree_init(&block_group_cache);
8629         device_extent_tree_init(&dev_extent_cache);
8630
8631         cache_tree_init(&extent_cache);
8632         cache_tree_init(&seen);
8633         cache_tree_init(&pending);
8634         cache_tree_init(&nodes);
8635         cache_tree_init(&reada);
8636         cache_tree_init(&corrupt_blocks);
8637         extent_io_tree_init(&excluded_extents);
8638         INIT_LIST_HEAD(&dropping_trees);
8639         INIT_LIST_HEAD(&normal_trees);
8640
8641         if (repair) {
8642                 root->fs_info->excluded_extents = &excluded_extents;
8643                 root->fs_info->fsck_extent_cache = &extent_cache;
8644                 root->fs_info->free_extent_hook = free_extent_hook;
8645                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8646         }
8647
8648         bits_nr = 1024;
8649         bits = malloc(bits_nr * sizeof(struct block_info));
8650         if (!bits) {
8651                 perror("malloc");
8652                 exit(1);
8653         }
8654
8655         if (ctx.progress_enabled) {
8656                 ctx.tp = TASK_EXTENTS;
8657                 task_start(ctx.info);
8658         }
8659
8660 again:
8661         root1 = root->fs_info->tree_root;
8662         level = btrfs_header_level(root1->node);
8663         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8664                                     root1->node->start, 0, level, 0,
8665                                     root1->nodesize, NULL);
8666         if (ret < 0)
8667                 goto out;
8668         root1 = root->fs_info->chunk_root;
8669         level = btrfs_header_level(root1->node);
8670         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8671                                     root1->node->start, 0, level, 0,
8672                                     root1->nodesize, NULL);
8673         if (ret < 0)
8674                 goto out;
8675         btrfs_init_path(&path);
8676         key.offset = 0;
8677         key.objectid = 0;
8678         key.type = BTRFS_ROOT_ITEM_KEY;
8679         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8680                                         &key, &path, 0, 0);
8681         if (ret < 0)
8682                 goto out;
8683         while(1) {
8684                 leaf = path.nodes[0];
8685                 slot = path.slots[0];
8686                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8687                         ret = btrfs_next_leaf(root, &path);
8688                         if (ret != 0)
8689                                 break;
8690                         leaf = path.nodes[0];
8691                         slot = path.slots[0];
8692                 }
8693                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8694                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8695                         unsigned long offset;
8696                         u64 last_snapshot;
8697
8698                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8699                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8700                         last_snapshot = btrfs_root_last_snapshot(&ri);
8701                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8702                                 level = btrfs_root_level(&ri);
8703                                 level_size = root->nodesize;
8704                                 ret = add_root_item_to_list(&normal_trees,
8705                                                 found_key.objectid,
8706                                                 btrfs_root_bytenr(&ri),
8707                                                 last_snapshot, level,
8708                                                 0, level_size, NULL);
8709                                 if (ret < 0)
8710                                         goto out;
8711                         } else {
8712                                 level = btrfs_root_level(&ri);
8713                                 level_size = root->nodesize;
8714                                 objectid = found_key.objectid;
8715                                 btrfs_disk_key_to_cpu(&found_key,
8716                                                       &ri.drop_progress);
8717                                 ret = add_root_item_to_list(&dropping_trees,
8718                                                 objectid,
8719                                                 btrfs_root_bytenr(&ri),
8720                                                 last_snapshot, level,
8721                                                 ri.drop_level,
8722                                                 level_size, &found_key);
8723                                 if (ret < 0)
8724                                         goto out;
8725                         }
8726                 }
8727                 path.slots[0]++;
8728         }
8729         btrfs_release_path(&path);
8730
8731         /*
8732          * check_block can return -EAGAIN if it fixes something, please keep
8733          * this in mind when dealing with return values from these functions, if
8734          * we get -EAGAIN we want to fall through and restart the loop.
8735          */
8736         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8737                                   &seen, &reada, &nodes, &extent_cache,
8738                                   &chunk_cache, &dev_cache, &block_group_cache,
8739                                   &dev_extent_cache);
8740         if (ret < 0) {
8741                 if (ret == -EAGAIN)
8742                         goto loop;
8743                 goto out;
8744         }
8745         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8746                                   &pending, &seen, &reada, &nodes,
8747                                   &extent_cache, &chunk_cache, &dev_cache,
8748                                   &block_group_cache, &dev_extent_cache);
8749         if (ret < 0) {
8750                 if (ret == -EAGAIN)
8751                         goto loop;
8752                 goto out;
8753         }
8754
8755         ret = check_chunks(&chunk_cache, &block_group_cache,
8756                            &dev_extent_cache, NULL, NULL, NULL, 0);
8757         if (ret) {
8758                 if (ret == -EAGAIN)
8759                         goto loop;
8760                 err = ret;
8761         }
8762
8763         ret = check_extent_refs(root, &extent_cache);
8764         if (ret < 0) {
8765                 if (ret == -EAGAIN)
8766                         goto loop;
8767                 goto out;
8768         }
8769
8770         ret = check_devices(&dev_cache, &dev_extent_cache);
8771         if (ret && err)
8772                 ret = err;
8773
8774 out:
8775         task_stop(ctx.info);
8776         if (repair) {
8777                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8778                 extent_io_tree_cleanup(&excluded_extents);
8779                 root->fs_info->fsck_extent_cache = NULL;
8780                 root->fs_info->free_extent_hook = NULL;
8781                 root->fs_info->corrupt_blocks = NULL;
8782                 root->fs_info->excluded_extents = NULL;
8783         }
8784         free(bits);
8785         free_chunk_cache_tree(&chunk_cache);
8786         free_device_cache_tree(&dev_cache);
8787         free_block_group_tree(&block_group_cache);
8788         free_device_extent_tree(&dev_extent_cache);
8789         free_extent_cache_tree(&seen);
8790         free_extent_cache_tree(&pending);
8791         free_extent_cache_tree(&reada);
8792         free_extent_cache_tree(&nodes);
8793         return ret;
8794 loop:
8795         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8796         free_extent_cache_tree(&seen);
8797         free_extent_cache_tree(&pending);
8798         free_extent_cache_tree(&reada);
8799         free_extent_cache_tree(&nodes);
8800         free_chunk_cache_tree(&chunk_cache);
8801         free_block_group_tree(&block_group_cache);
8802         free_device_cache_tree(&dev_cache);
8803         free_device_extent_tree(&dev_extent_cache);
8804         free_extent_record_cache(root->fs_info, &extent_cache);
8805         free_root_item_list(&normal_trees);
8806         free_root_item_list(&dropping_trees);
8807         extent_io_tree_cleanup(&excluded_extents);
8808         goto again;
8809 }
8810
8811 /*
8812  * Check backrefs of a tree block given by @bytenr or @eb.
8813  *
8814  * @root:       the root containing the @bytenr or @eb
8815  * @eb:         tree block extent buffer, can be NULL
8816  * @bytenr:     bytenr of the tree block to search
8817  * @level:      tree level of the tree block
8818  * @owner:      owner of the tree block
8819  *
8820  * Return >0 for any error found and output error message
8821  * Return 0 for no error found
8822  */
8823 static int check_tree_block_ref(struct btrfs_root *root,
8824                                 struct extent_buffer *eb, u64 bytenr,
8825                                 int level, u64 owner)
8826 {
8827         struct btrfs_key key;
8828         struct btrfs_root *extent_root = root->fs_info->extent_root;
8829         struct btrfs_path path;
8830         struct btrfs_extent_item *ei;
8831         struct btrfs_extent_inline_ref *iref;
8832         struct extent_buffer *leaf;
8833         unsigned long end;
8834         unsigned long ptr;
8835         int slot;
8836         int skinny_level;
8837         int type;
8838         u32 nodesize = root->nodesize;
8839         u32 item_size;
8840         u64 offset;
8841         int found_ref = 0;
8842         int err = 0;
8843         int ret;
8844
8845         btrfs_init_path(&path);
8846         key.objectid = bytenr;
8847         if (btrfs_fs_incompat(root->fs_info,
8848                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8849                 key.type = BTRFS_METADATA_ITEM_KEY;
8850         else
8851                 key.type = BTRFS_EXTENT_ITEM_KEY;
8852         key.offset = (u64)-1;
8853
8854         /* Search for the backref in extent tree */
8855         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8856         if (ret < 0) {
8857                 err |= BACKREF_MISSING;
8858                 goto out;
8859         }
8860         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8861         if (ret) {
8862                 err |= BACKREF_MISSING;
8863                 goto out;
8864         }
8865
8866         leaf = path.nodes[0];
8867         slot = path.slots[0];
8868         btrfs_item_key_to_cpu(leaf, &key, slot);
8869
8870         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8871
8872         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8873                 skinny_level = (int)key.offset;
8874                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8875         } else {
8876                 struct btrfs_tree_block_info *info;
8877
8878                 info = (struct btrfs_tree_block_info *)(ei + 1);
8879                 skinny_level = btrfs_tree_block_level(leaf, info);
8880                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8881         }
8882
8883         if (eb) {
8884                 u64 header_gen;
8885                 u64 extent_gen;
8886
8887                 if (!(btrfs_extent_flags(leaf, ei) &
8888                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8889                         error(
8890                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8891                                 key.objectid, nodesize,
8892                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8893                         err = BACKREF_MISMATCH;
8894                 }
8895                 header_gen = btrfs_header_generation(eb);
8896                 extent_gen = btrfs_extent_generation(leaf, ei);
8897                 if (header_gen != extent_gen) {
8898                         error(
8899         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8900                                 key.objectid, nodesize, header_gen,
8901                                 extent_gen);
8902                         err = BACKREF_MISMATCH;
8903                 }
8904                 if (level != skinny_level) {
8905                         error(
8906                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8907                                 key.objectid, nodesize, level, skinny_level);
8908                         err = BACKREF_MISMATCH;
8909                 }
8910                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8911                         error(
8912                         "extent[%llu %u] is referred by other roots than %llu",
8913                                 key.objectid, nodesize, root->objectid);
8914                         err = BACKREF_MISMATCH;
8915                 }
8916         }
8917
8918         /*
8919          * Iterate the extent/metadata item to find the exact backref
8920          */
8921         item_size = btrfs_item_size_nr(leaf, slot);
8922         ptr = (unsigned long)iref;
8923         end = (unsigned long)ei + item_size;
8924         while (ptr < end) {
8925                 iref = (struct btrfs_extent_inline_ref *)ptr;
8926                 type = btrfs_extent_inline_ref_type(leaf, iref);
8927                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8928
8929                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8930                         (offset == root->objectid || offset == owner)) {
8931                         found_ref = 1;
8932                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8933                         /* Check if the backref points to valid referencer */
8934                         found_ref = !check_tree_block_ref(root, NULL, offset,
8935                                                           level + 1, owner);
8936                 }
8937
8938                 if (found_ref)
8939                         break;
8940                 ptr += btrfs_extent_inline_ref_size(type);
8941         }
8942
8943         /*
8944          * Inlined extent item doesn't have what we need, check
8945          * TREE_BLOCK_REF_KEY
8946          */
8947         if (!found_ref) {
8948                 btrfs_release_path(&path);
8949                 key.objectid = bytenr;
8950                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8951                 key.offset = root->objectid;
8952
8953                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8954                 if (!ret)
8955                         found_ref = 1;
8956         }
8957         if (!found_ref)
8958                 err |= BACKREF_MISSING;
8959 out:
8960         btrfs_release_path(&path);
8961         if (eb && (err & BACKREF_MISSING))
8962                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8963                         bytenr, nodesize, owner, level);
8964         return err;
8965 }
8966
8967 /*
8968  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8969  *
8970  * Return >0 any error found and output error message
8971  * Return 0 for no error found
8972  */
8973 static int check_extent_data_item(struct btrfs_root *root,
8974                                   struct extent_buffer *eb, int slot)
8975 {
8976         struct btrfs_file_extent_item *fi;
8977         struct btrfs_path path;
8978         struct btrfs_root *extent_root = root->fs_info->extent_root;
8979         struct btrfs_key fi_key;
8980         struct btrfs_key dbref_key;
8981         struct extent_buffer *leaf;
8982         struct btrfs_extent_item *ei;
8983         struct btrfs_extent_inline_ref *iref;
8984         struct btrfs_extent_data_ref *dref;
8985         u64 owner;
8986         u64 file_extent_gen;
8987         u64 disk_bytenr;
8988         u64 disk_num_bytes;
8989         u64 extent_num_bytes;
8990         u64 extent_flags;
8991         u64 extent_gen;
8992         u32 item_size;
8993         unsigned long end;
8994         unsigned long ptr;
8995         int type;
8996         u64 ref_root;
8997         int found_dbackref = 0;
8998         int err = 0;
8999         int ret;
9000
9001         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9002         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9003         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9004
9005         /* Nothing to check for hole and inline data extents */
9006         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9007             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9008                 return 0;
9009
9010         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9011         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9012         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9013
9014         /* Check unaligned disk_num_bytes and num_bytes */
9015         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9016                 error(
9017 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9018                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9019                         root->sectorsize);
9020                 err |= BYTES_UNALIGNED;
9021         } else {
9022                 data_bytes_allocated += disk_num_bytes;
9023         }
9024         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9025                 error(
9026 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9027                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9028                         root->sectorsize);
9029                 err |= BYTES_UNALIGNED;
9030         } else {
9031                 data_bytes_referenced += extent_num_bytes;
9032         }
9033         owner = btrfs_header_owner(eb);
9034
9035         /* Check the extent item of the file extent in extent tree */
9036         btrfs_init_path(&path);
9037         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9038         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9039         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9040
9041         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9042         if (ret) {
9043                 err |= BACKREF_MISSING;
9044                 goto error;
9045         }
9046
9047         leaf = path.nodes[0];
9048         slot = path.slots[0];
9049         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9050
9051         extent_flags = btrfs_extent_flags(leaf, ei);
9052         extent_gen = btrfs_extent_generation(leaf, ei);
9053
9054         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9055                 error(
9056                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9057                     disk_bytenr, disk_num_bytes,
9058                     BTRFS_EXTENT_FLAG_DATA);
9059                 err |= BACKREF_MISMATCH;
9060         }
9061
9062         if (file_extent_gen < extent_gen) {
9063                 error(
9064 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9065                         disk_bytenr, disk_num_bytes, file_extent_gen,
9066                         extent_gen);
9067                 err |= BACKREF_MISMATCH;
9068         }
9069
9070         /* Check data backref inside that extent item */
9071         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9072         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9073         ptr = (unsigned long)iref;
9074         end = (unsigned long)ei + item_size;
9075         while (ptr < end) {
9076                 iref = (struct btrfs_extent_inline_ref *)ptr;
9077                 type = btrfs_extent_inline_ref_type(leaf, iref);
9078                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9079
9080                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9081                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9082                         if (ref_root == owner || ref_root == root->objectid)
9083                                 found_dbackref = 1;
9084                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9085                         found_dbackref = !check_tree_block_ref(root, NULL,
9086                                 btrfs_extent_inline_ref_offset(leaf, iref),
9087                                 0, owner);
9088                 }
9089
9090                 if (found_dbackref)
9091                         break;
9092                 ptr += btrfs_extent_inline_ref_size(type);
9093         }
9094
9095         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9096         if (!found_dbackref) {
9097                 btrfs_release_path(&path);
9098
9099                 btrfs_init_path(&path);
9100                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9101                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9102                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9103                                 fi_key.objectid, fi_key.offset);
9104
9105                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9106                                         &dbref_key, &path, 0, 0);
9107                 if (!ret)
9108                         found_dbackref = 1;
9109         }
9110
9111         if (!found_dbackref)
9112                 err |= BACKREF_MISSING;
9113 error:
9114         btrfs_release_path(&path);
9115         if (err & BACKREF_MISSING) {
9116                 error("data extent[%llu %llu] backref lost",
9117                       disk_bytenr, disk_num_bytes);
9118         }
9119         return err;
9120 }
9121
9122 /*
9123  * Get real tree block level for the case like shared block
9124  * Return >= 0 as tree level
9125  * Return <0 for error
9126  */
9127 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9128 {
9129         struct extent_buffer *eb;
9130         struct btrfs_path path;
9131         struct btrfs_key key;
9132         struct btrfs_extent_item *ei;
9133         u64 flags;
9134         u64 transid;
9135         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9136         u8 backref_level;
9137         u8 header_level;
9138         int ret;
9139
9140         /* Search extent tree for extent generation and level */
9141         key.objectid = bytenr;
9142         key.type = BTRFS_METADATA_ITEM_KEY;
9143         key.offset = (u64)-1;
9144
9145         btrfs_init_path(&path);
9146         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9147         if (ret < 0)
9148                 goto release_out;
9149         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9150         if (ret < 0)
9151                 goto release_out;
9152         if (ret > 0) {
9153                 ret = -ENOENT;
9154                 goto release_out;
9155         }
9156
9157         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9158         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9159                             struct btrfs_extent_item);
9160         flags = btrfs_extent_flags(path.nodes[0], ei);
9161         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9162                 ret = -ENOENT;
9163                 goto release_out;
9164         }
9165
9166         /* Get transid for later read_tree_block() check */
9167         transid = btrfs_extent_generation(path.nodes[0], ei);
9168
9169         /* Get backref level as one source */
9170         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9171                 backref_level = key.offset;
9172         } else {
9173                 struct btrfs_tree_block_info *info;
9174
9175                 info = (struct btrfs_tree_block_info *)(ei + 1);
9176                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9177         }
9178         btrfs_release_path(&path);
9179
9180         /* Get level from tree block as an alternative source */
9181         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9182         if (!extent_buffer_uptodate(eb)) {
9183                 free_extent_buffer(eb);
9184                 return -EIO;
9185         }
9186         header_level = btrfs_header_level(eb);
9187         free_extent_buffer(eb);
9188
9189         if (header_level != backref_level)
9190                 return -EIO;
9191         return header_level;
9192
9193 release_out:
9194         btrfs_release_path(&path);
9195         return ret;
9196 }
9197
9198 /*
9199  * Check if a tree block backref is valid (points to a valid tree block)
9200  * if level == -1, level will be resolved
9201  * Return >0 for any error found and print error message
9202  */
9203 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9204                                     u64 bytenr, int level)
9205 {
9206         struct btrfs_root *root;
9207         struct btrfs_key key;
9208         struct btrfs_path path;
9209         struct extent_buffer *eb;
9210         struct extent_buffer *node;
9211         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9212         int err = 0;
9213         int ret;
9214
9215         /* Query level for level == -1 special case */
9216         if (level == -1)
9217                 level = query_tree_block_level(fs_info, bytenr);
9218         if (level < 0) {
9219                 err |= REFERENCER_MISSING;
9220                 goto out;
9221         }
9222
9223         key.objectid = root_id;
9224         key.type = BTRFS_ROOT_ITEM_KEY;
9225         key.offset = (u64)-1;
9226
9227         root = btrfs_read_fs_root(fs_info, &key);
9228         if (IS_ERR(root)) {
9229                 err |= REFERENCER_MISSING;
9230                 goto out;
9231         }
9232
9233         /* Read out the tree block to get item/node key */
9234         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9235         if (!extent_buffer_uptodate(eb)) {
9236                 err |= REFERENCER_MISSING;
9237                 free_extent_buffer(eb);
9238                 goto out;
9239         }
9240
9241         /* Empty tree, no need to check key */
9242         if (!btrfs_header_nritems(eb) && !level) {
9243                 free_extent_buffer(eb);
9244                 goto out;
9245         }
9246
9247         if (level)
9248                 btrfs_node_key_to_cpu(eb, &key, 0);
9249         else
9250                 btrfs_item_key_to_cpu(eb, &key, 0);
9251
9252         free_extent_buffer(eb);
9253
9254         btrfs_init_path(&path);
9255         path.lowest_level = level;
9256         /* Search with the first key, to ensure we can reach it */
9257         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9258         if (ret < 0) {
9259                 err |= REFERENCER_MISSING;
9260                 goto release_out;
9261         }
9262
9263         node = path.nodes[level];
9264         if (btrfs_header_bytenr(node) != bytenr) {
9265                 error(
9266         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9267                         bytenr, nodesize, bytenr,
9268                         btrfs_header_bytenr(node));
9269                 err |= REFERENCER_MISMATCH;
9270         }
9271         if (btrfs_header_level(node) != level) {
9272                 error(
9273         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9274                         bytenr, nodesize, level,
9275                         btrfs_header_level(node));
9276                 err |= REFERENCER_MISMATCH;
9277         }
9278
9279 release_out:
9280         btrfs_release_path(&path);
9281 out:
9282         if (err & REFERENCER_MISSING) {
9283                 if (level < 0)
9284                         error("extent [%llu %d] lost referencer (owner: %llu)",
9285                                 bytenr, nodesize, root_id);
9286                 else
9287                         error(
9288                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9289                                 bytenr, nodesize, root_id, level);
9290         }
9291
9292         return err;
9293 }
9294
9295 /*
9296  * Check referencer for shared block backref
9297  * If level == -1, this function will resolve the level.
9298  */
9299 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9300                                      u64 parent, u64 bytenr, int level)
9301 {
9302         struct extent_buffer *eb;
9303         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9304         u32 nr;
9305         int found_parent = 0;
9306         int i;
9307
9308         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9309         if (!extent_buffer_uptodate(eb))
9310                 goto out;
9311
9312         if (level == -1)
9313                 level = query_tree_block_level(fs_info, bytenr);
9314         if (level < 0)
9315                 goto out;
9316
9317         if (level + 1 != btrfs_header_level(eb))
9318                 goto out;
9319
9320         nr = btrfs_header_nritems(eb);
9321         for (i = 0; i < nr; i++) {
9322                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9323                         found_parent = 1;
9324                         break;
9325                 }
9326         }
9327 out:
9328         free_extent_buffer(eb);
9329         if (!found_parent) {
9330                 error(
9331         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9332                         bytenr, nodesize, parent, level);
9333                 return REFERENCER_MISSING;
9334         }
9335         return 0;
9336 }
9337
9338 /*
9339  * Check referencer for normal (inlined) data ref
9340  * If len == 0, it will be resolved by searching in extent tree
9341  */
9342 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9343                                      u64 root_id, u64 objectid, u64 offset,
9344                                      u64 bytenr, u64 len, u32 count)
9345 {
9346         struct btrfs_root *root;
9347         struct btrfs_root *extent_root = fs_info->extent_root;
9348         struct btrfs_key key;
9349         struct btrfs_path path;
9350         struct extent_buffer *leaf;
9351         struct btrfs_file_extent_item *fi;
9352         u32 found_count = 0;
9353         int slot;
9354         int ret = 0;
9355
9356         if (!len) {
9357                 key.objectid = bytenr;
9358                 key.type = BTRFS_EXTENT_ITEM_KEY;
9359                 key.offset = (u64)-1;
9360
9361                 btrfs_init_path(&path);
9362                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9363                 if (ret < 0)
9364                         goto out;
9365                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9366                 if (ret)
9367                         goto out;
9368                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9369                 if (key.objectid != bytenr ||
9370                     key.type != BTRFS_EXTENT_ITEM_KEY)
9371                         goto out;
9372                 len = key.offset;
9373                 btrfs_release_path(&path);
9374         }
9375         key.objectid = root_id;
9376         key.type = BTRFS_ROOT_ITEM_KEY;
9377         key.offset = (u64)-1;
9378         btrfs_init_path(&path);
9379
9380         root = btrfs_read_fs_root(fs_info, &key);
9381         if (IS_ERR(root))
9382                 goto out;
9383
9384         key.objectid = objectid;
9385         key.type = BTRFS_EXTENT_DATA_KEY;
9386         /*
9387          * It can be nasty as data backref offset is
9388          * file offset - file extent offset, which is smaller or
9389          * equal to original backref offset.  The only special case is
9390          * overflow.  So we need to special check and do further search.
9391          */
9392         key.offset = offset & (1ULL << 63) ? 0 : offset;
9393
9394         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9395         if (ret < 0)
9396                 goto out;
9397
9398         /*
9399          * Search afterwards to get correct one
9400          * NOTE: As we must do a comprehensive check on the data backref to
9401          * make sure the dref count also matches, we must iterate all file
9402          * extents for that inode.
9403          */
9404         while (1) {
9405                 leaf = path.nodes[0];
9406                 slot = path.slots[0];
9407
9408                 btrfs_item_key_to_cpu(leaf, &key, slot);
9409                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9410                         break;
9411                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9412                 /*
9413                  * Except normal disk bytenr and disk num bytes, we still
9414                  * need to do extra check on dbackref offset as
9415                  * dbackref offset = file_offset - file_extent_offset
9416                  */
9417                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9418                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9419                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9420                     offset)
9421                         found_count++;
9422
9423                 ret = btrfs_next_item(root, &path);
9424                 if (ret)
9425                         break;
9426         }
9427 out:
9428         btrfs_release_path(&path);
9429         if (found_count != count) {
9430                 error(
9431 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9432                         bytenr, len, root_id, objectid, offset, count, found_count);
9433                 return REFERENCER_MISSING;
9434         }
9435         return 0;
9436 }
9437
9438 /*
9439  * Check if the referencer of a shared data backref exists
9440  */
9441 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9442                                      u64 parent, u64 bytenr)
9443 {
9444         struct extent_buffer *eb;
9445         struct btrfs_key key;
9446         struct btrfs_file_extent_item *fi;
9447         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9448         u32 nr;
9449         int found_parent = 0;
9450         int i;
9451
9452         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9453         if (!extent_buffer_uptodate(eb))
9454                 goto out;
9455
9456         nr = btrfs_header_nritems(eb);
9457         for (i = 0; i < nr; i++) {
9458                 btrfs_item_key_to_cpu(eb, &key, i);
9459                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9460                         continue;
9461
9462                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9463                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9464                         continue;
9465
9466                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9467                         found_parent = 1;
9468                         break;
9469                 }
9470         }
9471
9472 out:
9473         free_extent_buffer(eb);
9474         if (!found_parent) {
9475                 error("shared extent %llu referencer lost (parent: %llu)",
9476                         bytenr, parent);
9477                 return REFERENCER_MISSING;
9478         }
9479         return 0;
9480 }
9481
9482 /*
9483  * This function will check a given extent item, including its backref and
9484  * itself (like crossing stripe boundary and type)
9485  *
9486  * Since we don't use extent_record anymore, introduce new error bit
9487  */
9488 static int check_extent_item(struct btrfs_fs_info *fs_info,
9489                              struct extent_buffer *eb, int slot)
9490 {
9491         struct btrfs_extent_item *ei;
9492         struct btrfs_extent_inline_ref *iref;
9493         struct btrfs_extent_data_ref *dref;
9494         unsigned long end;
9495         unsigned long ptr;
9496         int type;
9497         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9498         u32 item_size = btrfs_item_size_nr(eb, slot);
9499         u64 flags;
9500         u64 offset;
9501         int metadata = 0;
9502         int level;
9503         struct btrfs_key key;
9504         int ret;
9505         int err = 0;
9506
9507         btrfs_item_key_to_cpu(eb, &key, slot);
9508         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9509                 bytes_used += key.offset;
9510         else
9511                 bytes_used += nodesize;
9512
9513         if (item_size < sizeof(*ei)) {
9514                 /*
9515                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9516                  * old thing when on disk format is still un-determined.
9517                  * No need to care about it anymore
9518                  */
9519                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9520                 return -ENOTTY;
9521         }
9522
9523         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9524         flags = btrfs_extent_flags(eb, ei);
9525
9526         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9527                 metadata = 1;
9528         if (metadata && check_crossing_stripes(global_info, key.objectid,
9529                                                eb->len)) {
9530                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9531                       key.objectid, key.objectid + nodesize);
9532                 err |= CROSSING_STRIPE_BOUNDARY;
9533         }
9534
9535         ptr = (unsigned long)(ei + 1);
9536
9537         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9538                 /* Old EXTENT_ITEM metadata */
9539                 struct btrfs_tree_block_info *info;
9540
9541                 info = (struct btrfs_tree_block_info *)ptr;
9542                 level = btrfs_tree_block_level(eb, info);
9543                 ptr += sizeof(struct btrfs_tree_block_info);
9544         } else {
9545                 /* New METADATA_ITEM */
9546                 level = key.offset;
9547         }
9548         end = (unsigned long)ei + item_size;
9549
9550         if (ptr >= end) {
9551                 err |= ITEM_SIZE_MISMATCH;
9552                 goto out;
9553         }
9554
9555         /* Now check every backref in this extent item */
9556 next:
9557         iref = (struct btrfs_extent_inline_ref *)ptr;
9558         type = btrfs_extent_inline_ref_type(eb, iref);
9559         offset = btrfs_extent_inline_ref_offset(eb, iref);
9560         switch (type) {
9561         case BTRFS_TREE_BLOCK_REF_KEY:
9562                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9563                                                level);
9564                 err |= ret;
9565                 break;
9566         case BTRFS_SHARED_BLOCK_REF_KEY:
9567                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9568                                                  level);
9569                 err |= ret;
9570                 break;
9571         case BTRFS_EXTENT_DATA_REF_KEY:
9572                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9573                 ret = check_extent_data_backref(fs_info,
9574                                 btrfs_extent_data_ref_root(eb, dref),
9575                                 btrfs_extent_data_ref_objectid(eb, dref),
9576                                 btrfs_extent_data_ref_offset(eb, dref),
9577                                 key.objectid, key.offset,
9578                                 btrfs_extent_data_ref_count(eb, dref));
9579                 err |= ret;
9580                 break;
9581         case BTRFS_SHARED_DATA_REF_KEY:
9582                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9583                 err |= ret;
9584                 break;
9585         default:
9586                 error("extent[%llu %d %llu] has unknown ref type: %d",
9587                         key.objectid, key.type, key.offset, type);
9588                 err |= UNKNOWN_TYPE;
9589                 goto out;
9590         }
9591
9592         ptr += btrfs_extent_inline_ref_size(type);
9593         if (ptr < end)
9594                 goto next;
9595
9596 out:
9597         return err;
9598 }
9599
9600 /*
9601  * Check if a dev extent item is referred correctly by its chunk
9602  */
9603 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9604                                  struct extent_buffer *eb, int slot)
9605 {
9606         struct btrfs_root *chunk_root = fs_info->chunk_root;
9607         struct btrfs_dev_extent *ptr;
9608         struct btrfs_path path;
9609         struct btrfs_key chunk_key;
9610         struct btrfs_key devext_key;
9611         struct btrfs_chunk *chunk;
9612         struct extent_buffer *l;
9613         int num_stripes;
9614         u64 length;
9615         int i;
9616         int found_chunk = 0;
9617         int ret;
9618
9619         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9620         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9621         length = btrfs_dev_extent_length(eb, ptr);
9622
9623         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9624         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9625         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9626
9627         btrfs_init_path(&path);
9628         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9629         if (ret)
9630                 goto out;
9631
9632         l = path.nodes[0];
9633         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9634         if (btrfs_chunk_length(l, chunk) != length)
9635                 goto out;
9636
9637         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9638         for (i = 0; i < num_stripes; i++) {
9639                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9640                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9641
9642                 if (devid == devext_key.objectid &&
9643                     offset == devext_key.offset) {
9644                         found_chunk = 1;
9645                         break;
9646                 }
9647         }
9648 out:
9649         btrfs_release_path(&path);
9650         if (!found_chunk) {
9651                 error(
9652                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9653                         devext_key.objectid, devext_key.offset, length);
9654                 return REFERENCER_MISSING;
9655         }
9656         return 0;
9657 }
9658
9659 /*
9660  * Check if the used space is correct with the dev item
9661  */
9662 static int check_dev_item(struct btrfs_fs_info *fs_info,
9663                           struct extent_buffer *eb, int slot)
9664 {
9665         struct btrfs_root *dev_root = fs_info->dev_root;
9666         struct btrfs_dev_item *dev_item;
9667         struct btrfs_path path;
9668         struct btrfs_key key;
9669         struct btrfs_dev_extent *ptr;
9670         u64 dev_id;
9671         u64 used;
9672         u64 total = 0;
9673         int ret;
9674
9675         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9676         dev_id = btrfs_device_id(eb, dev_item);
9677         used = btrfs_device_bytes_used(eb, dev_item);
9678
9679         key.objectid = dev_id;
9680         key.type = BTRFS_DEV_EXTENT_KEY;
9681         key.offset = 0;
9682
9683         btrfs_init_path(&path);
9684         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9685         if (ret < 0) {
9686                 btrfs_item_key_to_cpu(eb, &key, slot);
9687                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9688                         key.objectid, key.type, key.offset);
9689                 btrfs_release_path(&path);
9690                 return REFERENCER_MISSING;
9691         }
9692
9693         /* Iterate dev_extents to calculate the used space of a device */
9694         while (1) {
9695                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9696
9697                 if (key.objectid > dev_id)
9698                         break;
9699                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9700                         goto next;
9701
9702                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9703                                      struct btrfs_dev_extent);
9704                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9705 next:
9706                 ret = btrfs_next_item(dev_root, &path);
9707                 if (ret)
9708                         break;
9709         }
9710         btrfs_release_path(&path);
9711
9712         if (used != total) {
9713                 btrfs_item_key_to_cpu(eb, &key, slot);
9714                 error(
9715 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9716                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9717                         BTRFS_DEV_EXTENT_KEY, dev_id);
9718                 return ACCOUNTING_MISMATCH;
9719         }
9720         return 0;
9721 }
9722
9723 /*
9724  * Check a block group item with its referencer (chunk) and its used space
9725  * with extent/metadata item
9726  */
9727 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9728                                   struct extent_buffer *eb, int slot)
9729 {
9730         struct btrfs_root *extent_root = fs_info->extent_root;
9731         struct btrfs_root *chunk_root = fs_info->chunk_root;
9732         struct btrfs_block_group_item *bi;
9733         struct btrfs_block_group_item bg_item;
9734         struct btrfs_path path;
9735         struct btrfs_key bg_key;
9736         struct btrfs_key chunk_key;
9737         struct btrfs_key extent_key;
9738         struct btrfs_chunk *chunk;
9739         struct extent_buffer *leaf;
9740         struct btrfs_extent_item *ei;
9741         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9742         u64 flags;
9743         u64 bg_flags;
9744         u64 used;
9745         u64 total = 0;
9746         int ret;
9747         int err = 0;
9748
9749         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9750         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9751         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9752         used = btrfs_block_group_used(&bg_item);
9753         bg_flags = btrfs_block_group_flags(&bg_item);
9754
9755         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9756         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9757         chunk_key.offset = bg_key.objectid;
9758
9759         btrfs_init_path(&path);
9760         /* Search for the referencer chunk */
9761         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9762         if (ret) {
9763                 error(
9764                 "block group[%llu %llu] did not find the related chunk item",
9765                         bg_key.objectid, bg_key.offset);
9766                 err |= REFERENCER_MISSING;
9767         } else {
9768                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9769                                         struct btrfs_chunk);
9770                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9771                                                 bg_key.offset) {
9772                         error(
9773         "block group[%llu %llu] related chunk item length does not match",
9774                                 bg_key.objectid, bg_key.offset);
9775                         err |= REFERENCER_MISMATCH;
9776                 }
9777         }
9778         btrfs_release_path(&path);
9779
9780         /* Search from the block group bytenr */
9781         extent_key.objectid = bg_key.objectid;
9782         extent_key.type = 0;
9783         extent_key.offset = 0;
9784
9785         btrfs_init_path(&path);
9786         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9787         if (ret < 0)
9788                 goto out;
9789
9790         /* Iterate extent tree to account used space */
9791         while (1) {
9792                 leaf = path.nodes[0];
9793                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9794                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9795                         break;
9796
9797                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9798                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9799                         goto next;
9800                 if (extent_key.objectid < bg_key.objectid)
9801                         goto next;
9802
9803                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9804                         total += nodesize;
9805                 else
9806                         total += extent_key.offset;
9807
9808                 ei = btrfs_item_ptr(leaf, path.slots[0],
9809                                     struct btrfs_extent_item);
9810                 flags = btrfs_extent_flags(leaf, ei);
9811                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9812                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9813                                 error(
9814                         "bad extent[%llu, %llu) type mismatch with chunk",
9815                                         extent_key.objectid,
9816                                         extent_key.objectid + extent_key.offset);
9817                                 err |= CHUNK_TYPE_MISMATCH;
9818                         }
9819                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9820                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9821                                     BTRFS_BLOCK_GROUP_METADATA))) {
9822                                 error(
9823                         "bad extent[%llu, %llu) type mismatch with chunk",
9824                                         extent_key.objectid,
9825                                         extent_key.objectid + nodesize);
9826                                 err |= CHUNK_TYPE_MISMATCH;
9827                         }
9828                 }
9829 next:
9830                 ret = btrfs_next_item(extent_root, &path);
9831                 if (ret)
9832                         break;
9833         }
9834
9835 out:
9836         btrfs_release_path(&path);
9837
9838         if (total != used) {
9839                 error(
9840                 "block group[%llu %llu] used %llu but extent items used %llu",
9841                         bg_key.objectid, bg_key.offset, used, total);
9842                 err |= ACCOUNTING_MISMATCH;
9843         }
9844         return err;
9845 }
9846
9847 /*
9848  * Check a chunk item.
9849  * Including checking all referred dev_extents and block group
9850  */
9851 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9852                             struct extent_buffer *eb, int slot)
9853 {
9854         struct btrfs_root *extent_root = fs_info->extent_root;
9855         struct btrfs_root *dev_root = fs_info->dev_root;
9856         struct btrfs_path path;
9857         struct btrfs_key chunk_key;
9858         struct btrfs_key bg_key;
9859         struct btrfs_key devext_key;
9860         struct btrfs_chunk *chunk;
9861         struct extent_buffer *leaf;
9862         struct btrfs_block_group_item *bi;
9863         struct btrfs_block_group_item bg_item;
9864         struct btrfs_dev_extent *ptr;
9865         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9866         u64 length;
9867         u64 chunk_end;
9868         u64 type;
9869         u64 profile;
9870         int num_stripes;
9871         u64 offset;
9872         u64 objectid;
9873         int i;
9874         int ret;
9875         int err = 0;
9876
9877         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9878         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9879         length = btrfs_chunk_length(eb, chunk);
9880         chunk_end = chunk_key.offset + length;
9881         if (!IS_ALIGNED(length, sectorsize)) {
9882                 error("chunk[%llu %llu) not aligned to %u",
9883                         chunk_key.offset, chunk_end, sectorsize);
9884                 err |= BYTES_UNALIGNED;
9885                 goto out;
9886         }
9887
9888         type = btrfs_chunk_type(eb, chunk);
9889         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9890         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9891                 error("chunk[%llu %llu) has no chunk type",
9892                         chunk_key.offset, chunk_end);
9893                 err |= UNKNOWN_TYPE;
9894         }
9895         if (profile && (profile & (profile - 1))) {
9896                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9897                         chunk_key.offset, chunk_end, profile);
9898                 err |= UNKNOWN_TYPE;
9899         }
9900
9901         bg_key.objectid = chunk_key.offset;
9902         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9903         bg_key.offset = length;
9904
9905         btrfs_init_path(&path);
9906         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9907         if (ret) {
9908                 error(
9909                 "chunk[%llu %llu) did not find the related block group item",
9910                         chunk_key.offset, chunk_end);
9911                 err |= REFERENCER_MISSING;
9912         } else{
9913                 leaf = path.nodes[0];
9914                 bi = btrfs_item_ptr(leaf, path.slots[0],
9915                                     struct btrfs_block_group_item);
9916                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9917                                    sizeof(bg_item));
9918                 if (btrfs_block_group_flags(&bg_item) != type) {
9919                         error(
9920 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9921                                 chunk_key.offset, chunk_end, type,
9922                                 btrfs_block_group_flags(&bg_item));
9923                         err |= REFERENCER_MISSING;
9924                 }
9925         }
9926
9927         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9928         for (i = 0; i < num_stripes; i++) {
9929                 btrfs_release_path(&path);
9930                 btrfs_init_path(&path);
9931                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9932                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9933                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9934
9935                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9936                                         0, 0);
9937                 if (ret)
9938                         goto not_match_dev;
9939
9940                 leaf = path.nodes[0];
9941                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9942                                      struct btrfs_dev_extent);
9943                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9944                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9945                 if (objectid != chunk_key.objectid ||
9946                     offset != chunk_key.offset ||
9947                     btrfs_dev_extent_length(leaf, ptr) != length)
9948                         goto not_match_dev;
9949                 continue;
9950 not_match_dev:
9951                 err |= BACKREF_MISSING;
9952                 error(
9953                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9954                         chunk_key.objectid, chunk_end, i);
9955                 continue;
9956         }
9957         btrfs_release_path(&path);
9958 out:
9959         return err;
9960 }
9961
9962 /*
9963  * Main entry function to check known items and update related accounting info
9964  */
9965 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9966 {
9967         struct btrfs_fs_info *fs_info = root->fs_info;
9968         struct btrfs_key key;
9969         int slot = 0;
9970         int type;
9971         struct btrfs_extent_data_ref *dref;
9972         int ret;
9973         int err = 0;
9974
9975 next:
9976         btrfs_item_key_to_cpu(eb, &key, slot);
9977         type = key.type;
9978
9979         switch (type) {
9980         case BTRFS_EXTENT_DATA_KEY:
9981                 ret = check_extent_data_item(root, eb, slot);
9982                 err |= ret;
9983                 break;
9984         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9985                 ret = check_block_group_item(fs_info, eb, slot);
9986                 err |= ret;
9987                 break;
9988         case BTRFS_DEV_ITEM_KEY:
9989                 ret = check_dev_item(fs_info, eb, slot);
9990                 err |= ret;
9991                 break;
9992         case BTRFS_CHUNK_ITEM_KEY:
9993                 ret = check_chunk_item(fs_info, eb, slot);
9994                 err |= ret;
9995                 break;
9996         case BTRFS_DEV_EXTENT_KEY:
9997                 ret = check_dev_extent_item(fs_info, eb, slot);
9998                 err |= ret;
9999                 break;
10000         case BTRFS_EXTENT_ITEM_KEY:
10001         case BTRFS_METADATA_ITEM_KEY:
10002                 ret = check_extent_item(fs_info, eb, slot);
10003                 err |= ret;
10004                 break;
10005         case BTRFS_EXTENT_CSUM_KEY:
10006                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10007                 break;
10008         case BTRFS_TREE_BLOCK_REF_KEY:
10009                 ret = check_tree_block_backref(fs_info, key.offset,
10010                                                key.objectid, -1);
10011                 err |= ret;
10012                 break;
10013         case BTRFS_EXTENT_DATA_REF_KEY:
10014                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10015                 ret = check_extent_data_backref(fs_info,
10016                                 btrfs_extent_data_ref_root(eb, dref),
10017                                 btrfs_extent_data_ref_objectid(eb, dref),
10018                                 btrfs_extent_data_ref_offset(eb, dref),
10019                                 key.objectid, 0,
10020                                 btrfs_extent_data_ref_count(eb, dref));
10021                 err |= ret;
10022                 break;
10023         case BTRFS_SHARED_BLOCK_REF_KEY:
10024                 ret = check_shared_block_backref(fs_info, key.offset,
10025                                                  key.objectid, -1);
10026                 err |= ret;
10027                 break;
10028         case BTRFS_SHARED_DATA_REF_KEY:
10029                 ret = check_shared_data_backref(fs_info, key.offset,
10030                                                 key.objectid);
10031                 err |= ret;
10032                 break;
10033         default:
10034                 break;
10035         }
10036
10037         if (++slot < btrfs_header_nritems(eb))
10038                 goto next;
10039
10040         return err;
10041 }
10042
10043 /*
10044  * Helper function for later fs/subvol tree check.  To determine if a tree
10045  * block should be checked.
10046  * This function will ensure only the direct referencer with lowest rootid to
10047  * check a fs/subvolume tree block.
10048  *
10049  * Backref check at extent tree would detect errors like missing subvolume
10050  * tree, so we can do aggressive check to reduce duplicated checks.
10051  */
10052 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10053 {
10054         struct btrfs_root *extent_root = root->fs_info->extent_root;
10055         struct btrfs_key key;
10056         struct btrfs_path path;
10057         struct extent_buffer *leaf;
10058         int slot;
10059         struct btrfs_extent_item *ei;
10060         unsigned long ptr;
10061         unsigned long end;
10062         int type;
10063         u32 item_size;
10064         u64 offset;
10065         struct btrfs_extent_inline_ref *iref;
10066         int ret;
10067
10068         btrfs_init_path(&path);
10069         key.objectid = btrfs_header_bytenr(eb);
10070         key.type = BTRFS_METADATA_ITEM_KEY;
10071         key.offset = (u64)-1;
10072
10073         /*
10074          * Any failure in backref resolving means we can't determine
10075          * whom the tree block belongs to.
10076          * So in that case, we need to check that tree block
10077          */
10078         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10079         if (ret < 0)
10080                 goto need_check;
10081
10082         ret = btrfs_previous_extent_item(extent_root, &path,
10083                                          btrfs_header_bytenr(eb));
10084         if (ret)
10085                 goto need_check;
10086
10087         leaf = path.nodes[0];
10088         slot = path.slots[0];
10089         btrfs_item_key_to_cpu(leaf, &key, slot);
10090         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10091
10092         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10093                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10094         } else {
10095                 struct btrfs_tree_block_info *info;
10096
10097                 info = (struct btrfs_tree_block_info *)(ei + 1);
10098                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10099         }
10100
10101         item_size = btrfs_item_size_nr(leaf, slot);
10102         ptr = (unsigned long)iref;
10103         end = (unsigned long)ei + item_size;
10104         while (ptr < end) {
10105                 iref = (struct btrfs_extent_inline_ref *)ptr;
10106                 type = btrfs_extent_inline_ref_type(leaf, iref);
10107                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10108
10109                 /*
10110                  * We only check the tree block if current root is
10111                  * the lowest referencer of it.
10112                  */
10113                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10114                     offset < root->objectid) {
10115                         btrfs_release_path(&path);
10116                         return 0;
10117                 }
10118
10119                 ptr += btrfs_extent_inline_ref_size(type);
10120         }
10121         /*
10122          * Normally we should also check keyed tree block ref, but that may be
10123          * very time consuming.  Inlined ref should already make us skip a lot
10124          * of refs now.  So skip search keyed tree block ref.
10125          */
10126
10127 need_check:
10128         btrfs_release_path(&path);
10129         return 1;
10130 }
10131
10132 /*
10133  * Traversal function for tree block. We will do:
10134  * 1) Skip shared fs/subvolume tree blocks
10135  * 2) Update related bytes accounting
10136  * 3) Pre-order traversal
10137  */
10138 static int traverse_tree_block(struct btrfs_root *root,
10139                                 struct extent_buffer *node)
10140 {
10141         struct extent_buffer *eb;
10142         struct btrfs_key key;
10143         struct btrfs_key drop_key;
10144         int level;
10145         u64 nr;
10146         int i;
10147         int err = 0;
10148         int ret;
10149
10150         /*
10151          * Skip shared fs/subvolume tree block, in that case they will
10152          * be checked by referencer with lowest rootid
10153          */
10154         if (is_fstree(root->objectid) && !should_check(root, node))
10155                 return 0;
10156
10157         /* Update bytes accounting */
10158         total_btree_bytes += node->len;
10159         if (fs_root_objectid(btrfs_header_owner(node)))
10160                 total_fs_tree_bytes += node->len;
10161         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10162                 total_extent_tree_bytes += node->len;
10163         if (!found_old_backref &&
10164             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10165             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10166             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10167                 found_old_backref = 1;
10168
10169         /* pre-order tranversal, check itself first */
10170         level = btrfs_header_level(node);
10171         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10172                                    btrfs_header_level(node),
10173                                    btrfs_header_owner(node));
10174         err |= ret;
10175         if (err)
10176                 error(
10177         "check %s failed root %llu bytenr %llu level %d, force continue check",
10178                         level ? "node":"leaf", root->objectid,
10179                         btrfs_header_bytenr(node), btrfs_header_level(node));
10180
10181         if (!level) {
10182                 btree_space_waste += btrfs_leaf_free_space(root, node);
10183                 ret = check_leaf_items(root, node);
10184                 err |= ret;
10185                 return err;
10186         }
10187
10188         nr = btrfs_header_nritems(node);
10189         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10190         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10191                 sizeof(struct btrfs_key_ptr);
10192
10193         /* Then check all its children */
10194         for (i = 0; i < nr; i++) {
10195                 u64 blocknr = btrfs_node_blockptr(node, i);
10196
10197                 btrfs_node_key_to_cpu(node, &key, i);
10198                 if (level == root->root_item.drop_level &&
10199                     is_dropped_key(&key, &drop_key))
10200                         continue;
10201
10202                 /*
10203                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10204                  * to call the function itself.
10205                  */
10206                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10207                 if (extent_buffer_uptodate(eb)) {
10208                         ret = traverse_tree_block(root, eb);
10209                         err |= ret;
10210                 }
10211                 free_extent_buffer(eb);
10212         }
10213
10214         return err;
10215 }
10216
10217 /*
10218  * Low memory usage version check_chunks_and_extents.
10219  */
10220 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10221 {
10222         struct btrfs_path path;
10223         struct btrfs_key key;
10224         struct btrfs_root *root1;
10225         struct btrfs_root *cur_root;
10226         int err = 0;
10227         int ret;
10228
10229         root1 = root->fs_info->chunk_root;
10230         ret = traverse_tree_block(root1, root1->node);
10231         err |= ret;
10232
10233         root1 = root->fs_info->tree_root;
10234         ret = traverse_tree_block(root1, root1->node);
10235         err |= ret;
10236
10237         btrfs_init_path(&path);
10238         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10239         key.offset = 0;
10240         key.type = BTRFS_ROOT_ITEM_KEY;
10241
10242         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10243         if (ret) {
10244                 error("cannot find extent treet in tree_root");
10245                 goto out;
10246         }
10247
10248         while (1) {
10249                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10250                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10251                         goto next;
10252                 key.offset = (u64)-1;
10253
10254                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10255                 if (IS_ERR(cur_root) || !cur_root) {
10256                         error("failed to read tree: %lld", key.objectid);
10257                         goto next;
10258                 }
10259
10260                 ret = traverse_tree_block(cur_root, cur_root->node);
10261                 err |= ret;
10262
10263 next:
10264                 ret = btrfs_next_item(root1, &path);
10265                 if (ret)
10266                         goto out;
10267         }
10268
10269 out:
10270         btrfs_release_path(&path);
10271         return err;
10272 }
10273
10274 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10275                            struct btrfs_root *root, int overwrite)
10276 {
10277         struct extent_buffer *c;
10278         struct extent_buffer *old = root->node;
10279         int level;
10280         int ret;
10281         struct btrfs_disk_key disk_key = {0,0,0};
10282
10283         level = 0;
10284
10285         if (overwrite) {
10286                 c = old;
10287                 extent_buffer_get(c);
10288                 goto init;
10289         }
10290         c = btrfs_alloc_free_block(trans, root,
10291                                    root->nodesize,
10292                                    root->root_key.objectid,
10293                                    &disk_key, level, 0, 0);
10294         if (IS_ERR(c)) {
10295                 c = old;
10296                 extent_buffer_get(c);
10297                 overwrite = 1;
10298         }
10299 init:
10300         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10301         btrfs_set_header_level(c, level);
10302         btrfs_set_header_bytenr(c, c->start);
10303         btrfs_set_header_generation(c, trans->transid);
10304         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10305         btrfs_set_header_owner(c, root->root_key.objectid);
10306
10307         write_extent_buffer(c, root->fs_info->fsid,
10308                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10309
10310         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10311                             btrfs_header_chunk_tree_uuid(c),
10312                             BTRFS_UUID_SIZE);
10313
10314         btrfs_mark_buffer_dirty(c);
10315         /*
10316          * this case can happen in the following case:
10317          *
10318          * 1.overwrite previous root.
10319          *
10320          * 2.reinit reloc data root, this is because we skip pin
10321          * down reloc data tree before which means we can allocate
10322          * same block bytenr here.
10323          */
10324         if (old->start == c->start) {
10325                 btrfs_set_root_generation(&root->root_item,
10326                                           trans->transid);
10327                 root->root_item.level = btrfs_header_level(root->node);
10328                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10329                                         &root->root_key, &root->root_item);
10330                 if (ret) {
10331                         free_extent_buffer(c);
10332                         return ret;
10333                 }
10334         }
10335         free_extent_buffer(old);
10336         root->node = c;
10337         add_root_to_dirty_list(root);
10338         return 0;
10339 }
10340
/*
 * Pin the extent of tree block @eb and, recursively, of the blocks below it.
 *
 * In a leaf, every root item except those of the extent root and the reloc
 * roots is followed into the tree it points to.  In a node, the children
 * are followed; level 1 nodes outside the tree root only pin their children
 * without reading them.
 *
 * @fs_info:   filesystem the block belongs to
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root itself
 *
 * Return 0 on success, -EIO if a block cannot be read, or the error of a
 * nested call.
 */
static int pin_down_tree_blocks(struct fs_info_placeholder_unused *unused_fix_me);
10420
10421 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10422 {
10423         int ret;
10424
10425         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10426         if (ret)
10427                 return ret;
10428
10429         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10430 }
10431
10432 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10433 {
10434         struct btrfs_block_group_cache *cache;
10435         struct btrfs_path path;
10436         struct extent_buffer *leaf;
10437         struct btrfs_chunk *chunk;
10438         struct btrfs_key key;
10439         int ret;
10440         u64 start;
10441
10442         btrfs_init_path(&path);
10443         key.objectid = 0;
10444         key.type = BTRFS_CHUNK_ITEM_KEY;
10445         key.offset = 0;
10446         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10447         if (ret < 0) {
10448                 btrfs_release_path(&path);
10449                 return ret;
10450         }
10451
10452         /*
10453          * We do this in case the block groups were screwed up and had alloc
10454          * bits that aren't actually set on the chunks.  This happens with
10455          * restored images every time and could happen in real life I guess.
10456          */
10457         fs_info->avail_data_alloc_bits = 0;
10458         fs_info->avail_metadata_alloc_bits = 0;
10459         fs_info->avail_system_alloc_bits = 0;
10460
10461         /* First we need to create the in-memory block groups */
10462         while (1) {
10463                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10464                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10465                         if (ret < 0) {
10466                                 btrfs_release_path(&path);
10467                                 return ret;
10468                         }
10469                         if (ret) {
10470                                 ret = 0;
10471                                 break;
10472                         }
10473                 }
10474                 leaf = path.nodes[0];
10475                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10476                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10477                         path.slots[0]++;
10478                         continue;
10479                 }
10480
10481                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10482                 btrfs_add_block_group(fs_info, 0,
10483                                       btrfs_chunk_type(leaf, chunk),
10484                                       key.objectid, key.offset,
10485                                       btrfs_chunk_length(leaf, chunk));
10486                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10487                                  key.offset + btrfs_chunk_length(leaf, chunk),
10488                                  GFP_NOFS);
10489                 path.slots[0]++;
10490         }
10491         start = 0;
10492         while (1) {
10493                 cache = btrfs_lookup_first_block_group(fs_info, start);
10494                 if (!cache)
10495                         break;
10496                 cache->cached = 1;
10497                 start = cache->key.objectid + cache->key.offset;
10498         }
10499
10500         btrfs_release_path(&path);
10501         return 0;
10502 }
10503
10504 static int reset_balance(struct btrfs_trans_handle *trans,
10505                          struct btrfs_fs_info *fs_info)
10506 {
10507         struct btrfs_root *root = fs_info->tree_root;
10508         struct btrfs_path path;
10509         struct extent_buffer *leaf;
10510         struct btrfs_key key;
10511         int del_slot, del_nr = 0;
10512         int ret;
10513         int found = 0;
10514
10515         btrfs_init_path(&path);
10516         key.objectid = BTRFS_BALANCE_OBJECTID;
10517         key.type = BTRFS_BALANCE_ITEM_KEY;
10518         key.offset = 0;
10519         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10520         if (ret) {
10521                 if (ret > 0)
10522                         ret = 0;
10523                 if (!ret)
10524                         goto reinit_data_reloc;
10525                 else
10526                         goto out;
10527         }
10528
10529         ret = btrfs_del_item(trans, root, &path);
10530         if (ret)
10531                 goto out;
10532         btrfs_release_path(&path);
10533
10534         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10535         key.type = BTRFS_ROOT_ITEM_KEY;
10536         key.offset = 0;
10537         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10538         if (ret < 0)
10539                 goto out;
10540         while (1) {
10541                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10542                         if (!found)
10543                                 break;
10544
10545                         if (del_nr) {
10546                                 ret = btrfs_del_items(trans, root, &path,
10547                                                       del_slot, del_nr);
10548                                 del_nr = 0;
10549                                 if (ret)
10550                                         goto out;
10551                         }
10552                         key.offset++;
10553                         btrfs_release_path(&path);
10554
10555                         found = 0;
10556                         ret = btrfs_search_slot(trans, root, &key, &path,
10557                                                 -1, 1);
10558                         if (ret < 0)
10559                                 goto out;
10560                         continue;
10561                 }
10562                 found = 1;
10563                 leaf = path.nodes[0];
10564                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10565                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10566                         break;
10567                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10568                         path.slots[0]++;
10569                         continue;
10570                 }
10571                 if (!del_nr) {
10572                         del_slot = path.slots[0];
10573                         del_nr = 1;
10574                 } else {
10575                         del_nr++;
10576                 }
10577                 path.slots[0]++;
10578         }
10579
10580         if (del_nr) {
10581                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10582                 if (ret)
10583                         goto out;
10584         }
10585         btrfs_release_path(&path);
10586
10587 reinit_data_reloc:
10588         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10589         key.type = BTRFS_ROOT_ITEM_KEY;
10590         key.offset = (u64)-1;
10591         root = btrfs_read_fs_root(fs_info, &key);
10592         if (IS_ERR(root)) {
10593                 fprintf(stderr, "Error reading data reloc tree\n");
10594                 ret = PTR_ERR(root);
10595                 goto out;
10596         }
10597         record_root_in_trans(trans, root);
10598         ret = btrfs_fsck_reinit_root(trans, root, 0);
10599         if (ret)
10600                 goto out;
10601         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10602 out:
10603         btrfs_release_path(&path);
10604         return ret;
10605 }
10606
10607 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10608                               struct btrfs_fs_info *fs_info)
10609 {
10610         u64 start = 0;
10611         int ret;
10612
10613         /*
10614          * The only reason we don't do this is because right now we're just
10615          * walking the trees we find and pinning down their bytes, we don't look
10616          * at any of the leaves.  In order to do mixed groups we'd have to check
10617          * the leaves of any fs roots and pin down the bytes for any file
10618          * extents we find.  Not hard but why do it if we don't have to?
10619          */
10620         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10621                 fprintf(stderr, "We don't support re-initing the extent tree "
10622                         "for mixed block groups yet, please notify a btrfs "
10623                         "developer you want to do this so they can add this "
10624                         "functionality.\n");
10625                 return -EINVAL;
10626         }
10627
10628         /*
10629          * first we need to walk all of the trees except the extent tree and pin
10630          * down the bytes that are in use so we don't overwrite any existing
10631          * metadata.
10632          */
10633         ret = pin_metadata_blocks(fs_info);
10634         if (ret) {
10635                 fprintf(stderr, "error pinning down used bytes\n");
10636                 return ret;
10637         }
10638
10639         /*
10640          * Need to drop all the block groups since we're going to recreate all
10641          * of them again.
10642          */
10643         btrfs_free_block_groups(fs_info);
10644         ret = reset_block_groups(fs_info);
10645         if (ret) {
10646                 fprintf(stderr, "error resetting the block groups\n");
10647                 return ret;
10648         }
10649
10650         /* Ok we can allocate now, reinit the extent root */
10651         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10652         if (ret) {
10653                 fprintf(stderr, "extent root initialization failed\n");
10654                 /*
10655                  * When the transaction code is updated we should end the
10656                  * transaction, but for now progs only knows about commit so
10657                  * just return an error.
10658                  */
10659                 return ret;
10660         }
10661
10662         /*
10663          * Now we have all the in-memory block groups setup so we can make
10664          * allocations properly, and the metadata we care about is safe since we
10665          * pinned all of it above.
10666          */
10667         while (1) {
10668                 struct btrfs_block_group_cache *cache;
10669
10670                 cache = btrfs_lookup_first_block_group(fs_info, start);
10671                 if (!cache)
10672                         break;
10673                 start = cache->key.objectid + cache->key.offset;
10674                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10675                                         &cache->key, &cache->item,
10676                                         sizeof(cache->item));
10677                 if (ret) {
10678                         fprintf(stderr, "Error adding block group\n");
10679                         return ret;
10680                 }
10681                 btrfs_extent_post_op(trans, fs_info->extent_root);
10682         }
10683
10684         ret = reset_balance(trans, fs_info);
10685         if (ret)
10686                 fprintf(stderr, "error resetting the pending balance\n");
10687
10688         return ret;
10689 }
10690
10691 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10692 {
10693         struct btrfs_path path;
10694         struct btrfs_trans_handle *trans;
10695         struct btrfs_key key;
10696         int ret;
10697
10698         printf("Recowing metadata block %llu\n", eb->start);
10699         key.objectid = btrfs_header_owner(eb);
10700         key.type = BTRFS_ROOT_ITEM_KEY;
10701         key.offset = (u64)-1;
10702
10703         root = btrfs_read_fs_root(root->fs_info, &key);
10704         if (IS_ERR(root)) {
10705                 fprintf(stderr, "Couldn't find owner root %llu\n",
10706                         key.objectid);
10707                 return PTR_ERR(root);
10708         }
10709
10710         trans = btrfs_start_transaction(root, 1);
10711         if (IS_ERR(trans))
10712                 return PTR_ERR(trans);
10713
10714         btrfs_init_path(&path);
10715         path.lowest_level = btrfs_header_level(eb);
10716         if (path.lowest_level)
10717                 btrfs_node_key_to_cpu(eb, &key, 0);
10718         else
10719                 btrfs_item_key_to_cpu(eb, &key, 0);
10720
10721         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10722         btrfs_commit_transaction(trans, root);
10723         btrfs_release_path(&path);
10724         return ret;
10725 }
10726
10727 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10728 {
10729         struct btrfs_path path;
10730         struct btrfs_trans_handle *trans;
10731         struct btrfs_key key;
10732         int ret;
10733
10734         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10735                bad->key.type, bad->key.offset);
10736         key.objectid = bad->root_id;
10737         key.type = BTRFS_ROOT_ITEM_KEY;
10738         key.offset = (u64)-1;
10739
10740         root = btrfs_read_fs_root(root->fs_info, &key);
10741         if (IS_ERR(root)) {
10742                 fprintf(stderr, "Couldn't find owner root %llu\n",
10743                         key.objectid);
10744                 return PTR_ERR(root);
10745         }
10746
10747         trans = btrfs_start_transaction(root, 1);
10748         if (IS_ERR(trans))
10749                 return PTR_ERR(trans);
10750
10751         btrfs_init_path(&path);
10752         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10753         if (ret) {
10754                 if (ret > 0)
10755                         ret = 0;
10756                 goto out;
10757         }
10758         ret = btrfs_del_item(trans, root, &path);
10759 out:
10760         btrfs_commit_transaction(trans, root);
10761         btrfs_release_path(&path);
10762         return ret;
10763 }
10764
10765 static int zero_log_tree(struct btrfs_root *root)
10766 {
10767         struct btrfs_trans_handle *trans;
10768         int ret;
10769
10770         trans = btrfs_start_transaction(root, 1);
10771         if (IS_ERR(trans)) {
10772                 ret = PTR_ERR(trans);
10773                 return ret;
10774         }
10775         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10776         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10777         ret = btrfs_commit_transaction(trans, root);
10778         return ret;
10779 }
10780
10781 static int populate_csum(struct btrfs_trans_handle *trans,
10782                          struct btrfs_root *csum_root, char *buf, u64 start,
10783                          u64 len)
10784 {
10785         u64 offset = 0;
10786         u64 sectorsize;
10787         int ret = 0;
10788
10789         while (offset < len) {
10790                 sectorsize = csum_root->sectorsize;
10791                 ret = read_extent_data(csum_root, buf, start + offset,
10792                                        &sectorsize, 0);
10793                 if (ret)
10794                         break;
10795                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10796                                             start + offset, buf, sectorsize);
10797                 if (ret)
10798                         break;
10799                 offset += sectorsize;
10800         }
10801         return ret;
10802 }
10803
10804 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10805                                       struct btrfs_root *csum_root,
10806                                       struct btrfs_root *cur_root)
10807 {
10808         struct btrfs_path path;
10809         struct btrfs_key key;
10810         struct extent_buffer *node;
10811         struct btrfs_file_extent_item *fi;
10812         char *buf = NULL;
10813         u64 start = 0;
10814         u64 len = 0;
10815         int slot = 0;
10816         int ret = 0;
10817
10818         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10819         if (!buf)
10820                 return -ENOMEM;
10821
10822         btrfs_init_path(&path);
10823         key.objectid = 0;
10824         key.offset = 0;
10825         key.type = 0;
10826         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
10827         if (ret < 0)
10828                 goto out;
10829         /* Iterate all regular file extents and fill its csum */
10830         while (1) {
10831                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10832
10833                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10834                         goto next;
10835                 node = path.nodes[0];
10836                 slot = path.slots[0];
10837                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10838                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10839                         goto next;
10840                 start = btrfs_file_extent_disk_bytenr(node, fi);
10841                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10842
10843                 ret = populate_csum(trans, csum_root, buf, start, len);
10844                 if (ret == -EEXIST)
10845                         ret = 0;
10846                 if (ret < 0)
10847                         goto out;
10848 next:
10849                 /*
10850                  * TODO: if next leaf is corrupted, jump to nearest next valid
10851                  * leaf.
10852                  */
10853                 ret = btrfs_next_item(cur_root, &path);
10854                 if (ret < 0)
10855                         goto out;
10856                 if (ret > 0) {
10857                         ret = 0;
10858                         goto out;
10859                 }
10860         }
10861
10862 out:
10863         btrfs_release_path(&path);
10864         free(buf);
10865         return ret;
10866 }
10867
10868 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10869                                   struct btrfs_root *csum_root)
10870 {
10871         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10872         struct btrfs_path path;
10873         struct btrfs_root *tree_root = fs_info->tree_root;
10874         struct btrfs_root *cur_root;
10875         struct extent_buffer *node;
10876         struct btrfs_key key;
10877         int slot = 0;
10878         int ret = 0;
10879
10880         btrfs_init_path(&path);
10881         key.objectid = BTRFS_FS_TREE_OBJECTID;
10882         key.offset = 0;
10883         key.type = BTRFS_ROOT_ITEM_KEY;
10884         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
10885         if (ret < 0)
10886                 goto out;
10887         if (ret > 0) {
10888                 ret = -ENOENT;
10889                 goto out;
10890         }
10891
10892         while (1) {
10893                 node = path.nodes[0];
10894                 slot = path.slots[0];
10895                 btrfs_item_key_to_cpu(node, &key, slot);
10896                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10897                         goto out;
10898                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10899                         goto next;
10900                 if (!is_fstree(key.objectid))
10901                         goto next;
10902                 key.offset = (u64)-1;
10903
10904                 cur_root = btrfs_read_fs_root(fs_info, &key);
10905                 if (IS_ERR(cur_root) || !cur_root) {
10906                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10907                                 key.objectid);
10908                         goto out;
10909                 }
10910                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10911                                 cur_root);
10912                 if (ret < 0)
10913                         goto out;
10914 next:
10915                 ret = btrfs_next_item(tree_root, &path);
10916                 if (ret > 0) {
10917                         ret = 0;
10918                         goto out;
10919                 }
10920                 if (ret < 0)
10921                         goto out;
10922         }
10923
10924 out:
10925         btrfs_release_path(&path);
10926         return ret;
10927 }
10928
10929 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10930                                       struct btrfs_root *csum_root)
10931 {
10932         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10933         struct btrfs_path path;
10934         struct btrfs_extent_item *ei;
10935         struct extent_buffer *leaf;
10936         char *buf;
10937         struct btrfs_key key;
10938         int ret;
10939
10940         btrfs_init_path(&path);
10941         key.objectid = 0;
10942         key.type = BTRFS_EXTENT_ITEM_KEY;
10943         key.offset = 0;
10944         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10945         if (ret < 0) {
10946                 btrfs_release_path(&path);
10947                 return ret;
10948         }
10949
10950         buf = malloc(csum_root->sectorsize);
10951         if (!buf) {
10952                 btrfs_release_path(&path);
10953                 return -ENOMEM;
10954         }
10955
10956         while (1) {
10957                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10958                         ret = btrfs_next_leaf(extent_root, &path);
10959                         if (ret < 0)
10960                                 break;
10961                         if (ret) {
10962                                 ret = 0;
10963                                 break;
10964                         }
10965                 }
10966                 leaf = path.nodes[0];
10967
10968                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10969                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10970                         path.slots[0]++;
10971                         continue;
10972                 }
10973
10974                 ei = btrfs_item_ptr(leaf, path.slots[0],
10975                                     struct btrfs_extent_item);
10976                 if (!(btrfs_extent_flags(leaf, ei) &
10977                       BTRFS_EXTENT_FLAG_DATA)) {
10978                         path.slots[0]++;
10979                         continue;
10980                 }
10981
10982                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10983                                     key.offset);
10984                 if (ret)
10985                         break;
10986                 path.slots[0]++;
10987         }
10988
10989         btrfs_release_path(&path);
10990         free(buf);
10991         return ret;
10992 }
10993
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * After an extent tree init all extent info has been wiped, so the extent
 * tree cannot be used as the source.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
11010
11011 static void free_roots_info_cache(void)
11012 {
11013         if (!roots_info_cache)
11014                 return;
11015
11016         while (!cache_tree_empty(roots_info_cache)) {
11017                 struct cache_extent *entry;
11018                 struct root_item_info *rii;
11019
11020                 entry = first_cache_extent(roots_info_cache);
11021                 if (!entry)
11022                         break;
11023                 remove_cache_extent(roots_info_cache, entry);
11024                 rii = container_of(entry, struct root_item_info, cache_extent);
11025                 free(rii);
11026         }
11027
11028         free(roots_info_cache);
11029         roots_info_cache = NULL;
11030 }
11031
11032 static int build_roots_info_cache(struct btrfs_fs_info *info)
11033 {
11034         int ret = 0;
11035         struct btrfs_key key;
11036         struct extent_buffer *leaf;
11037         struct btrfs_path path;
11038
11039         if (!roots_info_cache) {
11040                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11041                 if (!roots_info_cache)
11042                         return -ENOMEM;
11043                 cache_tree_init(roots_info_cache);
11044         }
11045
11046         btrfs_init_path(&path);
11047         key.objectid = 0;
11048         key.type = BTRFS_EXTENT_ITEM_KEY;
11049         key.offset = 0;
11050         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11051         if (ret < 0)
11052                 goto out;
11053         leaf = path.nodes[0];
11054
11055         while (1) {
11056                 struct btrfs_key found_key;
11057                 struct btrfs_extent_item *ei;
11058                 struct btrfs_extent_inline_ref *iref;
11059                 int slot = path.slots[0];
11060                 int type;
11061                 u64 flags;
11062                 u64 root_id;
11063                 u8 level;
11064                 struct cache_extent *entry;
11065                 struct root_item_info *rii;
11066
11067                 if (slot >= btrfs_header_nritems(leaf)) {
11068                         ret = btrfs_next_leaf(info->extent_root, &path);
11069                         if (ret < 0) {
11070                                 break;
11071                         } else if (ret) {
11072                                 ret = 0;
11073                                 break;
11074                         }
11075                         leaf = path.nodes[0];
11076                         slot = path.slots[0];
11077                 }
11078
11079                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11080
11081                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11082                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11083                         goto next;
11084
11085                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11086                 flags = btrfs_extent_flags(leaf, ei);
11087
11088                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11089                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11090                         goto next;
11091
11092                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11093                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11094                         level = found_key.offset;
11095                 } else {
11096                         struct btrfs_tree_block_info *binfo;
11097
11098                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11099                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11100                         level = btrfs_tree_block_level(leaf, binfo);
11101                 }
11102
11103                 /*
11104                  * For a root extent, it must be of the following type and the
11105                  * first (and only one) iref in the item.
11106                  */
11107                 type = btrfs_extent_inline_ref_type(leaf, iref);
11108                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11109                         goto next;
11110
11111                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11112                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11113                 if (!entry) {
11114                         rii = malloc(sizeof(struct root_item_info));
11115                         if (!rii) {
11116                                 ret = -ENOMEM;
11117                                 goto out;
11118                         }
11119                         rii->cache_extent.start = root_id;
11120                         rii->cache_extent.size = 1;
11121                         rii->level = (u8)-1;
11122                         entry = &rii->cache_extent;
11123                         ret = insert_cache_extent(roots_info_cache, entry);
11124                         ASSERT(ret == 0);
11125                 } else {
11126                         rii = container_of(entry, struct root_item_info,
11127                                            cache_extent);
11128                 }
11129
11130                 ASSERT(rii->cache_extent.start == root_id);
11131                 ASSERT(rii->cache_extent.size == 1);
11132
11133                 if (level > rii->level || rii->level == (u8)-1) {
11134                         rii->level = level;
11135                         rii->bytenr = found_key.objectid;
11136                         rii->gen = btrfs_extent_generation(leaf, ei);
11137                         rii->node_count = 1;
11138                 } else if (level == rii->level) {
11139                         rii->node_count++;
11140                 }
11141 next:
11142                 path.slots[0]++;
11143         }
11144
11145 out:
11146         btrfs_release_path(&path);
11147
11148         return ret;
11149 }
11150
11151 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11152                                   struct btrfs_path *path,
11153                                   const struct btrfs_key *root_key,
11154                                   const int read_only_mode)
11155 {
11156         const u64 root_id = root_key->objectid;
11157         struct cache_extent *entry;
11158         struct root_item_info *rii;
11159         struct btrfs_root_item ri;
11160         unsigned long offset;
11161
11162         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11163         if (!entry) {
11164                 fprintf(stderr,
11165                         "Error: could not find extent items for root %llu\n",
11166                         root_key->objectid);
11167                 return -ENOENT;
11168         }
11169
11170         rii = container_of(entry, struct root_item_info, cache_extent);
11171         ASSERT(rii->cache_extent.start == root_id);
11172         ASSERT(rii->cache_extent.size == 1);
11173
11174         if (rii->node_count != 1) {
11175                 fprintf(stderr,
11176                         "Error: could not find btree root extent for root %llu\n",
11177                         root_id);
11178                 return -ENOENT;
11179         }
11180
11181         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11182         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11183
11184         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11185             btrfs_root_level(&ri) != rii->level ||
11186             btrfs_root_generation(&ri) != rii->gen) {
11187
11188                 /*
11189                  * If we're in repair mode but our caller told us to not update
11190                  * the root item, i.e. just check if it needs to be updated, don't
11191                  * print this message, since the caller will call us again shortly
11192                  * for the same root item without read only mode (the caller will
11193                  * open a transaction first).
11194                  */
11195                 if (!(read_only_mode && repair))
11196                         fprintf(stderr,
11197                                 "%sroot item for root %llu,"
11198                                 " current bytenr %llu, current gen %llu, current level %u,"
11199                                 " new bytenr %llu, new gen %llu, new level %u\n",
11200                                 (read_only_mode ? "" : "fixing "),
11201                                 root_id,
11202                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11203                                 btrfs_root_level(&ri),
11204                                 rii->bytenr, rii->gen, rii->level);
11205
11206                 if (btrfs_root_generation(&ri) > rii->gen) {
11207                         fprintf(stderr,
11208                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11209                                 root_id, btrfs_root_generation(&ri), rii->gen);
11210                         return -EINVAL;
11211                 }
11212
11213                 if (!read_only_mode) {
11214                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11215                         btrfs_set_root_level(&ri, rii->level);
11216                         btrfs_set_root_generation(&ri, rii->gen);
11217                         write_extent_buffer(path->nodes[0], &ri,
11218                                             offset, sizeof(ri));
11219                 }
11220
11221                 return 1;
11222         }
11223
11224         return 0;
11225 }
11226
11227 /*
11228  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11229  * caused read-only snapshots to be corrupted if they were created at a moment
11230  * when the source subvolume/snapshot had orphan items. The issue was that the
11231  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11232  * node instead of the post orphan cleanup root node.
11233  * So this function, and its callees, just detects and fixes those cases. Even
11234  * though the regression was for read-only snapshots, this function applies to
11235  * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing so makes other
11237  * repair code delete or modify backrefs in the extent tree for example, which
11238  * will result in an inconsistent fs after repairing the root items.
11239  */
11240 static int repair_root_items(struct btrfs_fs_info *info)
11241 {
11242         struct btrfs_path path;
11243         struct btrfs_key key;
11244         struct extent_buffer *leaf;
11245         struct btrfs_trans_handle *trans = NULL;
11246         int ret = 0;
11247         int bad_roots = 0;
11248         int need_trans = 0;
11249
11250         btrfs_init_path(&path);
11251
11252         ret = build_roots_info_cache(info);
11253         if (ret)
11254                 goto out;
11255
11256         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11257         key.type = BTRFS_ROOT_ITEM_KEY;
11258         key.offset = 0;
11259
11260 again:
11261         /*
11262          * Avoid opening and committing transactions if a leaf doesn't have
11263          * any root items that need to be fixed, so that we avoid rotating
11264          * backup roots unnecessarily.
11265          */
11266         if (need_trans) {
11267                 trans = btrfs_start_transaction(info->tree_root, 1);
11268                 if (IS_ERR(trans)) {
11269                         ret = PTR_ERR(trans);
11270                         goto out;
11271                 }
11272         }
11273
11274         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11275                                 0, trans ? 1 : 0);
11276         if (ret < 0)
11277                 goto out;
11278         leaf = path.nodes[0];
11279
11280         while (1) {
11281                 struct btrfs_key found_key;
11282
11283                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11284                         int no_more_keys = find_next_key(&path, &key);
11285
11286                         btrfs_release_path(&path);
11287                         if (trans) {
11288                                 ret = btrfs_commit_transaction(trans,
11289                                                                info->tree_root);
11290                                 trans = NULL;
11291                                 if (ret < 0)
11292                                         goto out;
11293                         }
11294                         need_trans = 0;
11295                         if (no_more_keys)
11296                                 break;
11297                         goto again;
11298                 }
11299
11300                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11301
11302                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11303                         goto next;
11304                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11305                         goto next;
11306
11307                 ret = maybe_repair_root_item(info, &path, &found_key,
11308                                              trans ? 0 : 1);
11309                 if (ret < 0)
11310                         goto out;
11311                 if (ret) {
11312                         if (!trans && repair) {
11313                                 need_trans = 1;
11314                                 key = found_key;
11315                                 btrfs_release_path(&path);
11316                                 goto again;
11317                         }
11318                         bad_roots++;
11319                 }
11320 next:
11321                 path.slots[0]++;
11322         }
11323         ret = 0;
11324 out:
11325         free_roots_info_cache();
11326         btrfs_release_path(&path);
11327         if (trans)
11328                 btrfs_commit_transaction(trans, info->tree_root);
11329         if (ret < 0)
11330                 return ret;
11331
11332         return bad_roots;
11333 }
11334
11335 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11336 {
11337         struct btrfs_trans_handle *trans;
11338         struct btrfs_block_group_cache *bg_cache;
11339         u64 current = 0;
11340         int ret = 0;
11341
11342         /* Clear all free space cache inodes and its extent data */
11343         while (1) {
11344                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11345                 if (!bg_cache)
11346                         break;
11347                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11348                 if (ret < 0)
11349                         return ret;
11350                 current = bg_cache->key.objectid + bg_cache->key.offset;
11351         }
11352
11353         /* Don't forget to set cache_generation to -1 */
11354         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11355         if (IS_ERR(trans)) {
11356                 error("failed to update super block cache generation");
11357                 return PTR_ERR(trans);
11358         }
11359         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11360         btrfs_commit_transaction(trans, fs_info->tree_root);
11361
11362         return ret;
11363 }
11364
/*
 * Help text for "btrfs check", as a NULL-terminated list of lines: the
 * synopsis, a one-line summary, a longer description ended by an empty
 * string, and then one or more lines per option.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* superblock / root selection */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",

	/* operation modes */
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",

	/* reports */
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",

	/* explicit tree locations, progress and cache maintenance */
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	NULL
};
11395
11396 int cmd_check(int argc, char **argv)
11397 {
11398         struct cache_tree root_cache;
11399         struct btrfs_root *root;
11400         struct btrfs_fs_info *info;
11401         u64 bytenr = 0;
11402         u64 subvolid = 0;
11403         u64 tree_root_bytenr = 0;
11404         u64 chunk_root_bytenr = 0;
11405         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11406         int ret;
11407         u64 num;
11408         int init_csum_tree = 0;
11409         int readonly = 0;
11410         int clear_space_cache = 0;
11411         int qgroup_report = 0;
11412         int qgroups_repaired = 0;
11413         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11414
11415         while(1) {
11416                 int c;
11417                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11418                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11419                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11420                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11421                 static const struct option long_options[] = {
11422                         { "super", required_argument, NULL, 's' },
11423                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11424                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11425                         { "init-csum-tree", no_argument, NULL,
11426                                 GETOPT_VAL_INIT_CSUM },
11427                         { "init-extent-tree", no_argument, NULL,
11428                                 GETOPT_VAL_INIT_EXTENT },
11429                         { "check-data-csum", no_argument, NULL,
11430                                 GETOPT_VAL_CHECK_CSUM },
11431                         { "backup", no_argument, NULL, 'b' },
11432                         { "subvol-extents", required_argument, NULL, 'E' },
11433                         { "qgroup-report", no_argument, NULL, 'Q' },
11434                         { "tree-root", required_argument, NULL, 'r' },
11435                         { "chunk-root", required_argument, NULL,
11436                                 GETOPT_VAL_CHUNK_TREE },
11437                         { "progress", no_argument, NULL, 'p' },
11438                         { "mode", required_argument, NULL,
11439                                 GETOPT_VAL_MODE },
11440                         { "clear-space-cache", required_argument, NULL,
11441                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11442                         { NULL, 0, NULL, 0}
11443                 };
11444
11445                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11446                 if (c < 0)
11447                         break;
11448                 switch(c) {
11449                         case 'a': /* ignored */ break;
11450                         case 'b':
11451                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11452                                 break;
11453                         case 's':
11454                                 num = arg_strtou64(optarg);
11455                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11456                                         error(
11457                                         "super mirror should be less than %d",
11458                                                 BTRFS_SUPER_MIRROR_MAX);
11459                                         exit(1);
11460                                 }
11461                                 bytenr = btrfs_sb_offset(((int)num));
11462                                 printf("using SB copy %llu, bytenr %llu\n", num,
11463                                        (unsigned long long)bytenr);
11464                                 break;
11465                         case 'Q':
11466                                 qgroup_report = 1;
11467                                 break;
11468                         case 'E':
11469                                 subvolid = arg_strtou64(optarg);
11470                                 break;
11471                         case 'r':
11472                                 tree_root_bytenr = arg_strtou64(optarg);
11473                                 break;
11474                         case GETOPT_VAL_CHUNK_TREE:
11475                                 chunk_root_bytenr = arg_strtou64(optarg);
11476                                 break;
11477                         case 'p':
11478                                 ctx.progress_enabled = true;
11479                                 break;
11480                         case '?':
11481                         case 'h':
11482                                 usage(cmd_check_usage);
11483                         case GETOPT_VAL_REPAIR:
11484                                 printf("enabling repair mode\n");
11485                                 repair = 1;
11486                                 ctree_flags |= OPEN_CTREE_WRITES;
11487                                 break;
11488                         case GETOPT_VAL_READONLY:
11489                                 readonly = 1;
11490                                 break;
11491                         case GETOPT_VAL_INIT_CSUM:
11492                                 printf("Creating a new CRC tree\n");
11493                                 init_csum_tree = 1;
11494                                 repair = 1;
11495                                 ctree_flags |= OPEN_CTREE_WRITES;
11496                                 break;
11497                         case GETOPT_VAL_INIT_EXTENT:
11498                                 init_extent_tree = 1;
11499                                 ctree_flags |= (OPEN_CTREE_WRITES |
11500                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11501                                 repair = 1;
11502                                 break;
11503                         case GETOPT_VAL_CHECK_CSUM:
11504                                 check_data_csum = 1;
11505                                 break;
11506                         case GETOPT_VAL_MODE:
11507                                 check_mode = parse_check_mode(optarg);
11508                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11509                                         error("unknown mode: %s", optarg);
11510                                         exit(1);
11511                                 }
11512                                 break;
11513                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11514                                 if (strcmp(optarg, "v1") == 0) {
11515                                         clear_space_cache = 1;
11516                                 } else if (strcmp(optarg, "v2") == 0) {
11517                                         clear_space_cache = 2;
11518                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11519                                 } else {
11520                                         error(
11521                 "invalid argument to --clear-space-cache, must be v1 or v2");
11522                                         exit(1);
11523                                 }
11524                                 ctree_flags |= OPEN_CTREE_WRITES;
11525                                 break;
11526                 }
11527         }
11528
11529         if (check_argc_exact(argc - optind, 1))
11530                 usage(cmd_check_usage);
11531
11532         if (ctx.progress_enabled) {
11533                 ctx.tp = TASK_NOTHING;
11534                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11535         }
11536
11537         /* This check is the only reason for --readonly to exist */
11538         if (readonly && repair) {
11539                 error("repair options are not compatible with --readonly");
11540                 exit(1);
11541         }
11542
11543         /*
11544          * Not supported yet
11545          */
11546         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11547                 error("low memory mode doesn't support repair yet");
11548                 exit(1);
11549         }
11550
11551         radix_tree_init();
11552         cache_tree_init(&root_cache);
11553
11554         if((ret = check_mounted(argv[optind])) < 0) {
11555                 error("could not check mount status: %s", strerror(-ret));
11556                 goto err_out;
11557         } else if(ret) {
11558                 error("%s is currently mounted, aborting", argv[optind]);
11559                 ret = -EBUSY;
11560                 goto err_out;
11561         }
11562
11563         /* only allow partial opening under repair mode */
11564         if (repair)
11565                 ctree_flags |= OPEN_CTREE_PARTIAL;
11566
11567         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11568                                   chunk_root_bytenr, ctree_flags);
11569         if (!info) {
11570                 error("cannot open file system");
11571                 ret = -EIO;
11572                 goto err_out;
11573         }
11574
11575         global_info = info;
11576         root = info->fs_root;
11577         if (clear_space_cache == 1) {
11578                 if (btrfs_fs_compat_ro(info,
11579                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11580                         error(
11581                 "free space cache v2 detected, use --clear-space-cache v2");
11582                         ret = 1;
11583                         goto close_out;
11584                 }
11585                 printf("Clearing free space cache\n");
11586                 ret = clear_free_space_cache(info);
11587                 if (ret) {
11588                         error("failed to clear free space cache");
11589                         ret = 1;
11590                 } else {
11591                         printf("Free space cache cleared\n");
11592                 }
11593                 goto close_out;
11594         } else if (clear_space_cache == 2) {
11595                 if (!btrfs_fs_compat_ro(info,
11596                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11597                         printf("no free space cache v2 to clear\n");
11598                         ret = 0;
11599                         goto close_out;
11600                 }
11601                 printf("Clear free space cache v2\n");
11602                 ret = btrfs_clear_free_space_tree(info);
11603                 if (ret) {
11604                         error("failed to clear free space cache v2: %d", ret);
11605                         ret = 1;
11606                 } else {
11607                         printf("free space cache v2 cleared\n");
11608                 }
11609                 goto close_out;
11610         }
11611
11612         /*
11613          * repair mode will force us to commit transaction which
11614          * will make us fail to load log tree when mounting.
11615          */
11616         if (repair && btrfs_super_log_root(info->super_copy)) {
11617                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11618                 if (!ret) {
11619                         ret = 1;
11620                         goto close_out;
11621                 }
11622                 ret = zero_log_tree(root);
11623                 if (ret) {
11624                         error("failed to zero log tree: %d", ret);
11625                         goto close_out;
11626                 }
11627         }
11628
11629         uuid_unparse(info->super_copy->fsid, uuidbuf);
11630         if (qgroup_report) {
11631                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11632                        uuidbuf);
11633                 ret = qgroup_verify_all(info);
11634                 if (ret == 0)
11635                         report_qgroups(1);
11636                 goto close_out;
11637         }
11638         if (subvolid) {
11639                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11640                        subvolid, argv[optind], uuidbuf);
11641                 ret = print_extent_state(info, subvolid);
11642                 goto close_out;
11643         }
11644         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11645
11646         if (!extent_buffer_uptodate(info->tree_root->node) ||
11647             !extent_buffer_uptodate(info->dev_root->node) ||
11648             !extent_buffer_uptodate(info->chunk_root->node)) {
11649                 error("critical roots corrupted, unable to check the filesystem");
11650                 ret = -EIO;
11651                 goto close_out;
11652         }
11653
11654         if (init_extent_tree || init_csum_tree) {
11655                 struct btrfs_trans_handle *trans;
11656
11657                 trans = btrfs_start_transaction(info->extent_root, 0);
11658                 if (IS_ERR(trans)) {
11659                         error("error starting transaction");
11660                         ret = PTR_ERR(trans);
11661                         goto close_out;
11662                 }
11663
11664                 if (init_extent_tree) {
11665                         printf("Creating a new extent tree\n");
11666                         ret = reinit_extent_tree(trans, info);
11667                         if (ret)
11668                                 goto close_out;
11669                 }
11670
11671                 if (init_csum_tree) {
11672                         printf("Reinitialize checksum tree\n");
11673                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11674                         if (ret) {
11675                                 error("checksum tree initialization failed: %d",
11676                                                 ret);
11677                                 ret = -EIO;
11678                                 goto close_out;
11679                         }
11680
11681                         ret = fill_csum_tree(trans, info->csum_root,
11682                                              init_extent_tree);
11683                         if (ret) {
11684                                 error("checksum tree refilling failed: %d", ret);
11685                                 return -EIO;
11686                         }
11687                 }
11688                 /*
11689                  * Ok now we commit and run the normal fsck, which will add
11690                  * extent entries for all of the items it finds.
11691                  */
11692                 ret = btrfs_commit_transaction(trans, info->extent_root);
11693                 if (ret)
11694                         goto close_out;
11695         }
11696         if (!extent_buffer_uptodate(info->extent_root->node)) {
11697                 error("critical: extent_root, unable to check the filesystem");
11698                 ret = -EIO;
11699                 goto close_out;
11700         }
11701         if (!extent_buffer_uptodate(info->csum_root->node)) {
11702                 error("critical: csum_root, unable to check the filesystem");
11703                 ret = -EIO;
11704                 goto close_out;
11705         }
11706
11707         if (!ctx.progress_enabled)
11708                 fprintf(stderr, "checking extents\n");
11709         if (check_mode == CHECK_MODE_LOWMEM)
11710                 ret = check_chunks_and_extents_v2(root);
11711         else
11712                 ret = check_chunks_and_extents(root);
11713         if (ret)
11714                 error(
11715                 "errors found in extent allocation tree or chunk allocation");
11716
11717         ret = repair_root_items(info);
11718         if (ret < 0)
11719                 goto close_out;
11720         if (repair) {
11721                 fprintf(stderr, "Fixed %d roots.\n", ret);
11722                 ret = 0;
11723         } else if (ret > 0) {
11724                 fprintf(stderr,
11725                        "Found %d roots with an outdated root item.\n",
11726                        ret);
11727                 fprintf(stderr,
11728                         "Please run a filesystem check with the option --repair to fix them.\n");
11729                 ret = 1;
11730                 goto close_out;
11731         }
11732
11733         if (!ctx.progress_enabled) {
11734                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11735                         fprintf(stderr, "checking free space tree\n");
11736                 else
11737                         fprintf(stderr, "checking free space cache\n");
11738         }
11739         ret = check_space_cache(root);
11740         if (ret)
11741                 goto out;
11742
11743         /*
11744          * We used to have to have these hole extents in between our real
11745          * extents so if we don't have this flag set we need to make sure there
11746          * are no gaps in the file extents for inodes, otherwise we can just
11747          * ignore it when this happens.
11748          */
11749         no_holes = btrfs_fs_incompat(root->fs_info,
11750                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11751         if (!ctx.progress_enabled)
11752                 fprintf(stderr, "checking fs roots\n");
11753         ret = check_fs_roots(root, &root_cache);
11754         if (ret)
11755                 goto out;
11756
11757         fprintf(stderr, "checking csums\n");
11758         ret = check_csums(root);
11759         if (ret)
11760                 goto out;
11761
11762         fprintf(stderr, "checking root refs\n");
11763         ret = check_root_refs(root, &root_cache);
11764         if (ret)
11765                 goto out;
11766
11767         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11768                 struct extent_buffer *eb;
11769
11770                 eb = list_first_entry(&root->fs_info->recow_ebs,
11771                                       struct extent_buffer, recow);
11772                 list_del_init(&eb->recow);
11773                 ret = recow_extent_buffer(root, eb);
11774                 if (ret)
11775                         break;
11776         }
11777
11778         while (!list_empty(&delete_items)) {
11779                 struct bad_item *bad;
11780
11781                 bad = list_first_entry(&delete_items, struct bad_item, list);
11782                 list_del_init(&bad->list);
11783                 if (repair)
11784                         ret = delete_bad_item(root, bad);
11785                 free(bad);
11786         }
11787
11788         if (info->quota_enabled) {
11789                 int err;
11790                 fprintf(stderr, "checking quota groups\n");
11791                 err = qgroup_verify_all(info);
11792                 if (err)
11793                         goto out;
11794                 report_qgroups(0);
11795                 err = repair_qgroups(info, &qgroups_repaired);
11796                 if (err)
11797                         goto out;
11798         }
11799
11800         if (!list_empty(&root->fs_info->recow_ebs)) {
11801                 error("transid errors in file system");
11802                 ret = 1;
11803         }
11804 out:
11805         /* Don't override original ret */
11806         if (!ret && qgroups_repaired)
11807                 ret = qgroups_repaired;
11808
11809         if (found_old_backref) { /*
11810                  * there was a disk format change when mixed
11811                  * backref was in testing tree. The old format
11812                  * existed about one week.
11813                  */
11814                 printf("\n * Found old mixed backref format. "
11815                        "The old format is not supported! *"
11816                        "\n * Please mount the FS in readonly mode, "
11817                        "backup data and re-format the FS. *\n\n");
11818                 ret = 1;
11819         }
11820         printf("found %llu bytes used err is %d\n",
11821                (unsigned long long)bytes_used, ret);
11822         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11823         printf("total tree bytes: %llu\n",
11824                (unsigned long long)total_btree_bytes);
11825         printf("total fs tree bytes: %llu\n",
11826                (unsigned long long)total_fs_tree_bytes);
11827         printf("total extent tree bytes: %llu\n",
11828                (unsigned long long)total_extent_tree_bytes);
11829         printf("btree space waste bytes: %llu\n",
11830                (unsigned long long)btree_space_waste);
11831         printf("file data blocks allocated: %llu\n referenced %llu\n",
11832                 (unsigned long long)data_bytes_allocated,
11833                 (unsigned long long)data_bytes_referenced);
11834
11835         free_qgroup_counts();
11836         free_root_recs_tree(&root_cache);
11837 close_out:
11838         close_ctree(root);
11839 err_out:
11840         if (ctx.progress_enabled)
11841                 task_deinit(ctx.info);
11842
11843         return ret;
11844 }