btrfs-progs: check: introduce function to find inode_ref
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phases the progress indicator can report.
 *
 * print_status_check() uses the value as an index into its message table,
 * which has one entry per phase before TASK_NOTHING, so TASK_NOTHING has to
 * remain the final enumerator.
 */
enum task_position {
        TASK_EXTENTS = 0,
        TASK_FREE_SPACE = 1,
        TASK_FS_ROOTS = 2,
        /* nothing to report; must stay last */
        TASK_NOTHING = 3,
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an
 * unrecognised name; CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL = 0,
        CHECK_MODE_LOWMEM = 1,
        CHECK_MODE_UNKNOWN = 2,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
121 /*
122  * Much like data_backref, just removed the undetermined members
123  * and change it to use list_head.
124  * During extent scan, it is stored in root->orphan_data_extent.
125  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126  */
127 struct orphan_data_extent {
128         struct list_head list;
129         u64 root;
130         u64 objectid;
131         u64 offset;
132         u64 disk_bytenr;
133         u64 disk_len;
134 };
135
136 struct tree_backref {
137         struct extent_backref node;
138         union {
139                 u64 parent;
140                 u64 root;
141         };
142 };
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Value used to explicitly initialise the two-bit field
 * extent_record::flag_block_full_backref.
 */
enum {
        FLAG_UNSET = 2
};
151
152 struct extent_record {
153         struct list_head backrefs;
154         struct list_head dups;
155         struct list_head list;
156         struct cache_extent cache;
157         struct btrfs_disk_key parent_key;
158         u64 start;
159         u64 max_size;
160         u64 nr;
161         u64 refs;
162         u64 extent_item_refs;
163         u64 generation;
164         u64 parent_generation;
165         u64 info_objectid;
166         u32 num_duplicates;
167         u8 info_level;
168         unsigned int flag_block_full_backref:2;
169         unsigned int found_rec:1;
170         unsigned int content_checked:1;
171         unsigned int owner_ref_checked:1;
172         unsigned int is_root:1;
173         unsigned int metadata:1;
174         unsigned int bad_full_backref:1;
175         unsigned int crossing_stripes:1;
176         unsigned int wrong_chunk_type:1;
177 };
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype;
190         u8 ref_type;
191         int errors;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
203 struct root_item_record {
204         struct list_head list;
205         u64 objectid;
206         u64 bytenr;
207         u64 last_snapshot;
208         u8 level;
209         u8 drop_level;
210         int level_size;
211         struct btrfs_key drop_key;
212 };
213
/*
 * Bits stored in the `errors` field of a backref; print_ref_error() turns
 * them into text.  Each constant is a distinct single bit (bit 0 .. bit 12).
 */
#define REF_ERR_NO_DIR_ITEM             0x0001
#define REF_ERR_NO_DIR_INDEX            0x0002
#define REF_ERR_NO_INODE_REF            0x0004
#define REF_ERR_DUP_DIR_ITEM            0x0008
#define REF_ERR_DUP_DIR_INDEX           0x0010
#define REF_ERR_DUP_INODE_REF           0x0020
#define REF_ERR_INDEX_UNMATCH           0x0040
#define REF_ERR_FILETYPE_UNMATCH        0x0080
#define REF_ERR_NAME_TOO_LONG           0x0100
#define REF_ERR_NO_ROOT_REF             0x0200
#define REF_ERR_NO_ROOT_BACKREF         0x0400
#define REF_ERR_DUP_ROOT_REF            0x0800
#define REF_ERR_DUP_ROOT_BACKREF        0x1000
227
228 struct file_extent_hole {
229         struct rb_node node;
230         u64 start;
231         u64 len;
232 };
233
234 struct inode_record {
235         struct list_head backrefs;
236         unsigned int checked:1;
237         unsigned int merging:1;
238         unsigned int found_inode_item:1;
239         unsigned int found_dir_item:1;
240         unsigned int found_file_extent:1;
241         unsigned int found_csum_item:1;
242         unsigned int some_csum_missing:1;
243         unsigned int nodatasum:1;
244         int errors;
245
246         u64 ino;
247         u32 nlink;
248         u32 imode;
249         u64 isize;
250         u64 nbytes;
251
252         u32 found_link;
253         u64 found_size;
254         u64 extent_start;
255         u64 extent_end;
256         struct rb_root holes;
257         struct list_head orphan_extents;
258
259         u32 refs;
260 };
261
/*
 * Bits stored in inode_record::errors; print_inode_error() turns them into
 * text.  Each constant is a distinct single bit (bit 0 .. bit 14).
 */
#define I_ERR_NO_INODE_ITEM             0x0001
#define I_ERR_NO_ORPHAN_ITEM            0x0002
#define I_ERR_DUP_INODE_ITEM            0x0004
#define I_ERR_DUP_DIR_INDEX             0x0008
#define I_ERR_ODD_DIR_ITEM              0x0010
#define I_ERR_ODD_FILE_EXTENT           0x0020
#define I_ERR_BAD_FILE_EXTENT           0x0040
#define I_ERR_FILE_EXTENT_OVERLAP       0x0080
#define I_ERR_FILE_EXTENT_DISCOUNT      0x0100
#define I_ERR_DIR_ISIZE_WRONG           0x0200
#define I_ERR_FILE_NBYTES_WRONG         0x0400
#define I_ERR_ODD_CSUM_ITEM             0x0800
#define I_ERR_SOME_CSUM_MISSING         0x1000
#define I_ERR_LINK_COUNT_WRONG          0x2000
#define I_ERR_FILE_EXTENT_ORPHAN        0x4000
277
278 struct root_backref {
279         struct list_head list;
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0];
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
298 struct root_record {
299         struct list_head backrefs;
300         struct cache_extent cache;
301         unsigned int found_root_item:1;
302         u64 objectid;
303         u32 found_ref;
304 };
305
306 struct ptr_node {
307         struct cache_extent cache;
308         void *data;
309 };
310
311 struct shared_node {
312         struct cache_extent cache;
313         struct cache_tree root_cache;
314         struct cache_tree inode_cache;
315         struct inode_record *current;
316         u32 refs;
317 };
318
319 struct block_info {
320         u64 start;
321         u32 size;
322 };
323
324 struct walk_control {
325         struct cache_tree shared;
326         struct shared_node *nodes[BTRFS_MAX_LEVEL];
327         int active_node;
328         int root_level;
329 };
330
331 struct bad_item {
332         struct btrfs_key key;
333         u64 root_id;
334         struct list_head list;
335 };
336
337 struct extent_entry {
338         u64 bytenr;
339         u64 bytes;
340         int count;
341         int broken;
342         struct list_head list;
343 };
344
345 struct root_item_info {
346         /* level of the root */
347         u8 level;
348         /* number of nodes at this level, must be 1 for a root */
349         int node_count;
350         u64 bytenr;
351         u64 gen;
352         struct cache_extent cache_extent;
353 };
354
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are only used
 * internally for error classification.  Every constant must be a distinct
 * bit so that several errors can be OR-ed together and still be told apart.
 *
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two indistinguishable; it now has its own bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Companion of print_status_check(): emits a newline on stdout and flushes
 * it.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
1203 static int splice_shared_node(struct shared_node *src_node,
1204                               struct shared_node *dst_node)
1205 {
1206         struct cache_extent *cache;
1207         struct ptr_node *node, *ins;
1208         struct cache_tree *src, *dst;
1209         struct inode_record *rec, *conflict;
1210         u64 current_ino = 0;
1211         int splice = 0;
1212         int ret;
1213
1214         if (--src_node->refs == 0)
1215                 splice = 1;
1216         if (src_node->current)
1217                 current_ino = src_node->current->ino;
1218
1219         src = &src_node->root_cache;
1220         dst = &dst_node->root_cache;
1221 again:
1222         cache = search_cache_extent(src, 0);
1223         while (cache) {
1224                 node = container_of(cache, struct ptr_node, cache);
1225                 rec = node->data;
1226                 cache = next_cache_extent(cache);
1227
1228                 if (splice) {
1229                         remove_cache_extent(src, &node->cache);
1230                         ins = node;
1231                 } else {
1232                         ins = malloc(sizeof(*ins));
1233                         BUG_ON(!ins);
1234                         ins->cache.start = node->cache.start;
1235                         ins->cache.size = node->cache.size;
1236                         ins->data = rec;
1237                         rec->refs++;
1238                 }
1239                 ret = insert_cache_extent(dst, &ins->cache);
1240                 if (ret == -EEXIST) {
1241                         conflict = get_inode_rec(dst, rec->ino, 1);
1242                         BUG_ON(IS_ERR(conflict));
1243                         merge_inode_recs(rec, conflict, dst);
1244                         if (rec->checked) {
1245                                 conflict->checked = 1;
1246                                 if (dst_node->current == conflict)
1247                                         dst_node->current = NULL;
1248                         }
1249                         maybe_free_inode_rec(dst, conflict);
1250                         free_inode_rec(rec);
1251                         free(ins);
1252                 } else {
1253                         BUG_ON(ret);
1254                 }
1255         }
1256
1257         if (src == &src_node->root_cache) {
1258                 src = &src_node->inode_cache;
1259                 dst = &dst_node->inode_cache;
1260                 goto again;
1261         }
1262
1263         if (current_ino > 0 && (!dst_node->current ||
1264             current_ino > dst_node->current->ino)) {
1265                 if (dst_node->current) {
1266                         dst_node->current->checked = 1;
1267                         maybe_free_inode_rec(dst, dst_node->current);
1268                 }
1269                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270                 BUG_ON(IS_ERR(dst_node->current));
1271         }
1272         return 0;
1273 }
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
/*
 * Per-level cache of extent reference counts used by walk_down_tree().
 * For each tree level it remembers the bytenr of the block whose reference
 * count was last looked up and the count that was returned, so the same
 * block does not need a second btrfs_lookup_extent_info() call.
 */
struct node_refs {
        /* bytenr of the block last looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count recorded for bytenr[] at the same level */
        u64 refs[BTRFS_MAX_LEVEL];
};
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
3830 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
3833 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
3834
3835 /*
3836  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3837  * INODE_REF/INODE_EXTREF match.
3838  *
3839  * @root:       the root of the fs/file tree
3840  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3841  * @key:        the key of the DIR_ITEM/DIR_INDEX
3842  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3843  *              distinguish root_dir between normal dir/file
3844  * @name:       the name in the INODE_REF/INODE_EXTREF
3845  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3846  * @mode:       the st_mode of INODE_ITEM
3847  *
3848  * Return 0 if no error occurred.
3849  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3850  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3851  * dir/file.
3852  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3853  * not match for normal dir/file.
3854  */
3855 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3856                          struct btrfs_key *key, u64 index, char *name,
3857                          u32 namelen, u32 mode)
3858 {
3859         struct btrfs_path path;
3860         struct extent_buffer *node;
3861         struct btrfs_dir_item *di;
3862         struct btrfs_key location;
3863         char namebuf[BTRFS_NAME_LEN] = {0};
3864         u32 total;
3865         u32 cur = 0;
3866         u32 len;
3867         u32 name_len;
3868         u32 data_len;
3869         u8 filetype;
3870         int slot;
3871         int ret;
3872
3873         btrfs_init_path(&path);
3874         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3875         if (ret < 0) {
3876                 ret = DIR_ITEM_MISSING;
3877                 goto out;
3878         }
3879
3880         /* Process root dir and goto out*/
3881         if (index == 0) {
3882                 if (ret == 0) {
3883                         ret = ROOT_DIR_ERROR;
3884                         error(
3885                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3886                                 root->objectid,
3887                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3888                                         "REF" : "EXTREF",
3889                                 ref_key->objectid, ref_key->offset,
3890                                 key->type == BTRFS_DIR_ITEM_KEY ?
3891                                         "DIR_ITEM" : "DIR_INDEX");
3892                 } else {
3893                         ret = 0;
3894                 }
3895
3896                 goto out;
3897         }
3898
3899         /* Process normal file/dir */
3900         if (ret > 0) {
3901                 ret = DIR_ITEM_MISSING;
3902                 error(
3903                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3904                         root->objectid,
3905                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3906                         ref_key->objectid, ref_key->offset,
3907                         key->type == BTRFS_DIR_ITEM_KEY ?
3908                                 "DIR_ITEM" : "DIR_INDEX",
3909                         key->objectid, key->offset, namelen, name,
3910                         imode_to_type(mode));
3911                 goto out;
3912         }
3913
3914         /* Check whether inode_id/filetype/name match */
3915         node = path.nodes[0];
3916         slot = path.slots[0];
3917         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3918         total = btrfs_item_size_nr(node, slot);
3919         while (cur < total) {
3920                 ret = DIR_ITEM_MISMATCH;
3921                 name_len = btrfs_dir_name_len(node, di);
3922                 data_len = btrfs_dir_data_len(node, di);
3923
3924                 btrfs_dir_item_key_to_cpu(node, di, &location);
3925                 if (location.objectid != ref_key->objectid ||
3926                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3927                     location.offset != 0)
3928                         goto next;
3929
3930                 filetype = btrfs_dir_type(node, di);
3931                 if (imode_to_type(mode) != filetype)
3932                         goto next;
3933
3934                 if (name_len <= BTRFS_NAME_LEN) {
3935                         len = name_len;
3936                 } else {
3937                         len = BTRFS_NAME_LEN;
3938                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3939                         root->objectid,
3940                         key->type == BTRFS_DIR_ITEM_KEY ?
3941                         "DIR_ITEM" : "DIR_INDEX",
3942                         key->objectid, key->offset, name_len);
3943                 }
3944                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3945                 if (len != namelen || strncmp(namebuf, name, len))
3946                         goto next;
3947
3948                 ret = 0;
3949                 goto out;
3950 next:
3951                 len = sizeof(*di) + name_len + data_len;
3952                 di = (struct btrfs_dir_item *)((char *)di + len);
3953                 cur += len;
3954         }
3955         if (ret == DIR_ITEM_MISMATCH)
3956                 error(
3957                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3958                         root->objectid,
3959                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3960                         ref_key->objectid, ref_key->offset,
3961                         key->type == BTRFS_DIR_ITEM_KEY ?
3962                                 "DIR_ITEM" : "DIR_INDEX",
3963                         key->objectid, key->offset, namelen, name,
3964                         imode_to_type(mode));
3965 out:
3966         btrfs_release_path(&path);
3967         return ret;
3968 }
3969
3970 /*
3971  * Traverse the given INODE_REF and call find_dir_item() to find related
3972  * DIR_ITEM/DIR_INDEX.
3973  *
3974  * @root:       the root of the fs/file tree
3975  * @ref_key:    the key of the INODE_REF
3976  * @refs:       the count of INODE_REF
3977  * @mode:       the st_mode of INODE_ITEM
3978  *
3979  * Return 0 if no error occurred.
3980  */
3981 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3982                            struct extent_buffer *node, int slot, u64 *refs,
3983                            int mode)
3984 {
3985         struct btrfs_key key;
3986         struct btrfs_inode_ref *ref;
3987         char namebuf[BTRFS_NAME_LEN] = {0};
3988         u32 total;
3989         u32 cur = 0;
3990         u32 len;
3991         u32 name_len;
3992         u64 index;
3993         int ret, err = 0;
3994
3995         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
3996         total = btrfs_item_size_nr(node, slot);
3997
3998 next:
3999         /* Update inode ref count */
4000         (*refs)++;
4001
4002         index = btrfs_inode_ref_index(node, ref);
4003         name_len = btrfs_inode_ref_name_len(node, ref);
4004         if (name_len <= BTRFS_NAME_LEN) {
4005                 len = name_len;
4006         } else {
4007                 len = BTRFS_NAME_LEN;
4008                 warning("root %llu INODE_REF[%llu %llu] name too long",
4009                         root->objectid, ref_key->objectid, ref_key->offset);
4010         }
4011
4012         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4013
4014         /* Check root dir ref name */
4015         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4016                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4017                       root->objectid, ref_key->objectid, ref_key->offset,
4018                       namebuf);
4019                 err |= ROOT_DIR_ERROR;
4020         }
4021
4022         /* Find related DIR_INDEX */
4023         key.objectid = ref_key->offset;
4024         key.type = BTRFS_DIR_INDEX_KEY;
4025         key.offset = index;
4026         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4027         err |= ret;
4028
4029         /* Find related dir_item */
4030         key.objectid = ref_key->offset;
4031         key.type = BTRFS_DIR_ITEM_KEY;
4032         key.offset = btrfs_name_hash(namebuf, len);
4033         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4034         err |= ret;
4035
4036         len = sizeof(*ref) + name_len;
4037         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4038         cur += len;
4039         if (cur < total)
4040                 goto next;
4041
4042         return err;
4043 }
4044
4045 /*
4046  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4047  * DIR_ITEM/DIR_INDEX.
4048  *
4049  * @root:       the root of the fs/file tree
4050  * @ref_key:    the key of the INODE_EXTREF
4051  * @refs:       the count of INODE_EXTREF
4052  * @mode:       the st_mode of INODE_ITEM
4053  *
4054  * Return 0 if no error occurred.
4055  */
4056 static int check_inode_extref(struct btrfs_root *root,
4057                               struct btrfs_key *ref_key,
4058                               struct extent_buffer *node, int slot, u64 *refs,
4059                               int mode)
4060 {
4061         struct btrfs_key key;
4062         struct btrfs_inode_extref *extref;
4063         char namebuf[BTRFS_NAME_LEN] = {0};
4064         u32 total;
4065         u32 cur = 0;
4066         u32 len;
4067         u32 name_len;
4068         u64 index;
4069         u64 parent;
4070         int ret;
4071         int err = 0;
4072
4073         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4074         total = btrfs_item_size_nr(node, slot);
4075
4076 next:
4077         /* update inode ref count */
4078         (*refs)++;
4079         name_len = btrfs_inode_extref_name_len(node, extref);
4080         index = btrfs_inode_extref_index(node, extref);
4081         parent = btrfs_inode_extref_parent(node, extref);
4082         if (name_len <= BTRFS_NAME_LEN) {
4083                 len = name_len;
4084         } else {
4085                 len = BTRFS_NAME_LEN;
4086                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4087                         root->objectid, ref_key->objectid, ref_key->offset);
4088         }
4089         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4090
4091         /* Check root dir ref name */
4092         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4093                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4094                       root->objectid, ref_key->objectid, ref_key->offset,
4095                       namebuf);
4096                 err |= ROOT_DIR_ERROR;
4097         }
4098
4099         /* find related dir_index */
4100         key.objectid = parent;
4101         key.type = BTRFS_DIR_INDEX_KEY;
4102         key.offset = index;
4103         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4104         err |= ret;
4105
4106         /* find related dir_item */
4107         key.objectid = parent;
4108         key.type = BTRFS_DIR_ITEM_KEY;
4109         key.offset = btrfs_name_hash(namebuf, len);
4110         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4111         err |= ret;
4112
4113         len = sizeof(*extref) + name_len;
4114         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4115         cur += len;
4116
4117         if (cur < total)
4118                 goto next;
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4125  * DIR_ITEM/DIR_INDEX match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @key:        the key of the INODE_REF/INODE_EXTREF
4129  * @name:       the name in the INODE_REF/INODE_EXTREF
4130  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4131  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4132  * to (u64)-1
4133  * @ext_ref:    the EXTENDED_IREF feature
4134  *
4135  * Return 0 if no error occurred.
4136  * Return >0 for error bitmap
4137  */
4138 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4139                           char *name, int namelen, u64 index,
4140                           unsigned int ext_ref)
4141 {
4142         struct btrfs_path path;
4143         struct btrfs_inode_ref *ref;
4144         struct btrfs_inode_extref *extref;
4145         struct extent_buffer *node;
4146         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4147         u32 total;
4148         u32 cur = 0;
4149         u32 len;
4150         u32 ref_namelen;
4151         u64 ref_index;
4152         u64 parent;
4153         u64 dir_id;
4154         int slot;
4155         int ret;
4156
4157         btrfs_init_path(&path);
4158         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4159         if (ret) {
4160                 ret = INODE_REF_MISSING;
4161                 goto extref;
4162         }
4163
4164         node = path.nodes[0];
4165         slot = path.slots[0];
4166
4167         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4168         total = btrfs_item_size_nr(node, slot);
4169
4170         /* Iterate all entry of INODE_REF */
4171         while (cur < total) {
4172                 ret = INODE_REF_MISSING;
4173
4174                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4175                 ref_index = btrfs_inode_ref_index(node, ref);
4176                 if (index != (u64)-1 && index != ref_index)
4177                         goto next_ref;
4178
4179                 if (ref_namelen <= BTRFS_NAME_LEN) {
4180                         len = ref_namelen;
4181                 } else {
4182                         len = BTRFS_NAME_LEN;
4183                         warning("root %llu INODE %s[%llu %llu] name too long",
4184                                 root->objectid,
4185                                 key->type == BTRFS_INODE_REF_KEY ?
4186                                         "REF" : "EXTREF",
4187                                 key->objectid, key->offset);
4188                 }
4189                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4190                                    len);
4191
4192                 if (len != namelen || strncmp(ref_namebuf, name, len))
4193                         goto next_ref;
4194
4195                 ret = 0;
4196                 goto out;
4197 next_ref:
4198                 len = sizeof(*ref) + ref_namelen;
4199                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4200                 cur += len;
4201         }
4202
4203 extref:
4204         /* Skip if not support EXTENDED_IREF feature */
4205         if (!ext_ref)
4206                 goto out;
4207
4208         btrfs_release_path(&path);
4209         btrfs_init_path(&path);
4210
4211         dir_id = key->offset;
4212         key->type = BTRFS_INODE_EXTREF_KEY;
4213         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4214
4215         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4216         if (ret) {
4217                 ret = INODE_REF_MISSING;
4218                 goto out;
4219         }
4220
4221         node = path.nodes[0];
4222         slot = path.slots[0];
4223
4224         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4225         cur = 0;
4226         total = btrfs_item_size_nr(node, slot);
4227
4228         /* Iterate all entry of INODE_EXTREF */
4229         while (cur < total) {
4230                 ret = INODE_REF_MISSING;
4231
4232                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4233                 ref_index = btrfs_inode_extref_index(node, extref);
4234                 parent = btrfs_inode_extref_parent(node, extref);
4235                 if (index != (u64)-1 && index != ref_index)
4236                         goto next_extref;
4237
4238                 if (parent != dir_id)
4239                         goto next_extref;
4240
4241                 if (ref_namelen <= BTRFS_NAME_LEN) {
4242                         len = ref_namelen;
4243                 } else {
4244                         len = BTRFS_NAME_LEN;
4245                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4246                                 root->objectid,
4247                                 key->type == BTRFS_INODE_REF_KEY ?
4248                                         "REF" : "EXTREF",
4249                                 key->objectid, key->offset);
4250                 }
4251                 read_extent_buffer(node, ref_namebuf,
4252                                    (unsigned long)(extref + 1), len);
4253
4254                 if (len != namelen || strncmp(ref_namebuf, name, len))
4255                         goto next_extref;
4256
4257                 ret = 0;
4258                 goto out;
4259
4260 next_extref:
4261                 len = sizeof(*extref) + ref_namelen;
4262                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4263                 cur += len;
4264
4265         }
4266 out:
4267         btrfs_release_path(&path);
4268         return ret;
4269 }
4270
4271 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4272 {
4273         struct list_head *cur = rec->backrefs.next;
4274         struct extent_backref *back;
4275         struct tree_backref *tback;
4276         struct data_backref *dback;
4277         u64 found = 0;
4278         int err = 0;
4279
4280         while(cur != &rec->backrefs) {
4281                 back = to_extent_backref(cur);
4282                 cur = cur->next;
4283                 if (!back->found_extent_tree) {
4284                         err = 1;
4285                         if (!print_errs)
4286                                 goto out;
4287                         if (back->is_data) {
4288                                 dback = to_data_backref(back);
4289                                 fprintf(stderr, "Backref %llu %s %llu"
4290                                         " owner %llu offset %llu num_refs %lu"
4291                                         " not found in extent tree\n",
4292                                         (unsigned long long)rec->start,
4293                                         back->full_backref ?
4294                                         "parent" : "root",
4295                                         back->full_backref ?
4296                                         (unsigned long long)dback->parent:
4297                                         (unsigned long long)dback->root,
4298                                         (unsigned long long)dback->owner,
4299                                         (unsigned long long)dback->offset,
4300                                         (unsigned long)dback->num_refs);
4301                         } else {
4302                                 tback = to_tree_backref(back);
4303                                 fprintf(stderr, "Backref %llu parent %llu"
4304                                         " root %llu not found in extent tree\n",
4305                                         (unsigned long long)rec->start,
4306                                         (unsigned long long)tback->parent,
4307                                         (unsigned long long)tback->root);
4308                         }
4309                 }
4310                 if (!back->is_data && !back->found_ref) {
4311                         err = 1;
4312                         if (!print_errs)
4313                                 goto out;
4314                         tback = to_tree_backref(back);
4315                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4316                                 (unsigned long long)rec->start,
4317                                 back->full_backref ? "parent" : "root",
4318                                 back->full_backref ?
4319                                 (unsigned long long)tback->parent :
4320                                 (unsigned long long)tback->root, back);
4321                 }
4322                 if (back->is_data) {
4323                         dback = to_data_backref(back);
4324                         if (dback->found_ref != dback->num_refs) {
4325                                 err = 1;
4326                                 if (!print_errs)
4327                                         goto out;
4328                                 fprintf(stderr, "Incorrect local backref count"
4329                                         " on %llu %s %llu owner %llu"
4330                                         " offset %llu found %u wanted %u back %p\n",
4331                                         (unsigned long long)rec->start,
4332                                         back->full_backref ?
4333                                         "parent" : "root",
4334                                         back->full_backref ?
4335                                         (unsigned long long)dback->parent:
4336                                         (unsigned long long)dback->root,
4337                                         (unsigned long long)dback->owner,
4338                                         (unsigned long long)dback->offset,
4339                                         dback->found_ref, dback->num_refs, back);
4340                         }
4341                         if (dback->disk_bytenr != rec->start) {
4342                                 err = 1;
4343                                 if (!print_errs)
4344                                         goto out;
4345                                 fprintf(stderr, "Backref disk bytenr does not"
4346                                         " match extent record, bytenr=%llu, "
4347                                         "ref bytenr=%llu\n",
4348                                         (unsigned long long)rec->start,
4349                                         (unsigned long long)dback->disk_bytenr);
4350                         }
4351
4352                         if (dback->bytes != rec->nr) {
4353                                 err = 1;
4354                                 if (!print_errs)
4355                                         goto out;
4356                                 fprintf(stderr, "Backref bytes do not match "
4357                                         "extent backref, bytenr=%llu, ref "
4358                                         "bytes=%llu, backref bytes=%llu\n",
4359                                         (unsigned long long)rec->start,
4360                                         (unsigned long long)rec->nr,
4361                                         (unsigned long long)dback->bytes);
4362                         }
4363                 }
4364                 if (!back->is_data) {
4365                         found += 1;
4366                 } else {
4367                         dback = to_data_backref(back);
4368                         found += dback->found_ref;
4369                 }
4370         }
4371         if (found != rec->refs) {
4372                 err = 1;
4373                 if (!print_errs)
4374                         goto out;
4375                 fprintf(stderr, "Incorrect global backref count "
4376                         "on %llu found %llu wanted %llu\n",
4377                         (unsigned long long)rec->start,
4378                         (unsigned long long)found,
4379                         (unsigned long long)rec->refs);
4380         }
4381 out:
4382         return err;
4383 }
4384
4385 static int free_all_extent_backrefs(struct extent_record *rec)
4386 {
4387         struct extent_backref *back;
4388         struct list_head *cur;
4389         while (!list_empty(&rec->backrefs)) {
4390                 cur = rec->backrefs.next;
4391                 back = to_extent_backref(cur);
4392                 list_del(cur);
4393                 free(back);
4394         }
4395         return 0;
4396 }
4397
4398 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4399                                      struct cache_tree *extent_cache)
4400 {
4401         struct cache_extent *cache;
4402         struct extent_record *rec;
4403
4404         while (1) {
4405                 cache = first_cache_extent(extent_cache);
4406                 if (!cache)
4407                         break;
4408                 rec = container_of(cache, struct extent_record, cache);
4409                 remove_cache_extent(extent_cache, cache);
4410                 free_all_extent_backrefs(rec);
4411                 free(rec);
4412         }
4413 }
4414
4415 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4416                                  struct extent_record *rec)
4417 {
4418         if (rec->content_checked && rec->owner_ref_checked &&
4419             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4420             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4421             !rec->bad_full_backref && !rec->crossing_stripes &&
4422             !rec->wrong_chunk_type) {
4423                 remove_cache_extent(extent_cache, &rec->cache);
4424                 free_all_extent_backrefs(rec);
4425                 list_del_init(&rec->list);
4426                 free(rec);
4427         }
4428         return 0;
4429 }
4430
4431 static int check_owner_ref(struct btrfs_root *root,
4432                             struct extent_record *rec,
4433                             struct extent_buffer *buf)
4434 {
4435         struct extent_backref *node;
4436         struct tree_backref *back;
4437         struct btrfs_root *ref_root;
4438         struct btrfs_key key;
4439         struct btrfs_path path;
4440         struct extent_buffer *parent;
4441         int level;
4442         int found = 0;
4443         int ret;
4444
4445         list_for_each_entry(node, &rec->backrefs, list) {
4446                 if (node->is_data)
4447                         continue;
4448                 if (!node->found_ref)
4449                         continue;
4450                 if (node->full_backref)
4451                         continue;
4452                 back = to_tree_backref(node);
4453                 if (btrfs_header_owner(buf) == back->root)
4454                         return 0;
4455         }
4456         BUG_ON(rec->is_root);
4457
4458         /* try to find the block by search corresponding fs tree */
4459         key.objectid = btrfs_header_owner(buf);
4460         key.type = BTRFS_ROOT_ITEM_KEY;
4461         key.offset = (u64)-1;
4462
4463         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4464         if (IS_ERR(ref_root))
4465                 return 1;
4466
4467         level = btrfs_header_level(buf);
4468         if (level == 0)
4469                 btrfs_item_key_to_cpu(buf, &key, 0);
4470         else
4471                 btrfs_node_key_to_cpu(buf, &key, 0);
4472
4473         btrfs_init_path(&path);
4474         path.lowest_level = level + 1;
4475         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4476         if (ret < 0)
4477                 return 0;
4478
4479         parent = path.nodes[level + 1];
4480         if (parent && buf->start == btrfs_node_blockptr(parent,
4481                                                         path.slots[level + 1]))
4482                 found = 1;
4483
4484         btrfs_release_path(&path);
4485         return found ? 0 : 1;
4486 }
4487
4488 static int is_extent_tree_record(struct extent_record *rec)
4489 {
4490         struct list_head *cur = rec->backrefs.next;
4491         struct extent_backref *node;
4492         struct tree_backref *back;
4493         int is_extent = 0;
4494
4495         while(cur != &rec->backrefs) {
4496                 node = to_extent_backref(cur);
4497                 cur = cur->next;
4498                 if (node->is_data)
4499                         return 0;
4500                 back = to_tree_backref(node);
4501                 if (node->full_backref)
4502                         return 0;
4503                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4504                         is_extent = 1;
4505         }
4506         return is_extent;
4507 }
4508
4509
4510 static int record_bad_block_io(struct btrfs_fs_info *info,
4511                                struct cache_tree *extent_cache,
4512                                u64 start, u64 len)
4513 {
4514         struct extent_record *rec;
4515         struct cache_extent *cache;
4516         struct btrfs_key key;
4517
4518         cache = lookup_cache_extent(extent_cache, start, len);
4519         if (!cache)
4520                 return 0;
4521
4522         rec = container_of(cache, struct extent_record, cache);
4523         if (!is_extent_tree_record(rec))
4524                 return 0;
4525
4526         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4527         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4528 }
4529
4530 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4531                        struct extent_buffer *buf, int slot)
4532 {
4533         if (btrfs_header_level(buf)) {
4534                 struct btrfs_key_ptr ptr1, ptr2;
4535
4536                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4537                                    sizeof(struct btrfs_key_ptr));
4538                 read_extent_buffer(buf, &ptr2,
4539                                    btrfs_node_key_ptr_offset(slot + 1),
4540                                    sizeof(struct btrfs_key_ptr));
4541                 write_extent_buffer(buf, &ptr1,
4542                                     btrfs_node_key_ptr_offset(slot + 1),
4543                                     sizeof(struct btrfs_key_ptr));
4544                 write_extent_buffer(buf, &ptr2,
4545                                     btrfs_node_key_ptr_offset(slot),
4546                                     sizeof(struct btrfs_key_ptr));
4547                 if (slot == 0) {
4548                         struct btrfs_disk_key key;
4549                         btrfs_node_key(buf, &key, 0);
4550                         btrfs_fixup_low_keys(root, path, &key,
4551                                              btrfs_header_level(buf) + 1);
4552                 }
4553         } else {
4554                 struct btrfs_item *item1, *item2;
4555                 struct btrfs_key k1, k2;
4556                 char *item1_data, *item2_data;
4557                 u32 item1_offset, item2_offset, item1_size, item2_size;
4558
4559                 item1 = btrfs_item_nr(slot);
4560                 item2 = btrfs_item_nr(slot + 1);
4561                 btrfs_item_key_to_cpu(buf, &k1, slot);
4562                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4563                 item1_offset = btrfs_item_offset(buf, item1);
4564                 item2_offset = btrfs_item_offset(buf, item2);
4565                 item1_size = btrfs_item_size(buf, item1);
4566                 item2_size = btrfs_item_size(buf, item2);
4567
4568                 item1_data = malloc(item1_size);
4569                 if (!item1_data)
4570                         return -ENOMEM;
4571                 item2_data = malloc(item2_size);
4572                 if (!item2_data) {
4573                         free(item1_data);
4574                         return -ENOMEM;
4575                 }
4576
4577                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4578                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4579
4580                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4581                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4582                 free(item1_data);
4583                 free(item2_data);
4584
4585                 btrfs_set_item_offset(buf, item1, item2_offset);
4586                 btrfs_set_item_offset(buf, item2, item1_offset);
4587                 btrfs_set_item_size(buf, item1, item2_size);
4588                 btrfs_set_item_size(buf, item2, item1_size);
4589
4590                 path->slots[0] = slot;
4591                 btrfs_set_item_key_unsafe(root, path, &k2);
4592                 path->slots[0] = slot + 1;
4593                 btrfs_set_item_key_unsafe(root, path, &k1);
4594         }
4595         return 0;
4596 }
4597
4598 static int fix_key_order(struct btrfs_trans_handle *trans,
4599                          struct btrfs_root *root,
4600                          struct btrfs_path *path)
4601 {
4602         struct extent_buffer *buf;
4603         struct btrfs_key k1, k2;
4604         int i;
4605         int level = path->lowest_level;
4606         int ret = -EIO;
4607
4608         buf = path->nodes[level];
4609         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4610                 if (level) {
4611                         btrfs_node_key_to_cpu(buf, &k1, i);
4612                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4613                 } else {
4614                         btrfs_item_key_to_cpu(buf, &k1, i);
4615                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4616                 }
4617                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4618                         continue;
4619                 ret = swap_values(root, path, buf, i);
4620                 if (ret)
4621                         break;
4622                 btrfs_mark_buffer_dirty(buf);
4623                 i = 0;
4624         }
4625         return ret;
4626 }
4627
4628 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4629                              struct btrfs_root *root,
4630                              struct btrfs_path *path,
4631                              struct extent_buffer *buf, int slot)
4632 {
4633         struct btrfs_key key;
4634         int nritems = btrfs_header_nritems(buf);
4635
4636         btrfs_item_key_to_cpu(buf, &key, slot);
4637
4638         /* These are all the keys we can deal with missing. */
4639         if (key.type != BTRFS_DIR_INDEX_KEY &&
4640             key.type != BTRFS_EXTENT_ITEM_KEY &&
4641             key.type != BTRFS_METADATA_ITEM_KEY &&
4642             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4643             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4644                 return -1;
4645
4646         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4647                (unsigned long long)key.objectid, key.type,
4648                (unsigned long long)key.offset, slot, buf->start);
4649         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4650                               btrfs_item_nr_offset(slot + 1),
4651                               sizeof(struct btrfs_item) *
4652                               (nritems - slot - 1));
4653         btrfs_set_header_nritems(buf, nritems - 1);
4654         if (slot == 0) {
4655                 struct btrfs_disk_key disk_key;
4656
4657                 btrfs_item_key(buf, &disk_key, 0);
4658                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4659         }
4660         btrfs_mark_buffer_dirty(buf);
4661         return 0;
4662 }
4663
4664 static int fix_item_offset(struct btrfs_trans_handle *trans,
4665                            struct btrfs_root *root,
4666                            struct btrfs_path *path)
4667 {
4668         struct extent_buffer *buf;
4669         int i;
4670         int ret = 0;
4671
4672         /* We should only get this for leaves */
4673         BUG_ON(path->lowest_level);
4674         buf = path->nodes[0];
4675 again:
4676         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4677                 unsigned int shift = 0, offset;
4678
4679                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4680                     BTRFS_LEAF_DATA_SIZE(root)) {
4681                         if (btrfs_item_end_nr(buf, i) >
4682                             BTRFS_LEAF_DATA_SIZE(root)) {
4683                                 ret = delete_bogus_item(trans, root, path,
4684                                                         buf, i);
4685                                 if (!ret)
4686                                         goto again;
4687                                 fprintf(stderr, "item is off the end of the "
4688                                         "leaf, can't fix\n");
4689                                 ret = -EIO;
4690                                 break;
4691                         }
4692                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4693                                 btrfs_item_end_nr(buf, i);
4694                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4695                            btrfs_item_offset_nr(buf, i - 1)) {
4696                         if (btrfs_item_end_nr(buf, i) >
4697                             btrfs_item_offset_nr(buf, i - 1)) {
4698                                 ret = delete_bogus_item(trans, root, path,
4699                                                         buf, i);
4700                                 if (!ret)
4701                                         goto again;
4702                                 fprintf(stderr, "items overlap, can't fix\n");
4703                                 ret = -EIO;
4704                                 break;
4705                         }
4706                         shift = btrfs_item_offset_nr(buf, i - 1) -
4707                                 btrfs_item_end_nr(buf, i);
4708                 }
4709                 if (!shift)
4710                         continue;
4711
4712                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4713                        i, shift, (unsigned long long)buf->start);
4714                 offset = btrfs_item_offset_nr(buf, i);
4715                 memmove_extent_buffer(buf,
4716                                       btrfs_leaf_data(buf) + offset + shift,
4717                                       btrfs_leaf_data(buf) + offset,
4718                                       btrfs_item_size_nr(buf, i));
4719                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4720                                       offset + shift);
4721                 btrfs_mark_buffer_dirty(buf);
4722         }
4723
4724         /*
4725          * We may have moved things, in which case we want to exit so we don't
4726          * write those changes out.  Once we have proper abort functionality in
4727          * progs this can be changed to something nicer.
4728          */
4729         BUG_ON(ret);
4730         return ret;
4731 }
4732
4733 /*
4734  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4735  * then just return -EIO.
4736  */
4737 static int try_to_fix_bad_block(struct btrfs_root *root,
4738                                 struct extent_buffer *buf,
4739                                 enum btrfs_tree_block_status status)
4740 {
4741         struct btrfs_trans_handle *trans;
4742         struct ulist *roots;
4743         struct ulist_node *node;
4744         struct btrfs_root *search_root;
4745         struct btrfs_path path;
4746         struct ulist_iterator iter;
4747         struct btrfs_key root_key, key;
4748         int ret;
4749
4750         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4751             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4752                 return -EIO;
4753
4754         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4755         if (ret)
4756                 return -EIO;
4757
4758         btrfs_init_path(&path);
4759         ULIST_ITER_INIT(&iter);
4760         while ((node = ulist_next(roots, &iter))) {
4761                 root_key.objectid = node->val;
4762                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4763                 root_key.offset = (u64)-1;
4764
4765                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4766                 if (IS_ERR(root)) {
4767                         ret = -EIO;
4768                         break;
4769                 }
4770
4771
4772                 trans = btrfs_start_transaction(search_root, 0);
4773                 if (IS_ERR(trans)) {
4774                         ret = PTR_ERR(trans);
4775                         break;
4776                 }
4777
4778                 path.lowest_level = btrfs_header_level(buf);
4779                 path.skip_check_block = 1;
4780                 if (path.lowest_level)
4781                         btrfs_node_key_to_cpu(buf, &key, 0);
4782                 else
4783                         btrfs_item_key_to_cpu(buf, &key, 0);
4784                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4785                 if (ret) {
4786                         ret = -EIO;
4787                         btrfs_commit_transaction(trans, search_root);
4788                         break;
4789                 }
4790                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4791                         ret = fix_key_order(trans, search_root, &path);
4792                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4793                         ret = fix_item_offset(trans, search_root, &path);
4794                 if (ret) {
4795                         btrfs_commit_transaction(trans, search_root);
4796                         break;
4797                 }
4798                 btrfs_release_path(&path);
4799                 btrfs_commit_transaction(trans, search_root);
4800         }
4801         ulist_free(roots);
4802         btrfs_release_path(&path);
4803         return ret;
4804 }
4805
4806 static int check_block(struct btrfs_root *root,
4807                        struct cache_tree *extent_cache,
4808                        struct extent_buffer *buf, u64 flags)
4809 {
4810         struct extent_record *rec;
4811         struct cache_extent *cache;
4812         struct btrfs_key key;
4813         enum btrfs_tree_block_status status;
4814         int ret = 0;
4815         int level;
4816
4817         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4818         if (!cache)
4819                 return 1;
4820         rec = container_of(cache, struct extent_record, cache);
4821         rec->generation = btrfs_header_generation(buf);
4822
4823         level = btrfs_header_level(buf);
4824         if (btrfs_header_nritems(buf) > 0) {
4825
4826                 if (level == 0)
4827                         btrfs_item_key_to_cpu(buf, &key, 0);
4828                 else
4829                         btrfs_node_key_to_cpu(buf, &key, 0);
4830
4831                 rec->info_objectid = key.objectid;
4832         }
4833         rec->info_level = level;
4834
4835         if (btrfs_is_leaf(buf))
4836                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4837         else
4838                 status = btrfs_check_node(root, &rec->parent_key, buf);
4839
4840         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4841                 if (repair)
4842                         status = try_to_fix_bad_block(root, buf, status);
4843                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4844                         ret = -EIO;
4845                         fprintf(stderr, "bad block %llu\n",
4846                                 (unsigned long long)buf->start);
4847                 } else {
4848                         /*
4849                          * Signal to callers we need to start the scan over
4850                          * again since we'll have cowed blocks.
4851                          */
4852                         ret = -EAGAIN;
4853                 }
4854         } else {
4855                 rec->content_checked = 1;
4856                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4857                         rec->owner_ref_checked = 1;
4858                 else {
4859                         ret = check_owner_ref(root, rec, buf);
4860                         if (!ret)
4861                                 rec->owner_ref_checked = 1;
4862                 }
4863         }
4864         if (!ret)
4865                 maybe_free_extent_rec(extent_cache, rec);
4866         return ret;
4867 }
4868
4869 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4870                                                 u64 parent, u64 root)
4871 {
4872         struct list_head *cur = rec->backrefs.next;
4873         struct extent_backref *node;
4874         struct tree_backref *back;
4875
4876         while(cur != &rec->backrefs) {
4877                 node = to_extent_backref(cur);
4878                 cur = cur->next;
4879                 if (node->is_data)
4880                         continue;
4881                 back = to_tree_backref(node);
4882                 if (parent > 0) {
4883                         if (!node->full_backref)
4884                                 continue;
4885                         if (parent == back->parent)
4886                                 return back;
4887                 } else {
4888                         if (node->full_backref)
4889                                 continue;
4890                         if (back->root == root)
4891                                 return back;
4892                 }
4893         }
4894         return NULL;
4895 }
4896
4897 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4898                                                 u64 parent, u64 root)
4899 {
4900         struct tree_backref *ref = malloc(sizeof(*ref));
4901
4902         if (!ref)
4903                 return NULL;
4904         memset(&ref->node, 0, sizeof(ref->node));
4905         if (parent > 0) {
4906                 ref->parent = parent;
4907                 ref->node.full_backref = 1;
4908         } else {
4909                 ref->root = root;
4910                 ref->node.full_backref = 0;
4911         }
4912         list_add_tail(&ref->node.list, &rec->backrefs);
4913
4914         return ref;
4915 }
4916
4917 static struct data_backref *find_data_backref(struct extent_record *rec,
4918                                                 u64 parent, u64 root,
4919                                                 u64 owner, u64 offset,
4920                                                 int found_ref,
4921                                                 u64 disk_bytenr, u64 bytes)
4922 {
4923         struct list_head *cur = rec->backrefs.next;
4924         struct extent_backref *node;
4925         struct data_backref *back;
4926
4927         while(cur != &rec->backrefs) {
4928                 node = to_extent_backref(cur);
4929                 cur = cur->next;
4930                 if (!node->is_data)
4931                         continue;
4932                 back = to_data_backref(node);
4933                 if (parent > 0) {
4934                         if (!node->full_backref)
4935                                 continue;
4936                         if (parent == back->parent)
4937                                 return back;
4938                 } else {
4939                         if (node->full_backref)
4940                                 continue;
4941                         if (back->root == root && back->owner == owner &&
4942                             back->offset == offset) {
4943                                 if (found_ref && node->found_ref &&
4944                                     (back->bytes != bytes ||
4945                                     back->disk_bytenr != disk_bytenr))
4946                                         continue;
4947                                 return back;
4948                         }
4949                 }
4950         }
4951         return NULL;
4952 }
4953
4954 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4955                                                 u64 parent, u64 root,
4956                                                 u64 owner, u64 offset,
4957                                                 u64 max_size)
4958 {
4959         struct data_backref *ref = malloc(sizeof(*ref));
4960
4961         if (!ref)
4962                 return NULL;
4963         memset(&ref->node, 0, sizeof(ref->node));
4964         ref->node.is_data = 1;
4965
4966         if (parent > 0) {
4967                 ref->parent = parent;
4968                 ref->owner = 0;
4969                 ref->offset = 0;
4970                 ref->node.full_backref = 1;
4971         } else {
4972                 ref->root = root;
4973                 ref->owner = owner;
4974                 ref->offset = offset;
4975                 ref->node.full_backref = 0;
4976         }
4977         ref->bytes = max_size;
4978         ref->found_ref = 0;
4979         ref->num_refs = 0;
4980         list_add_tail(&ref->node.list, &rec->backrefs);
4981         if (max_size > rec->max_size)
4982                 rec->max_size = max_size;
4983         return ref;
4984 }
4985
4986 /* Check if the type of extent matches with its chunk */
4987 static void check_extent_type(struct extent_record *rec)
4988 {
4989         struct btrfs_block_group_cache *bg_cache;
4990
4991         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4992         if (!bg_cache)
4993                 return;
4994
4995         /* data extent, check chunk directly*/
4996         if (!rec->metadata) {
4997                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4998                         rec->wrong_chunk_type = 1;
4999                 return;
5000         }
5001
5002         /* metadata extent, check the obvious case first */
5003         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5004                                  BTRFS_BLOCK_GROUP_METADATA))) {
5005                 rec->wrong_chunk_type = 1;
5006                 return;
5007         }
5008
5009         /*
5010          * Check SYSTEM extent, as it's also marked as metadata, we can only
5011          * make sure it's a SYSTEM extent by its backref
5012          */
5013         if (!list_empty(&rec->backrefs)) {
5014                 struct extent_backref *node;
5015                 struct tree_backref *tback;
5016                 u64 bg_type;
5017
5018                 node = to_extent_backref(rec->backrefs.next);
5019                 if (node->is_data) {
5020                         /* tree block shouldn't have data backref */
5021                         rec->wrong_chunk_type = 1;
5022                         return;
5023                 }
5024                 tback = container_of(node, struct tree_backref, node);
5025
5026                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5027                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5028                 else
5029                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5030                 if (!(bg_cache->flags & bg_type))
5031                         rec->wrong_chunk_type = 1;
5032         }
5033 }
5034
5035 /*
5036  * Allocate a new extent record, fill default values from @tmpl and insert int
5037  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5038  * the cache, otherwise it fails.
5039  */
5040 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5041                 struct extent_record *tmpl)
5042 {
5043         struct extent_record *rec;
5044         int ret = 0;
5045
5046         rec = malloc(sizeof(*rec));
5047         if (!rec)
5048                 return -ENOMEM;
5049         rec->start = tmpl->start;
5050         rec->max_size = tmpl->max_size;
5051         rec->nr = max(tmpl->nr, tmpl->max_size);
5052         rec->found_rec = tmpl->found_rec;
5053         rec->content_checked = tmpl->content_checked;
5054         rec->owner_ref_checked = tmpl->owner_ref_checked;
5055         rec->num_duplicates = 0;
5056         rec->metadata = tmpl->metadata;
5057         rec->flag_block_full_backref = FLAG_UNSET;
5058         rec->bad_full_backref = 0;
5059         rec->crossing_stripes = 0;
5060         rec->wrong_chunk_type = 0;
5061         rec->is_root = tmpl->is_root;
5062         rec->refs = tmpl->refs;
5063         rec->extent_item_refs = tmpl->extent_item_refs;
5064         rec->parent_generation = tmpl->parent_generation;
5065         INIT_LIST_HEAD(&rec->backrefs);
5066         INIT_LIST_HEAD(&rec->dups);
5067         INIT_LIST_HEAD(&rec->list);
5068         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5069         rec->cache.start = tmpl->start;
5070         rec->cache.size = tmpl->nr;
5071         ret = insert_cache_extent(extent_cache, &rec->cache);
5072         if (ret) {
5073                 free(rec);
5074                 return ret;
5075         }
5076         bytes_used += rec->nr;
5077
5078         if (tmpl->metadata)
5079                 rec->crossing_stripes = check_crossing_stripes(global_info,
5080                                 rec->start, global_info->tree_root->nodesize);
5081         check_extent_type(rec);
5082         return ret;
5083 }
5084
5085 /*
5086  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5087  * some are hints:
5088  * - refs              - if found, increase refs
5089  * - is_root           - if found, set
5090  * - content_checked   - if found, set
5091  * - owner_ref_checked - if found, set
5092  *
5093  * If not found, create a new one, initialize and insert.
5094  */
5095 static int add_extent_rec(struct cache_tree *extent_cache,
5096                 struct extent_record *tmpl)
5097 {
5098         struct extent_record *rec;
5099         struct cache_extent *cache;
5100         int ret = 0;
5101         int dup = 0;
5102
5103         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5104         if (cache) {
5105                 rec = container_of(cache, struct extent_record, cache);
5106                 if (tmpl->refs)
5107                         rec->refs++;
5108                 if (rec->nr == 1)
5109                         rec->nr = max(tmpl->nr, tmpl->max_size);
5110
5111                 /*
5112                  * We need to make sure to reset nr to whatever the extent
5113                  * record says was the real size, this way we can compare it to
5114                  * the backrefs.
5115                  */
5116                 if (tmpl->found_rec) {
5117                         if (tmpl->start != rec->start || rec->found_rec) {
5118                                 struct extent_record *tmp;
5119
5120                                 dup = 1;
5121                                 if (list_empty(&rec->list))
5122                                         list_add_tail(&rec->list,
5123                                                       &duplicate_extents);
5124
5125                                 /*
5126                                  * We have to do this song and dance in case we
5127                                  * find an extent record that falls inside of
5128                                  * our current extent record but does not have
5129                                  * the same objectid.
5130                                  */
5131                                 tmp = malloc(sizeof(*tmp));
5132                                 if (!tmp)
5133                                         return -ENOMEM;
5134                                 tmp->start = tmpl->start;
5135                                 tmp->max_size = tmpl->max_size;
5136                                 tmp->nr = tmpl->nr;
5137                                 tmp->found_rec = 1;
5138                                 tmp->metadata = tmpl->metadata;
5139                                 tmp->extent_item_refs = tmpl->extent_item_refs;
5140                                 INIT_LIST_HEAD(&tmp->list);
5141                                 list_add_tail(&tmp->list, &rec->dups);
5142                                 rec->num_duplicates++;
5143                         } else {
5144                                 rec->nr = tmpl->nr;
5145                                 rec->found_rec = 1;
5146                         }
5147                 }
5148
5149                 if (tmpl->extent_item_refs && !dup) {
5150                         if (rec->extent_item_refs) {
5151                                 fprintf(stderr, "block %llu rec "
5152                                         "extent_item_refs %llu, passed %llu\n",
5153                                         (unsigned long long)tmpl->start,
5154                                         (unsigned long long)
5155                                                         rec->extent_item_refs,
5156                                         (unsigned long long)tmpl->extent_item_refs);
5157                         }
5158                         rec->extent_item_refs = tmpl->extent_item_refs;
5159                 }
5160                 if (tmpl->is_root)
5161                         rec->is_root = 1;
5162                 if (tmpl->content_checked)
5163                         rec->content_checked = 1;
5164                 if (tmpl->owner_ref_checked)
5165                         rec->owner_ref_checked = 1;
5166                 memcpy(&rec->parent_key, &tmpl->parent_key,
5167                                 sizeof(tmpl->parent_key));
5168                 if (tmpl->parent_generation)
5169                         rec->parent_generation = tmpl->parent_generation;
5170                 if (rec->max_size < tmpl->max_size)
5171                         rec->max_size = tmpl->max_size;
5172
5173                 /*
5174                  * A metadata extent can't cross stripe_len boundary, otherwise
5175                  * kernel scrub won't be able to handle it.
5176                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5177                  * it.
5178                  */
5179                 if (tmpl->metadata)
5180                         rec->crossing_stripes = check_crossing_stripes(
5181                                         global_info, rec->start,
5182                                         global_info->tree_root->nodesize);
5183                 check_extent_type(rec);
5184                 maybe_free_extent_rec(extent_cache, rec);
5185                 return ret;
5186         }
5187
5188         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5189
5190         return ret;
5191 }
5192
5193 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5194                             u64 parent, u64 root, int found_ref)
5195 {
5196         struct extent_record *rec;
5197         struct tree_backref *back;
5198         struct cache_extent *cache;
5199         int ret;
5200
5201         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5202         if (!cache) {
5203                 struct extent_record tmpl;
5204
5205                 memset(&tmpl, 0, sizeof(tmpl));
5206                 tmpl.start = bytenr;
5207                 tmpl.nr = 1;
5208                 tmpl.metadata = 1;
5209
5210                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5211                 if (ret)
5212                         return ret;
5213
5214                 /* really a bug in cache_extent implement now */
5215                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5216                 if (!cache)
5217                         return -ENOENT;
5218         }
5219
5220         rec = container_of(cache, struct extent_record, cache);
5221         if (rec->start != bytenr) {
5222                 /*
5223                  * Several cause, from unaligned bytenr to over lapping extents
5224                  */
5225                 return -EEXIST;
5226         }
5227
5228         back = find_tree_backref(rec, parent, root);
5229         if (!back) {
5230                 back = alloc_tree_backref(rec, parent, root);
5231                 if (!back)
5232                         return -ENOMEM;
5233         }
5234
5235         if (found_ref) {
5236                 if (back->node.found_ref) {
5237                         fprintf(stderr, "Extent back ref already exists "
5238                                 "for %llu parent %llu root %llu \n",
5239                                 (unsigned long long)bytenr,
5240                                 (unsigned long long)parent,
5241                                 (unsigned long long)root);
5242                 }
5243                 back->node.found_ref = 1;
5244         } else {
5245                 if (back->node.found_extent_tree) {
5246                         fprintf(stderr, "Extent back ref already exists "
5247                                 "for %llu parent %llu root %llu \n",
5248                                 (unsigned long long)bytenr,
5249                                 (unsigned long long)parent,
5250                                 (unsigned long long)root);
5251                 }
5252                 back->node.found_extent_tree = 1;
5253         }
5254         check_extent_type(rec);
5255         maybe_free_extent_rec(extent_cache, rec);
5256         return 0;
5257 }
5258
5259 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5260                             u64 parent, u64 root, u64 owner, u64 offset,
5261                             u32 num_refs, int found_ref, u64 max_size)
5262 {
5263         struct extent_record *rec;
5264         struct data_backref *back;
5265         struct cache_extent *cache;
5266         int ret;
5267
5268         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5269         if (!cache) {
5270                 struct extent_record tmpl;
5271
5272                 memset(&tmpl, 0, sizeof(tmpl));
5273                 tmpl.start = bytenr;
5274                 tmpl.nr = 1;
5275                 tmpl.max_size = max_size;
5276
5277                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5278                 if (ret)
5279                         return ret;
5280
5281                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5282                 if (!cache)
5283                         abort();
5284         }
5285
5286         rec = container_of(cache, struct extent_record, cache);
5287         if (rec->max_size < max_size)
5288                 rec->max_size = max_size;
5289
5290         /*
5291          * If found_ref is set then max_size is the real size and must match the
5292          * existing refs.  So if we have already found a ref then we need to
5293          * make sure that this ref matches the existing one, otherwise we need
5294          * to add a new backref so we can notice that the backrefs don't match
5295          * and we need to figure out who is telling the truth.  This is to
5296          * account for that awful fsync bug I introduced where we'd end up with
5297          * a btrfs_file_extent_item that would have its length include multiple
5298          * prealloc extents or point inside of a prealloc extent.
5299          */
5300         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5301                                  bytenr, max_size);
5302         if (!back) {
5303                 back = alloc_data_backref(rec, parent, root, owner, offset,
5304                                           max_size);
5305                 BUG_ON(!back);
5306         }
5307
5308         if (found_ref) {
5309                 BUG_ON(num_refs != 1);
5310                 if (back->node.found_ref)
5311                         BUG_ON(back->bytes != max_size);
5312                 back->node.found_ref = 1;
5313                 back->found_ref += 1;
5314                 back->bytes = max_size;
5315                 back->disk_bytenr = bytenr;
5316                 rec->refs += 1;
5317                 rec->content_checked = 1;
5318                 rec->owner_ref_checked = 1;
5319         } else {
5320                 if (back->node.found_extent_tree) {
5321                         fprintf(stderr, "Extent back ref already exists "
5322                                 "for %llu parent %llu root %llu "
5323                                 "owner %llu offset %llu num_refs %lu\n",
5324                                 (unsigned long long)bytenr,
5325                                 (unsigned long long)parent,
5326                                 (unsigned long long)root,
5327                                 (unsigned long long)owner,
5328                                 (unsigned long long)offset,
5329                                 (unsigned long)num_refs);
5330                 }
5331                 back->num_refs = num_refs;
5332                 back->node.found_extent_tree = 1;
5333         }
5334         maybe_free_extent_rec(extent_cache, rec);
5335         return 0;
5336 }
5337
5338 static int add_pending(struct cache_tree *pending,
5339                        struct cache_tree *seen, u64 bytenr, u32 size)
5340 {
5341         int ret;
5342         ret = add_cache_extent(seen, bytenr, size);
5343         if (ret)
5344                 return ret;
5345         add_cache_extent(pending, bytenr, size);
5346         return 0;
5347 }
5348
5349 static int pick_next_pending(struct cache_tree *pending,
5350                         struct cache_tree *reada,
5351                         struct cache_tree *nodes,
5352                         u64 last, struct block_info *bits, int bits_nr,
5353                         int *reada_bits)
5354 {
5355         unsigned long node_start = last;
5356         struct cache_extent *cache;
5357         int ret;
5358
5359         cache = search_cache_extent(reada, 0);
5360         if (cache) {
5361                 bits[0].start = cache->start;
5362                 bits[0].size = cache->size;
5363                 *reada_bits = 1;
5364                 return 1;
5365         }
5366         *reada_bits = 0;
5367         if (node_start > 32768)
5368                 node_start -= 32768;
5369
5370         cache = search_cache_extent(nodes, node_start);
5371         if (!cache)
5372                 cache = search_cache_extent(nodes, 0);
5373
5374         if (!cache) {
5375                  cache = search_cache_extent(pending, 0);
5376                  if (!cache)
5377                          return 0;
5378                  ret = 0;
5379                  do {
5380                          bits[ret].start = cache->start;
5381                          bits[ret].size = cache->size;
5382                          cache = next_cache_extent(cache);
5383                          ret++;
5384                  } while (cache && ret < bits_nr);
5385                  return ret;
5386         }
5387
5388         ret = 0;
5389         do {
5390                 bits[ret].start = cache->start;
5391                 bits[ret].size = cache->size;
5392                 cache = next_cache_extent(cache);
5393                 ret++;
5394         } while (cache && ret < bits_nr);
5395
5396         if (bits_nr - ret > 8) {
5397                 u64 lookup = bits[0].start + bits[0].size;
5398                 struct cache_extent *next;
5399                 next = search_cache_extent(pending, lookup);
5400                 while(next) {
5401                         if (next->start - lookup > 32768)
5402                                 break;
5403                         bits[ret].start = next->start;
5404                         bits[ret].size = next->size;
5405                         lookup = next->start + next->size;
5406                         ret++;
5407                         if (ret == bits_nr)
5408                                 break;
5409                         next = next_cache_extent(next);
5410                         if (!next)
5411                                 break;
5412                 }
5413         }
5414         return ret;
5415 }
5416
5417 static void free_chunk_record(struct cache_extent *cache)
5418 {
5419         struct chunk_record *rec;
5420
5421         rec = container_of(cache, struct chunk_record, cache);
5422         list_del_init(&rec->list);
5423         list_del_init(&rec->dextents);
5424         free(rec);
5425 }
5426
5427 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5428 {
5429         cache_tree_free_extents(chunk_cache, free_chunk_record);
5430 }
5431
5432 static void free_device_record(struct rb_node *node)
5433 {
5434         struct device_record *rec;
5435
5436         rec = container_of(node, struct device_record, node);
5437         free(rec);
5438 }
5439
5440 FREE_RB_BASED_TREE(device_cache, free_device_record);
5441
5442 int insert_block_group_record(struct block_group_tree *tree,
5443                               struct block_group_record *bg_rec)
5444 {
5445         int ret;
5446
5447         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5448         if (ret)
5449                 return ret;
5450
5451         list_add_tail(&bg_rec->list, &tree->block_groups);
5452         return 0;
5453 }
5454
5455 static void free_block_group_record(struct cache_extent *cache)
5456 {
5457         struct block_group_record *rec;
5458
5459         rec = container_of(cache, struct block_group_record, cache);
5460         list_del_init(&rec->list);
5461         free(rec);
5462 }
5463
5464 void free_block_group_tree(struct block_group_tree *tree)
5465 {
5466         cache_tree_free_extents(&tree->tree, free_block_group_record);
5467 }
5468
5469 int insert_device_extent_record(struct device_extent_tree *tree,
5470                                 struct device_extent_record *de_rec)
5471 {
5472         int ret;
5473
5474         /*
5475          * Device extent is a bit different from the other extents, because
5476          * the extents which belong to the different devices may have the
5477          * same start and size, so we need use the special extent cache
5478          * search/insert functions.
5479          */
5480         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5481         if (ret)
5482                 return ret;
5483
5484         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5485         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5486         return 0;
5487 }
5488
5489 static void free_device_extent_record(struct cache_extent *cache)
5490 {
5491         struct device_extent_record *rec;
5492
5493         rec = container_of(cache, struct device_extent_record, cache);
5494         if (!list_empty(&rec->chunk_list))
5495                 list_del_init(&rec->chunk_list);
5496         if (!list_empty(&rec->device_list))
5497                 list_del_init(&rec->device_list);
5498         free(rec);
5499 }
5500
5501 void free_device_extent_tree(struct device_extent_tree *tree)
5502 {
5503         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5504 }
5505
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent reference item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A reference whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref; any other reference is recorded as a data backref
 * carrying the item's reference count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
5526
5527 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5528                                             struct btrfs_key *key,
5529                                             int slot)
5530 {
5531         struct btrfs_chunk *ptr;
5532         struct chunk_record *rec;
5533         int num_stripes, i;
5534
5535         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5536         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5537
5538         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5539         if (!rec) {
5540                 fprintf(stderr, "memory allocation failed\n");
5541                 exit(-1);
5542         }
5543
5544         INIT_LIST_HEAD(&rec->list);
5545         INIT_LIST_HEAD(&rec->dextents);
5546         rec->bg_rec = NULL;
5547
5548         rec->cache.start = key->offset;
5549         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5550
5551         rec->generation = btrfs_header_generation(leaf);
5552
5553         rec->objectid = key->objectid;
5554         rec->type = key->type;
5555         rec->offset = key->offset;
5556
5557         rec->length = rec->cache.size;
5558         rec->owner = btrfs_chunk_owner(leaf, ptr);
5559         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5560         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5561         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5562         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5563         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5564         rec->num_stripes = num_stripes;
5565         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5566
5567         for (i = 0; i < rec->num_stripes; ++i) {
5568                 rec->stripes[i].devid =
5569                         btrfs_stripe_devid_nr(leaf, ptr, i);
5570                 rec->stripes[i].offset =
5571                         btrfs_stripe_offset_nr(leaf, ptr, i);
5572                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5573                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5574                                 BTRFS_UUID_SIZE);
5575         }
5576
5577         return rec;
5578 }
5579
5580 static int process_chunk_item(struct cache_tree *chunk_cache,
5581                               struct btrfs_key *key, struct extent_buffer *eb,
5582                               int slot)
5583 {
5584         struct chunk_record *rec;
5585         struct btrfs_chunk *chunk;
5586         int ret = 0;
5587
5588         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5589         /*
5590          * Do extra check for this chunk item,
5591          *
5592          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5593          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5594          * and owner<->key_type check.
5595          */
5596         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5597                                       key->offset);
5598         if (ret < 0) {
5599                 error("chunk(%llu, %llu) is not valid, ignore it",
5600                       key->offset, btrfs_chunk_length(eb, chunk));
5601                 return 0;
5602         }
5603         rec = btrfs_new_chunk_record(eb, key, slot);
5604         ret = insert_cache_extent(chunk_cache, &rec->cache);
5605         if (ret) {
5606                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5607                         rec->offset, rec->length);
5608                 free(rec);
5609         }
5610
5611         return ret;
5612 }
5613
5614 static int process_device_item(struct rb_root *dev_cache,
5615                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5616 {
5617         struct btrfs_dev_item *ptr;
5618         struct device_record *rec;
5619         int ret = 0;
5620
5621         ptr = btrfs_item_ptr(eb,
5622                 slot, struct btrfs_dev_item);
5623
5624         rec = malloc(sizeof(*rec));
5625         if (!rec) {
5626                 fprintf(stderr, "memory allocation failed\n");
5627                 return -ENOMEM;
5628         }
5629
5630         rec->devid = key->offset;
5631         rec->generation = btrfs_header_generation(eb);
5632
5633         rec->objectid = key->objectid;
5634         rec->type = key->type;
5635         rec->offset = key->offset;
5636
5637         rec->devid = btrfs_device_id(eb, ptr);
5638         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5639         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5640
5641         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5642         if (ret) {
5643                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5644                 free(rec);
5645         }
5646
5647         return ret;
5648 }
5649
5650 struct block_group_record *
5651 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5652                              int slot)
5653 {
5654         struct btrfs_block_group_item *ptr;
5655         struct block_group_record *rec;
5656
5657         rec = calloc(1, sizeof(*rec));
5658         if (!rec) {
5659                 fprintf(stderr, "memory allocation failed\n");
5660                 exit(-1);
5661         }
5662
5663         rec->cache.start = key->objectid;
5664         rec->cache.size = key->offset;
5665
5666         rec->generation = btrfs_header_generation(leaf);
5667
5668         rec->objectid = key->objectid;
5669         rec->type = key->type;
5670         rec->offset = key->offset;
5671
5672         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5673         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5674
5675         INIT_LIST_HEAD(&rec->list);
5676
5677         return rec;
5678 }
5679
5680 static int process_block_group_item(struct block_group_tree *block_group_cache,
5681                                     struct btrfs_key *key,
5682                                     struct extent_buffer *eb, int slot)
5683 {
5684         struct block_group_record *rec;
5685         int ret = 0;
5686
5687         rec = btrfs_new_block_group_record(eb, key, slot);
5688         ret = insert_block_group_record(block_group_cache, rec);
5689         if (ret) {
5690                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5691                         rec->objectid, rec->offset);
5692                 free(rec);
5693         }
5694
5695         return ret;
5696 }
5697
5698 struct device_extent_record *
5699 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5700                                struct btrfs_key *key, int slot)
5701 {
5702         struct device_extent_record *rec;
5703         struct btrfs_dev_extent *ptr;
5704
5705         rec = calloc(1, sizeof(*rec));
5706         if (!rec) {
5707                 fprintf(stderr, "memory allocation failed\n");
5708                 exit(-1);
5709         }
5710
5711         rec->cache.objectid = key->objectid;
5712         rec->cache.start = key->offset;
5713
5714         rec->generation = btrfs_header_generation(leaf);
5715
5716         rec->objectid = key->objectid;
5717         rec->type = key->type;
5718         rec->offset = key->offset;
5719
5720         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5721         rec->chunk_objecteid =
5722                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5723         rec->chunk_offset =
5724                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5725         rec->length = btrfs_dev_extent_length(leaf, ptr);
5726         rec->cache.size = rec->length;
5727
5728         INIT_LIST_HEAD(&rec->chunk_list);
5729         INIT_LIST_HEAD(&rec->device_list);
5730
5731         return rec;
5732 }
5733
5734 static int
5735 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5736                            struct btrfs_key *key, struct extent_buffer *eb,
5737                            int slot)
5738 {
5739         struct device_extent_record *rec;
5740         int ret;
5741
5742         rec = btrfs_new_device_extent_record(eb, key, slot);
5743         ret = insert_device_extent_record(dev_extent_cache, rec);
5744         if (ret) {
5745                 fprintf(stderr,
5746                         "Device extent[%llu, %llu, %llu] existed.\n",
5747                         rec->objectid, rec->offset, rec->length);
5748                 free(rec);
5749         }
5750
5751         return ret;
5752 }
5753
5754 static int process_extent_item(struct btrfs_root *root,
5755                                struct cache_tree *extent_cache,
5756                                struct extent_buffer *eb, int slot)
5757 {
5758         struct btrfs_extent_item *ei;
5759         struct btrfs_extent_inline_ref *iref;
5760         struct btrfs_extent_data_ref *dref;
5761         struct btrfs_shared_data_ref *sref;
5762         struct btrfs_key key;
5763         struct extent_record tmpl;
5764         unsigned long end;
5765         unsigned long ptr;
5766         int ret;
5767         int type;
5768         u32 item_size = btrfs_item_size_nr(eb, slot);
5769         u64 refs = 0;
5770         u64 offset;
5771         u64 num_bytes;
5772         int metadata = 0;
5773
5774         btrfs_item_key_to_cpu(eb, &key, slot);
5775
5776         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5777                 metadata = 1;
5778                 num_bytes = root->nodesize;
5779         } else {
5780                 num_bytes = key.offset;
5781         }
5782
5783         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5784                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5785                       key.objectid, root->sectorsize);
5786                 return -EIO;
5787         }
5788         if (item_size < sizeof(*ei)) {
5789 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5790                 struct btrfs_extent_item_v0 *ei0;
5791                 BUG_ON(item_size != sizeof(*ei0));
5792                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5793                 refs = btrfs_extent_refs_v0(eb, ei0);
5794 #else
5795                 BUG();
5796 #endif
5797                 memset(&tmpl, 0, sizeof(tmpl));
5798                 tmpl.start = key.objectid;
5799                 tmpl.nr = num_bytes;
5800                 tmpl.extent_item_refs = refs;
5801                 tmpl.metadata = metadata;
5802                 tmpl.found_rec = 1;
5803                 tmpl.max_size = num_bytes;
5804
5805                 return add_extent_rec(extent_cache, &tmpl);
5806         }
5807
5808         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5809         refs = btrfs_extent_refs(eb, ei);
5810         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5811                 metadata = 1;
5812         else
5813                 metadata = 0;
5814         if (metadata && num_bytes != root->nodesize) {
5815                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5816                       num_bytes, root->nodesize);
5817                 return -EIO;
5818         }
5819         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5820                 error("ignore invalid data extent, length %llu is not aligned to %u",
5821                       num_bytes, root->sectorsize);
5822                 return -EIO;
5823         }
5824
5825         memset(&tmpl, 0, sizeof(tmpl));
5826         tmpl.start = key.objectid;
5827         tmpl.nr = num_bytes;
5828         tmpl.extent_item_refs = refs;
5829         tmpl.metadata = metadata;
5830         tmpl.found_rec = 1;
5831         tmpl.max_size = num_bytes;
5832         add_extent_rec(extent_cache, &tmpl);
5833
5834         ptr = (unsigned long)(ei + 1);
5835         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5836             key.type == BTRFS_EXTENT_ITEM_KEY)
5837                 ptr += sizeof(struct btrfs_tree_block_info);
5838
5839         end = (unsigned long)ei + item_size;
5840         while (ptr < end) {
5841                 iref = (struct btrfs_extent_inline_ref *)ptr;
5842                 type = btrfs_extent_inline_ref_type(eb, iref);
5843                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5844                 switch (type) {
5845                 case BTRFS_TREE_BLOCK_REF_KEY:
5846                         ret = add_tree_backref(extent_cache, key.objectid,
5847                                         0, offset, 0);
5848                         if (ret < 0)
5849                                 error("add_tree_backref failed: %s",
5850                                       strerror(-ret));
5851                         break;
5852                 case BTRFS_SHARED_BLOCK_REF_KEY:
5853                         ret = add_tree_backref(extent_cache, key.objectid,
5854                                         offset, 0, 0);
5855                         if (ret < 0)
5856                                 error("add_tree_backref failed: %s",
5857                                       strerror(-ret));
5858                         break;
5859                 case BTRFS_EXTENT_DATA_REF_KEY:
5860                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5861                         add_data_backref(extent_cache, key.objectid, 0,
5862                                         btrfs_extent_data_ref_root(eb, dref),
5863                                         btrfs_extent_data_ref_objectid(eb,
5864                                                                        dref),
5865                                         btrfs_extent_data_ref_offset(eb, dref),
5866                                         btrfs_extent_data_ref_count(eb, dref),
5867                                         0, num_bytes);
5868                         break;
5869                 case BTRFS_SHARED_DATA_REF_KEY:
5870                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5871                         add_data_backref(extent_cache, key.objectid, offset,
5872                                         0, 0, 0,
5873                                         btrfs_shared_data_ref_count(eb, sref),
5874                                         0, num_bytes);
5875                         break;
5876                 default:
5877                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5878                                 key.objectid, key.type, num_bytes);
5879                         goto out;
5880                 }
5881                 ptr += btrfs_extent_inline_ref_size(type);
5882         }
5883         WARN_ON(ptr > end);
5884 out:
5885         return 0;
5886 }
5887
5888 static int check_cache_range(struct btrfs_root *root,
5889                              struct btrfs_block_group_cache *cache,
5890                              u64 offset, u64 bytes)
5891 {
5892         struct btrfs_free_space *entry;
5893         u64 *logical;
5894         u64 bytenr;
5895         int stripe_len;
5896         int i, nr, ret;
5897
5898         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5899                 bytenr = btrfs_sb_offset(i);
5900                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5901                                        cache->key.objectid, bytenr, 0,
5902                                        &logical, &nr, &stripe_len);
5903                 if (ret)
5904                         return ret;
5905
5906                 while (nr--) {
5907                         if (logical[nr] + stripe_len <= offset)
5908                                 continue;
5909                         if (offset + bytes <= logical[nr])
5910                                 continue;
5911                         if (logical[nr] == offset) {
5912                                 if (stripe_len >= bytes) {
5913                                         free(logical);
5914                                         return 0;
5915                                 }
5916                                 bytes -= stripe_len;
5917                                 offset += stripe_len;
5918                         } else if (logical[nr] < offset) {
5919                                 if (logical[nr] + stripe_len >=
5920                                     offset + bytes) {
5921                                         free(logical);
5922                                         return 0;
5923                                 }
5924                                 bytes = (offset + bytes) -
5925                                         (logical[nr] + stripe_len);
5926                                 offset = logical[nr] + stripe_len;
5927                         } else {
5928                                 /*
5929                                  * Could be tricky, the super may land in the
5930                                  * middle of the area we're checking.  First
5931                                  * check the easiest case, it's at the end.
5932                                  */
5933                                 if (logical[nr] + stripe_len >=
5934                                     bytes + offset) {
5935                                         bytes = logical[nr] - offset;
5936                                         continue;
5937                                 }
5938
5939                                 /* Check the left side */
5940                                 ret = check_cache_range(root, cache,
5941                                                         offset,
5942                                                         logical[nr] - offset);
5943                                 if (ret) {
5944                                         free(logical);
5945                                         return ret;
5946                                 }
5947
5948                                 /* Now we continue with the right side */
5949                                 bytes = (offset + bytes) -
5950                                         (logical[nr] + stripe_len);
5951                                 offset = logical[nr] + stripe_len;
5952                         }
5953                 }
5954
5955                 free(logical);
5956         }
5957
5958         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5959         if (!entry) {
5960                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5961                         offset, offset+bytes);
5962                 return -EINVAL;
5963         }
5964
5965         if (entry->offset != offset) {
5966                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5967                         entry->offset);
5968                 return -EINVAL;
5969         }
5970
5971         if (entry->bytes != bytes) {
5972                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5973                         bytes, entry->bytes, offset);
5974                 return -EINVAL;
5975         }
5976
5977         unlink_free_space(cache->free_space_ctl, entry);
5978         free(entry);
5979         return 0;
5980 }
5981
5982 static int verify_space_cache(struct btrfs_root *root,
5983                               struct btrfs_block_group_cache *cache)
5984 {
5985         struct btrfs_path path;
5986         struct extent_buffer *leaf;
5987         struct btrfs_key key;
5988         u64 last;
5989         int ret = 0;
5990
5991         root = root->fs_info->extent_root;
5992
5993         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5994
5995         btrfs_init_path(&path);
5996         key.objectid = last;
5997         key.offset = 0;
5998         key.type = BTRFS_EXTENT_ITEM_KEY;
5999         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6000         if (ret < 0)
6001                 goto out;
6002         ret = 0;
6003         while (1) {
6004                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6005                         ret = btrfs_next_leaf(root, &path);
6006                         if (ret < 0)
6007                                 goto out;
6008                         if (ret > 0) {
6009                                 ret = 0;
6010                                 break;
6011                         }
6012                 }
6013                 leaf = path.nodes[0];
6014                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6015                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6016                         break;
6017                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6018                     key.type != BTRFS_METADATA_ITEM_KEY) {
6019                         path.slots[0]++;
6020                         continue;
6021                 }
6022
6023                 if (last == key.objectid) {
6024                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6025                                 last = key.objectid + key.offset;
6026                         else
6027                                 last = key.objectid + root->nodesize;
6028                         path.slots[0]++;
6029                         continue;
6030                 }
6031
6032                 ret = check_cache_range(root, cache, last,
6033                                         key.objectid - last);
6034                 if (ret)
6035                         break;
6036                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6037                         last = key.objectid + key.offset;
6038                 else
6039                         last = key.objectid + root->nodesize;
6040                 path.slots[0]++;
6041         }
6042
6043         if (last < cache->key.objectid + cache->key.offset)
6044                 ret = check_cache_range(root, cache, last,
6045                                         cache->key.objectid +
6046                                         cache->key.offset - last);
6047
6048 out:
6049         btrfs_release_path(&path);
6050
6051         if (!ret &&
6052             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6053                 fprintf(stderr, "There are still entries left in the space "
6054                         "cache\n");
6055                 ret = -EINVAL;
6056         }
6057
6058         return ret;
6059 }
6060
6061 static int check_space_cache(struct btrfs_root *root)
6062 {
6063         struct btrfs_block_group_cache *cache;
6064         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6065         int ret;
6066         int error = 0;
6067
6068         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6069             btrfs_super_generation(root->fs_info->super_copy) !=
6070             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6071                 printf("cache and super generation don't match, space cache "
6072                        "will be invalidated\n");
6073                 return 0;
6074         }
6075
6076         if (ctx.progress_enabled) {
6077                 ctx.tp = TASK_FREE_SPACE;
6078                 task_start(ctx.info);
6079         }
6080
6081         while (1) {
6082                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6083                 if (!cache)
6084                         break;
6085
6086                 start = cache->key.objectid + cache->key.offset;
6087                 if (!cache->free_space_ctl) {
6088                         if (btrfs_init_free_space_ctl(cache,
6089                                                       root->sectorsize)) {
6090                                 ret = -ENOMEM;
6091                                 break;
6092                         }
6093                 } else {
6094                         btrfs_remove_free_space_cache(cache);
6095                 }
6096
6097                 if (btrfs_fs_compat_ro(root->fs_info,
6098                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6099                         ret = exclude_super_stripes(root, cache);
6100                         if (ret) {
6101                                 fprintf(stderr, "could not exclude super stripes: %s\n",
6102                                         strerror(-ret));
6103                                 error++;
6104                                 continue;
6105                         }
6106                         ret = load_free_space_tree(root->fs_info, cache);
6107                         free_excluded_extents(root, cache);
6108                         if (ret < 0) {
6109                                 fprintf(stderr, "could not load free space tree: %s\n",
6110                                         strerror(-ret));
6111                                 error++;
6112                                 continue;
6113                         }
6114                         error += ret;
6115                 } else {
6116                         ret = load_free_space_cache(root->fs_info, cache);
6117                         if (!ret)
6118                                 continue;
6119                 }
6120
6121                 ret = verify_space_cache(root, cache);
6122                 if (ret) {
6123                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
6124                                 cache->key.objectid);
6125                         error++;
6126                 }
6127         }
6128
6129         task_stop(ctx.info);
6130
6131         return error ? -EINVAL : 0;
6132 }
6133
6134 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6135                         u64 num_bytes, unsigned long leaf_offset,
6136                         struct extent_buffer *eb) {
6137
6138         u64 offset = 0;
6139         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6140         char *data;
6141         unsigned long csum_offset;
6142         u32 csum;
6143         u32 csum_expected;
6144         u64 read_len;
6145         u64 data_checked = 0;
6146         u64 tmp;
6147         int ret = 0;
6148         int mirror;
6149         int num_copies;
6150
6151         if (num_bytes % root->sectorsize)
6152                 return -EINVAL;
6153
6154         data = malloc(num_bytes);
6155         if (!data)
6156                 return -ENOMEM;
6157
6158         while (offset < num_bytes) {
6159                 mirror = 0;
6160 again:
6161                 read_len = num_bytes - offset;
6162                 /* read as much space once a time */
6163                 ret = read_extent_data(root, data + offset,
6164                                 bytenr + offset, &read_len, mirror);
6165                 if (ret)
6166                         goto out;
6167                 data_checked = 0;
6168                 /* verify every 4k data's checksum */
6169                 while (data_checked < read_len) {
6170                         csum = ~(u32)0;
6171                         tmp = offset + data_checked;
6172
6173                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6174                                                csum, root->sectorsize);
6175                         btrfs_csum_final(csum, (u8 *)&csum);
6176
6177                         csum_offset = leaf_offset +
6178                                  tmp / root->sectorsize * csum_size;
6179                         read_extent_buffer(eb, (char *)&csum_expected,
6180                                            csum_offset, csum_size);
6181                         /* try another mirror */
6182                         if (csum != csum_expected) {
6183                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6184                                                 mirror, bytenr + tmp,
6185                                                 csum, csum_expected);
6186                                 num_copies = btrfs_num_copies(
6187                                                 &root->fs_info->mapping_tree,
6188                                                 bytenr, num_bytes);
6189                                 if (mirror < num_copies - 1) {
6190                                         mirror += 1;
6191                                         goto again;
6192                                 }
6193                         }
6194                         data_checked += root->sectorsize;
6195                 }
6196                 offset += read_len;
6197         }
6198 out:
6199         free(data);
6200         return ret;
6201 }
6202
6203 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6204                                u64 num_bytes)
6205 {
6206         struct btrfs_path path;
6207         struct extent_buffer *leaf;
6208         struct btrfs_key key;
6209         int ret;
6210
6211         btrfs_init_path(&path);
6212         key.objectid = bytenr;
6213         key.type = BTRFS_EXTENT_ITEM_KEY;
6214         key.offset = (u64)-1;
6215
6216 again:
6217         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6218                                 0, 0);
6219         if (ret < 0) {
6220                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6221                 btrfs_release_path(&path);
6222                 return ret;
6223         } else if (ret) {
6224                 if (path.slots[0] > 0) {
6225                         path.slots[0]--;
6226                 } else {
6227                         ret = btrfs_prev_leaf(root, &path);
6228                         if (ret < 0) {
6229                                 goto out;
6230                         } else if (ret > 0) {
6231                                 ret = 0;
6232                                 goto out;
6233                         }
6234                 }
6235         }
6236
6237         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6238
6239         /*
6240          * Block group items come before extent items if they have the same
6241          * bytenr, so walk back one more just in case.  Dear future traveller,
6242          * first congrats on mastering time travel.  Now if it's not too much
6243          * trouble could you go back to 2006 and tell Chris to make the
6244          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6245          * EXTENT_ITEM_KEY please?
6246          */
6247         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6248                 if (path.slots[0] > 0) {
6249                         path.slots[0]--;
6250                 } else {
6251                         ret = btrfs_prev_leaf(root, &path);
6252                         if (ret < 0) {
6253                                 goto out;
6254                         } else if (ret > 0) {
6255                                 ret = 0;
6256                                 goto out;
6257                         }
6258                 }
6259                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6260         }
6261
6262         while (num_bytes) {
6263                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6264                         ret = btrfs_next_leaf(root, &path);
6265                         if (ret < 0) {
6266                                 fprintf(stderr, "Error going to next leaf "
6267                                         "%d\n", ret);
6268                                 btrfs_release_path(&path);
6269                                 return ret;
6270                         } else if (ret) {
6271                                 break;
6272                         }
6273                 }
6274                 leaf = path.nodes[0];
6275                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6276                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6277                         path.slots[0]++;
6278                         continue;
6279                 }
6280                 if (key.objectid + key.offset < bytenr) {
6281                         path.slots[0]++;
6282                         continue;
6283                 }
6284                 if (key.objectid > bytenr + num_bytes)
6285                         break;
6286
6287                 if (key.objectid == bytenr) {
6288                         if (key.offset >= num_bytes) {
6289                                 num_bytes = 0;
6290                                 break;
6291                         }
6292                         num_bytes -= key.offset;
6293                         bytenr += key.offset;
6294                 } else if (key.objectid < bytenr) {
6295                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6296                                 num_bytes = 0;
6297                                 break;
6298                         }
6299                         num_bytes = (bytenr + num_bytes) -
6300                                 (key.objectid + key.offset);
6301                         bytenr = key.objectid + key.offset;
6302                 } else {
6303                         if (key.objectid + key.offset < bytenr + num_bytes) {
6304                                 u64 new_start = key.objectid + key.offset;
6305                                 u64 new_bytes = bytenr + num_bytes - new_start;
6306
6307                                 /*
6308                                  * Weird case, the extent is in the middle of
6309                                  * our range, we'll have to search one side
6310                                  * and then the other.  Not sure if this happens
6311                                  * in real life, but no harm in coding it up
6312                                  * anyway just in case.
6313                                  */
6314                                 btrfs_release_path(&path);
6315                                 ret = check_extent_exists(root, new_start,
6316                                                           new_bytes);
6317                                 if (ret) {
6318                                         fprintf(stderr, "Right section didn't "
6319                                                 "have a record\n");
6320                                         break;
6321                                 }
6322                                 num_bytes = key.objectid - bytenr;
6323                                 goto again;
6324                         }
6325                         num_bytes = key.objectid - bytenr;
6326                 }
6327                 path.slots[0]++;
6328         }
6329         ret = 0;
6330
6331 out:
6332         if (num_bytes && !ret) {
6333                 fprintf(stderr, "There are no extents for csum range "
6334                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6335                 ret = 1;
6336         }
6337
6338         btrfs_release_path(&path);
6339         return ret;
6340 }
6341
6342 static int check_csums(struct btrfs_root *root)
6343 {
6344         struct btrfs_path path;
6345         struct extent_buffer *leaf;
6346         struct btrfs_key key;
6347         u64 offset = 0, num_bytes = 0;
6348         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6349         int errors = 0;
6350         int ret;
6351         u64 data_len;
6352         unsigned long leaf_offset;
6353
6354         root = root->fs_info->csum_root;
6355         if (!extent_buffer_uptodate(root->node)) {
6356                 fprintf(stderr, "No valid csum tree found\n");
6357                 return -ENOENT;
6358         }
6359
6360         btrfs_init_path(&path);
6361         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6362         key.type = BTRFS_EXTENT_CSUM_KEY;
6363         key.offset = 0;
6364         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6365         if (ret < 0) {
6366                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6367                 btrfs_release_path(&path);
6368                 return ret;
6369         }
6370
6371         if (ret > 0 && path.slots[0])
6372                 path.slots[0]--;
6373         ret = 0;
6374
6375         while (1) {
6376                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6377                         ret = btrfs_next_leaf(root, &path);
6378                         if (ret < 0) {
6379                                 fprintf(stderr, "Error going to next leaf "
6380                                         "%d\n", ret);
6381                                 break;
6382                         }
6383                         if (ret)
6384                                 break;
6385                 }
6386                 leaf = path.nodes[0];
6387
6388                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6389                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6390                         path.slots[0]++;
6391                         continue;
6392                 }
6393
6394                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6395                               csum_size) * root->sectorsize;
6396                 if (!check_data_csum)
6397                         goto skip_csum_check;
6398                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6399                 ret = check_extent_csums(root, key.offset, data_len,
6400                                          leaf_offset, leaf);
6401                 if (ret)
6402                         break;
6403 skip_csum_check:
6404                 if (!num_bytes) {
6405                         offset = key.offset;
6406                 } else if (key.offset != offset + num_bytes) {
6407                         ret = check_extent_exists(root, offset, num_bytes);
6408                         if (ret) {
6409                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6410                                         "there is no extent record\n",
6411                                         offset, offset+num_bytes);
6412                                 errors++;
6413                         }
6414                         offset = key.offset;
6415                         num_bytes = 0;
6416                 }
6417                 num_bytes += data_len;
6418                 path.slots[0]++;
6419         }
6420
6421         btrfs_release_path(&path);
6422         return errors;
6423 }
6424
6425 static int is_dropped_key(struct btrfs_key *key,
6426                           struct btrfs_key *drop_key) {
6427         if (key->objectid < drop_key->objectid)
6428                 return 1;
6429         else if (key->objectid == drop_key->objectid) {
6430                 if (key->type < drop_key->type)
6431                         return 1;
6432                 else if (key->type == drop_key->type) {
6433                         if (key->offset < drop_key->offset)
6434                                 return 1;
6435                 }
6436         }
6437         return 0;
6438 }
6439
6440 /*
6441  * Here are the rules for FULL_BACKREF.
6442  *
6443  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6444  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6445  *      FULL_BACKREF set.
6446  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6447  *    if it happened after the relocation occurred since we'll have dropped the
6448  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6449  *    have no real way to know for sure.
6450  *
6451  * We process the blocks one root at a time, and we start from the lowest root
6452  * objectid and go to the highest.  So we can just lookup the owner backref for
6453  * the record and if we don't find it then we know it doesn't exist and we have
6454  * a FULL BACKREF.
6455  *
6456  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6457  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6458  * be set or not and then we can check later once we've gathered all the refs.
6459  */
6460 static int calc_extent_flag(struct btrfs_root *root,
6461                            struct cache_tree *extent_cache,
6462                            struct extent_buffer *buf,
6463                            struct root_item_record *ri,
6464                            u64 *flags)
6465 {
6466         struct extent_record *rec;
6467         struct cache_extent *cache;
6468         struct tree_backref *tback;
6469         u64 owner = 0;
6470
6471         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6472         /* we have added this extent before */
6473         if (!cache)
6474                 return -ENOENT;
6475
6476         rec = container_of(cache, struct extent_record, cache);
6477
6478         /*
6479          * Except file/reloc tree, we can not have
6480          * FULL BACKREF MODE
6481          */
6482         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6483                 goto normal;
6484         /*
6485          * root node
6486          */
6487         if (buf->start == ri->bytenr)
6488                 goto normal;
6489
6490         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6491                 goto full_backref;
6492
6493         owner = btrfs_header_owner(buf);
6494         if (owner == ri->objectid)
6495                 goto normal;
6496
6497         tback = find_tree_backref(rec, 0, owner);
6498         if (!tback)
6499                 goto full_backref;
6500 normal:
6501         *flags = 0;
6502         if (rec->flag_block_full_backref != FLAG_UNSET &&
6503             rec->flag_block_full_backref != 0)
6504                 rec->bad_full_backref = 1;
6505         return 0;
6506 full_backref:
6507         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6508         if (rec->flag_block_full_backref != FLAG_UNSET &&
6509             rec->flag_block_full_backref != 1)
6510                 rec->bad_full_backref = 1;
6511         return 0;
6512 }
6513
6514 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6515 {
6516         fprintf(stderr, "Invalid key type(");
6517         print_key_type(stderr, 0, key_type);
6518         fprintf(stderr, ") found in root(");
6519         print_objectid(stderr, rootid, 0);
6520         fprintf(stderr, ")\n");
6521 }
6522
6523 /*
6524  * Check if the key is valid with its extent buffer.
6525  *
6526  * This is a early check in case invalid key exists in a extent buffer
6527  * This is not comprehensive yet, but should prevent wrong key/item passed
6528  * further
6529  */
6530 static int check_type_with_root(u64 rootid, u8 key_type)
6531 {
6532         switch (key_type) {
6533         /* Only valid in chunk tree */
6534         case BTRFS_DEV_ITEM_KEY:
6535         case BTRFS_CHUNK_ITEM_KEY:
6536                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6537                         goto err;
6538                 break;
6539         /* valid in csum and log tree */
6540         case BTRFS_CSUM_TREE_OBJECTID:
6541                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6542                       is_fstree(rootid)))
6543                         goto err;
6544                 break;
6545         case BTRFS_EXTENT_ITEM_KEY:
6546         case BTRFS_METADATA_ITEM_KEY:
6547         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6548                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6549                         goto err;
6550                 break;
6551         case BTRFS_ROOT_ITEM_KEY:
6552                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6553                         goto err;
6554                 break;
6555         case BTRFS_DEV_EXTENT_KEY:
6556                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6557                         goto err;
6558                 break;
6559         }
6560         return 0;
6561 err:
6562         report_mismatch_key_root(key_type, rootid);
6563         return -EINVAL;
6564 }
6565
6566 static int run_next_block(struct btrfs_root *root,
6567                           struct block_info *bits,
6568                           int bits_nr,
6569                           u64 *last,
6570                           struct cache_tree *pending,
6571                           struct cache_tree *seen,
6572                           struct cache_tree *reada,
6573                           struct cache_tree *nodes,
6574                           struct cache_tree *extent_cache,
6575                           struct cache_tree *chunk_cache,
6576                           struct rb_root *dev_cache,
6577                           struct block_group_tree *block_group_cache,
6578                           struct device_extent_tree *dev_extent_cache,
6579                           struct root_item_record *ri)
6580 {
6581         struct extent_buffer *buf;
6582         struct extent_record *rec = NULL;
6583         u64 bytenr;
6584         u32 size;
6585         u64 parent;
6586         u64 owner;
6587         u64 flags;
6588         u64 ptr;
6589         u64 gen = 0;
6590         int ret = 0;
6591         int i;
6592         int nritems;
6593         struct btrfs_key key;
6594         struct cache_extent *cache;
6595         int reada_bits;
6596
6597         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6598                                     bits_nr, &reada_bits);
6599         if (nritems == 0)
6600                 return 1;
6601
6602         if (!reada_bits) {
6603                 for(i = 0; i < nritems; i++) {
6604                         ret = add_cache_extent(reada, bits[i].start,
6605                                                bits[i].size);
6606                         if (ret == -EEXIST)
6607                                 continue;
6608
6609                         /* fixme, get the parent transid */
6610                         readahead_tree_block(root, bits[i].start,
6611                                              bits[i].size, 0);
6612                 }
6613         }
6614         *last = bits[0].start;
6615         bytenr = bits[0].start;
6616         size = bits[0].size;
6617
6618         cache = lookup_cache_extent(pending, bytenr, size);
6619         if (cache) {
6620                 remove_cache_extent(pending, cache);
6621                 free(cache);
6622         }
6623         cache = lookup_cache_extent(reada, bytenr, size);
6624         if (cache) {
6625                 remove_cache_extent(reada, cache);
6626                 free(cache);
6627         }
6628         cache = lookup_cache_extent(nodes, bytenr, size);
6629         if (cache) {
6630                 remove_cache_extent(nodes, cache);
6631                 free(cache);
6632         }
6633         cache = lookup_cache_extent(extent_cache, bytenr, size);
6634         if (cache) {
6635                 rec = container_of(cache, struct extent_record, cache);
6636                 gen = rec->parent_generation;
6637         }
6638
6639         /* fixme, get the real parent transid */
6640         buf = read_tree_block(root, bytenr, size, gen);
6641         if (!extent_buffer_uptodate(buf)) {
6642                 record_bad_block_io(root->fs_info,
6643                                     extent_cache, bytenr, size);
6644                 goto out;
6645         }
6646
6647         nritems = btrfs_header_nritems(buf);
6648
6649         flags = 0;
6650         if (!init_extent_tree) {
6651                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6652                                        btrfs_header_level(buf), 1, NULL,
6653                                        &flags);
6654                 if (ret < 0) {
6655                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6656                         if (ret < 0) {
6657                                 fprintf(stderr, "Couldn't calc extent flags\n");
6658                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6659                         }
6660                 }
6661         } else {
6662                 flags = 0;
6663                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6664                 if (ret < 0) {
6665                         fprintf(stderr, "Couldn't calc extent flags\n");
6666                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6667                 }
6668         }
6669
6670         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6671                 if (ri != NULL &&
6672                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6673                     ri->objectid == btrfs_header_owner(buf)) {
6674                         /*
6675                          * Ok we got to this block from it's original owner and
6676                          * we have FULL_BACKREF set.  Relocation can leave
6677                          * converted blocks over so this is altogether possible,
6678                          * however it's not possible if the generation > the
6679                          * last snapshot, so check for this case.
6680                          */
6681                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6682                             btrfs_header_generation(buf) > ri->last_snapshot) {
6683                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6684                                 rec->bad_full_backref = 1;
6685                         }
6686                 }
6687         } else {
6688                 if (ri != NULL &&
6689                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6690                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6691                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6692                         rec->bad_full_backref = 1;
6693                 }
6694         }
6695
6696         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6697                 rec->flag_block_full_backref = 1;
6698                 parent = bytenr;
6699                 owner = 0;
6700         } else {
6701                 rec->flag_block_full_backref = 0;
6702                 parent = 0;
6703                 owner = btrfs_header_owner(buf);
6704         }
6705
6706         ret = check_block(root, extent_cache, buf, flags);
6707         if (ret)
6708                 goto out;
6709
6710         if (btrfs_is_leaf(buf)) {
6711                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6712                 for (i = 0; i < nritems; i++) {
6713                         struct btrfs_file_extent_item *fi;
6714                         btrfs_item_key_to_cpu(buf, &key, i);
6715                         /*
6716                          * Check key type against the leaf owner.
6717                          * Could filter quite a lot of early error if
6718                          * owner is correct
6719                          */
6720                         if (check_type_with_root(btrfs_header_owner(buf),
6721                                                  key.type)) {
6722                                 fprintf(stderr, "ignoring invalid key\n");
6723                                 continue;
6724                         }
6725                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6726                                 process_extent_item(root, extent_cache, buf,
6727                                                     i);
6728                                 continue;
6729                         }
6730                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6731                                 process_extent_item(root, extent_cache, buf,
6732                                                     i);
6733                                 continue;
6734                         }
6735                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6736                                 total_csum_bytes +=
6737                                         btrfs_item_size_nr(buf, i);
6738                                 continue;
6739                         }
6740                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6741                                 process_chunk_item(chunk_cache, &key, buf, i);
6742                                 continue;
6743                         }
6744                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6745                                 process_device_item(dev_cache, &key, buf, i);
6746                                 continue;
6747                         }
6748                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6749                                 process_block_group_item(block_group_cache,
6750                                         &key, buf, i);
6751                                 continue;
6752                         }
6753                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6754                                 process_device_extent_item(dev_extent_cache,
6755                                         &key, buf, i);
6756                                 continue;
6757
6758                         }
6759                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6760 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6761                                 process_extent_ref_v0(extent_cache, buf, i);
6762 #else
6763                                 BUG();
6764 #endif
6765                                 continue;
6766                         }
6767
6768                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6769                                 ret = add_tree_backref(extent_cache,
6770                                                 key.objectid, 0, key.offset, 0);
6771                                 if (ret < 0)
6772                                         error("add_tree_backref failed: %s",
6773                                               strerror(-ret));
6774                                 continue;
6775                         }
6776                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6777                                 ret = add_tree_backref(extent_cache,
6778                                                 key.objectid, key.offset, 0, 0);
6779                                 if (ret < 0)
6780                                         error("add_tree_backref failed: %s",
6781                                               strerror(-ret));
6782                                 continue;
6783                         }
6784                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6785                                 struct btrfs_extent_data_ref *ref;
6786                                 ref = btrfs_item_ptr(buf, i,
6787                                                 struct btrfs_extent_data_ref);
6788                                 add_data_backref(extent_cache,
6789                                         key.objectid, 0,
6790                                         btrfs_extent_data_ref_root(buf, ref),
6791                                         btrfs_extent_data_ref_objectid(buf,
6792                                                                        ref),
6793                                         btrfs_extent_data_ref_offset(buf, ref),
6794                                         btrfs_extent_data_ref_count(buf, ref),
6795                                         0, root->sectorsize);
6796                                 continue;
6797                         }
6798                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6799                                 struct btrfs_shared_data_ref *ref;
6800                                 ref = btrfs_item_ptr(buf, i,
6801                                                 struct btrfs_shared_data_ref);
6802                                 add_data_backref(extent_cache,
6803                                         key.objectid, key.offset, 0, 0, 0,
6804                                         btrfs_shared_data_ref_count(buf, ref),
6805                                         0, root->sectorsize);
6806                                 continue;
6807                         }
6808                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6809                                 struct bad_item *bad;
6810
6811                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6812                                         continue;
6813                                 if (!owner)
6814                                         continue;
6815                                 bad = malloc(sizeof(struct bad_item));
6816                                 if (!bad)
6817                                         continue;
6818                                 INIT_LIST_HEAD(&bad->list);
6819                                 memcpy(&bad->key, &key,
6820                                        sizeof(struct btrfs_key));
6821                                 bad->root_id = owner;
6822                                 list_add_tail(&bad->list, &delete_items);
6823                                 continue;
6824                         }
6825                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6826                                 continue;
6827                         fi = btrfs_item_ptr(buf, i,
6828                                             struct btrfs_file_extent_item);
6829                         if (btrfs_file_extent_type(buf, fi) ==
6830                             BTRFS_FILE_EXTENT_INLINE)
6831                                 continue;
6832                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6833                                 continue;
6834
6835                         data_bytes_allocated +=
6836                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6837                         if (data_bytes_allocated < root->sectorsize) {
6838                                 abort();
6839                         }
6840                         data_bytes_referenced +=
6841                                 btrfs_file_extent_num_bytes(buf, fi);
6842                         add_data_backref(extent_cache,
6843                                 btrfs_file_extent_disk_bytenr(buf, fi),
6844                                 parent, owner, key.objectid, key.offset -
6845                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6846                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6847                 }
6848         } else {
6849                 int level;
6850                 struct btrfs_key first_key;
6851
6852                 first_key.objectid = 0;
6853
6854                 if (nritems > 0)
6855                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6856                 level = btrfs_header_level(buf);
6857                 for (i = 0; i < nritems; i++) {
6858                         struct extent_record tmpl;
6859
6860                         ptr = btrfs_node_blockptr(buf, i);
6861                         size = root->nodesize;
6862                         btrfs_node_key_to_cpu(buf, &key, i);
6863                         if (ri != NULL) {
6864                                 if ((level == ri->drop_level)
6865                                     && is_dropped_key(&key, &ri->drop_key)) {
6866                                         continue;
6867                                 }
6868                         }
6869
6870                         memset(&tmpl, 0, sizeof(tmpl));
6871                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6872                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6873                         tmpl.start = ptr;
6874                         tmpl.nr = size;
6875                         tmpl.refs = 1;
6876                         tmpl.metadata = 1;
6877                         tmpl.max_size = size;
6878                         ret = add_extent_rec(extent_cache, &tmpl);
6879                         if (ret < 0)
6880                                 goto out;
6881
6882                         ret = add_tree_backref(extent_cache, ptr, parent,
6883                                         owner, 1);
6884                         if (ret < 0) {
6885                                 error("add_tree_backref failed: %s",
6886                                       strerror(-ret));
6887                                 continue;
6888                         }
6889
6890                         if (level > 1) {
6891                                 add_pending(nodes, seen, ptr, size);
6892                         } else {
6893                                 add_pending(pending, seen, ptr, size);
6894                         }
6895                 }
6896                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6897                                       nritems) * sizeof(struct btrfs_key_ptr);
6898         }
6899         total_btree_bytes += buf->len;
6900         if (fs_root_objectid(btrfs_header_owner(buf)))
6901                 total_fs_tree_bytes += buf->len;
6902         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6903                 total_extent_tree_bytes += buf->len;
6904         if (!found_old_backref &&
6905             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6906             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6907             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6908                 found_old_backref = 1;
6909 out:
6910         free_extent_buffer(buf);
6911         return ret;
6912 }
6913
6914 static int add_root_to_pending(struct extent_buffer *buf,
6915                                struct cache_tree *extent_cache,
6916                                struct cache_tree *pending,
6917                                struct cache_tree *seen,
6918                                struct cache_tree *nodes,
6919                                u64 objectid)
6920 {
6921         struct extent_record tmpl;
6922         int ret;
6923
6924         if (btrfs_header_level(buf) > 0)
6925                 add_pending(nodes, seen, buf->start, buf->len);
6926         else
6927                 add_pending(pending, seen, buf->start, buf->len);
6928
6929         memset(&tmpl, 0, sizeof(tmpl));
6930         tmpl.start = buf->start;
6931         tmpl.nr = buf->len;
6932         tmpl.is_root = 1;
6933         tmpl.refs = 1;
6934         tmpl.metadata = 1;
6935         tmpl.max_size = buf->len;
6936         add_extent_rec(extent_cache, &tmpl);
6937
6938         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6939             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6940                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6941                                 0, 1);
6942         else
6943                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6944                                 1);
6945         return ret;
6946 }
6947
6948 /* as we fix the tree, we might be deleting blocks that
6949  * we're tracking for repair.  This hook makes sure we
6950  * remove any backrefs for blocks as we are fixing them.
6951  */
6952 static int free_extent_hook(struct btrfs_trans_handle *trans,
6953                             struct btrfs_root *root,
6954                             u64 bytenr, u64 num_bytes, u64 parent,
6955                             u64 root_objectid, u64 owner, u64 offset,
6956                             int refs_to_drop)
6957 {
6958         struct extent_record *rec;
6959         struct cache_extent *cache;
6960         int is_data;
6961         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6962
6963         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6964         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6965         if (!cache)
6966                 return 0;
6967
6968         rec = container_of(cache, struct extent_record, cache);
6969         if (is_data) {
6970                 struct data_backref *back;
6971                 back = find_data_backref(rec, parent, root_objectid, owner,
6972                                          offset, 1, bytenr, num_bytes);
6973                 if (!back)
6974                         goto out;
6975                 if (back->node.found_ref) {
6976                         back->found_ref -= refs_to_drop;
6977                         if (rec->refs)
6978                                 rec->refs -= refs_to_drop;
6979                 }
6980                 if (back->node.found_extent_tree) {
6981                         back->num_refs -= refs_to_drop;
6982                         if (rec->extent_item_refs)
6983                                 rec->extent_item_refs -= refs_to_drop;
6984                 }
6985                 if (back->found_ref == 0)
6986                         back->node.found_ref = 0;
6987                 if (back->num_refs == 0)
6988                         back->node.found_extent_tree = 0;
6989
6990                 if (!back->node.found_extent_tree && back->node.found_ref) {
6991                         list_del(&back->node.list);
6992                         free(back);
6993                 }
6994         } else {
6995                 struct tree_backref *back;
6996                 back = find_tree_backref(rec, parent, root_objectid);
6997                 if (!back)
6998                         goto out;
6999                 if (back->node.found_ref) {
7000                         if (rec->refs)
7001                                 rec->refs--;
7002                         back->node.found_ref = 0;
7003                 }
7004                 if (back->node.found_extent_tree) {
7005                         if (rec->extent_item_refs)
7006                                 rec->extent_item_refs--;
7007                         back->node.found_extent_tree = 0;
7008                 }
7009                 if (!back->node.found_extent_tree && back->node.found_ref) {
7010                         list_del(&back->node.list);
7011                         free(back);
7012                 }
7013         }
7014         maybe_free_extent_rec(extent_cache, rec);
7015 out:
7016         return 0;
7017 }
7018
7019 static int delete_extent_records(struct btrfs_trans_handle *trans,
7020                                  struct btrfs_root *root,
7021                                  struct btrfs_path *path,
7022                                  u64 bytenr, u64 new_len)
7023 {
7024         struct btrfs_key key;
7025         struct btrfs_key found_key;
7026         struct extent_buffer *leaf;
7027         int ret;
7028         int slot;
7029
7030
7031         key.objectid = bytenr;
7032         key.type = (u8)-1;
7033         key.offset = (u64)-1;
7034
7035         while(1) {
7036                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7037                                         &key, path, 0, 1);
7038                 if (ret < 0)
7039                         break;
7040
7041                 if (ret > 0) {
7042                         ret = 0;
7043                         if (path->slots[0] == 0)
7044                                 break;
7045                         path->slots[0]--;
7046                 }
7047                 ret = 0;
7048
7049                 leaf = path->nodes[0];
7050                 slot = path->slots[0];
7051
7052                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7053                 if (found_key.objectid != bytenr)
7054                         break;
7055
7056                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7057                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7058                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7059                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7060                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7061                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7062                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7063                         btrfs_release_path(path);
7064                         if (found_key.type == 0) {
7065                                 if (found_key.offset == 0)
7066                                         break;
7067                                 key.offset = found_key.offset - 1;
7068                                 key.type = found_key.type;
7069                         }
7070                         key.type = found_key.type - 1;
7071                         key.offset = (u64)-1;
7072                         continue;
7073                 }
7074
7075                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7076                         found_key.objectid, found_key.type, found_key.offset);
7077
7078                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7079                 if (ret)
7080                         break;
7081                 btrfs_release_path(path);
7082
7083                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7084                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7085                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7086                                 found_key.offset : root->nodesize;
7087
7088                         ret = btrfs_update_block_group(trans, root, bytenr,
7089                                                        bytes, 0, 0);
7090                         if (ret)
7091                                 break;
7092                 }
7093         }
7094
7095         btrfs_release_path(path);
7096         return ret;
7097 }
7098
7099 /*
7100  * for a single backref, this will allocate a new extent
7101  * and add the backref to it.
7102  */
7103 static int record_extent(struct btrfs_trans_handle *trans,
7104                          struct btrfs_fs_info *info,
7105                          struct btrfs_path *path,
7106                          struct extent_record *rec,
7107                          struct extent_backref *back,
7108                          int allocated, u64 flags)
7109 {
7110         int ret;
7111         struct btrfs_root *extent_root = info->extent_root;
7112         struct extent_buffer *leaf;
7113         struct btrfs_key ins_key;
7114         struct btrfs_extent_item *ei;
7115         struct data_backref *dback;
7116         struct btrfs_tree_block_info *bi;
7117
7118         if (!back->is_data)
7119                 rec->max_size = max_t(u64, rec->max_size,
7120                                     info->extent_root->nodesize);
7121
7122         if (!allocated) {
7123                 u32 item_size = sizeof(*ei);
7124
7125                 if (!back->is_data)
7126                         item_size += sizeof(*bi);
7127
7128                 ins_key.objectid = rec->start;
7129                 ins_key.offset = rec->max_size;
7130                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7131
7132                 ret = btrfs_insert_empty_item(trans, extent_root, path,
7133                                         &ins_key, item_size);
7134                 if (ret)
7135                         goto fail;
7136
7137                 leaf = path->nodes[0];
7138                 ei = btrfs_item_ptr(leaf, path->slots[0],
7139                                     struct btrfs_extent_item);
7140
7141                 btrfs_set_extent_refs(leaf, ei, 0);
7142                 btrfs_set_extent_generation(leaf, ei, rec->generation);
7143
7144                 if (back->is_data) {
7145                         btrfs_set_extent_flags(leaf, ei,
7146                                                BTRFS_EXTENT_FLAG_DATA);
7147                 } else {
7148                         struct btrfs_disk_key copy_key;;
7149
7150                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7151                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7152                                              sizeof(*bi));
7153
7154                         btrfs_set_disk_key_objectid(&copy_key,
7155                                                     rec->info_objectid);
7156                         btrfs_set_disk_key_type(&copy_key, 0);
7157                         btrfs_set_disk_key_offset(&copy_key, 0);
7158
7159                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7160                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7161
7162                         btrfs_set_extent_flags(leaf, ei,
7163                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7164                 }
7165
7166                 btrfs_mark_buffer_dirty(leaf);
7167                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7168                                                rec->max_size, 1, 0);
7169                 if (ret)
7170                         goto fail;
7171                 btrfs_release_path(path);
7172         }
7173
7174         if (back->is_data) {
7175                 u64 parent;
7176                 int i;
7177
7178                 dback = to_data_backref(back);
7179                 if (back->full_backref)
7180                         parent = dback->parent;
7181                 else
7182                         parent = 0;
7183
7184                 for (i = 0; i < dback->found_ref; i++) {
7185                         /* if parent != 0, we're doing a full backref
7186                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7187                          * just makes the backref allocator create a data
7188                          * backref
7189                          */
7190                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7191                                                    rec->start, rec->max_size,
7192                                                    parent,
7193                                                    dback->root,
7194                                                    parent ?
7195                                                    BTRFS_FIRST_FREE_OBJECTID :
7196                                                    dback->owner,
7197                                                    dback->offset);
7198                         if (ret)
7199                                 break;
7200                 }
7201                 fprintf(stderr, "adding new data backref"
7202                                 " on %llu %s %llu owner %llu"
7203                                 " offset %llu found %d\n",
7204                                 (unsigned long long)rec->start,
7205                                 back->full_backref ?
7206                                 "parent" : "root",
7207                                 back->full_backref ?
7208                                 (unsigned long long)parent :
7209                                 (unsigned long long)dback->root,
7210                                 (unsigned long long)dback->owner,
7211                                 (unsigned long long)dback->offset,
7212                                 dback->found_ref);
7213         } else {
7214                 u64 parent;
7215                 struct tree_backref *tback;
7216
7217                 tback = to_tree_backref(back);
7218                 if (back->full_backref)
7219                         parent = tback->parent;
7220                 else
7221                         parent = 0;
7222
7223                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7224                                            rec->start, rec->max_size,
7225                                            parent, tback->root, 0, 0);
7226                 fprintf(stderr, "adding new tree backref on "
7227                         "start %llu len %llu parent %llu root %llu\n",
7228                         rec->start, rec->max_size, parent, tback->root);
7229         }
7230 fail:
7231         btrfs_release_path(path);
7232         return ret;
7233 }
7234
7235 static struct extent_entry *find_entry(struct list_head *entries,
7236                                        u64 bytenr, u64 bytes)
7237 {
7238         struct extent_entry *entry = NULL;
7239
7240         list_for_each_entry(entry, entries, list) {
7241                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7242                         return entry;
7243         }
7244
7245         return NULL;
7246 }
7247
7248 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7249 {
7250         struct extent_entry *entry, *best = NULL, *prev = NULL;
7251
7252         list_for_each_entry(entry, entries, list) {
7253                 /*
7254                  * If there are as many broken entries as entries then we know
7255                  * not to trust this particular entry.
7256                  */
7257                 if (entry->broken == entry->count)
7258                         continue;
7259
7260                 /*
7261                  * Special case, when there are only two entries and 'best' is
7262                  * the first one
7263                  */
7264                 if (!prev) {
7265                         best = entry;
7266                         prev = entry;
7267                         continue;
7268                 }
7269
7270                 /*
7271                  * If our current entry == best then we can't be sure our best
7272                  * is really the best, so we need to keep searching.
7273                  */
7274                 if (best && best->count == entry->count) {
7275                         prev = entry;
7276                         best = NULL;
7277                         continue;
7278                 }
7279
7280                 /* Prev == entry, not good enough, have to keep searching */
7281                 if (!prev->broken && prev->count == entry->count)
7282                         continue;
7283
7284                 if (!best)
7285                         best = (prev->count > entry->count) ? prev : entry;
7286                 else if (best->count < entry->count)
7287                         best = entry;
7288                 prev = entry;
7289         }
7290
7291         return best;
7292 }
7293
7294 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7295                       struct data_backref *dback, struct extent_entry *entry)
7296 {
7297         struct btrfs_trans_handle *trans;
7298         struct btrfs_root *root;
7299         struct btrfs_file_extent_item *fi;
7300         struct extent_buffer *leaf;
7301         struct btrfs_key key;
7302         u64 bytenr, bytes;
7303         int ret, err;
7304
7305         key.objectid = dback->root;
7306         key.type = BTRFS_ROOT_ITEM_KEY;
7307         key.offset = (u64)-1;
7308         root = btrfs_read_fs_root(info, &key);
7309         if (IS_ERR(root)) {
7310                 fprintf(stderr, "Couldn't find root for our ref\n");
7311                 return -EINVAL;
7312         }
7313
7314         /*
7315          * The backref points to the original offset of the extent if it was
7316          * split, so we need to search down to the offset we have and then walk
7317          * forward until we find the backref we're looking for.
7318          */
7319         key.objectid = dback->owner;
7320         key.type = BTRFS_EXTENT_DATA_KEY;
7321         key.offset = dback->offset;
7322         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7323         if (ret < 0) {
7324                 fprintf(stderr, "Error looking up ref %d\n", ret);
7325                 return ret;
7326         }
7327
7328         while (1) {
7329                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7330                         ret = btrfs_next_leaf(root, path);
7331                         if (ret) {
7332                                 fprintf(stderr, "Couldn't find our ref, next\n");
7333                                 return -EINVAL;
7334                         }
7335                 }
7336                 leaf = path->nodes[0];
7337                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7338                 if (key.objectid != dback->owner ||
7339                     key.type != BTRFS_EXTENT_DATA_KEY) {
7340                         fprintf(stderr, "Couldn't find our ref, search\n");
7341                         return -EINVAL;
7342                 }
7343                 fi = btrfs_item_ptr(leaf, path->slots[0],
7344                                     struct btrfs_file_extent_item);
7345                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7346                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7347
7348                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7349                         break;
7350                 path->slots[0]++;
7351         }
7352
7353         btrfs_release_path(path);
7354
7355         trans = btrfs_start_transaction(root, 1);
7356         if (IS_ERR(trans))
7357                 return PTR_ERR(trans);
7358
7359         /*
7360          * Ok we have the key of the file extent we want to fix, now we can cow
7361          * down to the thing and fix it.
7362          */
7363         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7364         if (ret < 0) {
7365                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7366                         key.objectid, key.type, key.offset, ret);
7367                 goto out;
7368         }
7369         if (ret > 0) {
7370                 fprintf(stderr, "Well that's odd, we just found this key "
7371                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7372                         key.offset);
7373                 ret = -EINVAL;
7374                 goto out;
7375         }
7376         leaf = path->nodes[0];
7377         fi = btrfs_item_ptr(leaf, path->slots[0],
7378                             struct btrfs_file_extent_item);
7379
7380         if (btrfs_file_extent_compression(leaf, fi) &&
7381             dback->disk_bytenr != entry->bytenr) {
7382                 fprintf(stderr, "Ref doesn't match the record start and is "
7383                         "compressed, please take a btrfs-image of this file "
7384                         "system and send it to a btrfs developer so they can "
7385                         "complete this functionality for bytenr %Lu\n",
7386                         dback->disk_bytenr);
7387                 ret = -EINVAL;
7388                 goto out;
7389         }
7390
7391         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7392                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7393         } else if (dback->disk_bytenr > entry->bytenr) {
7394                 u64 off_diff, offset;
7395
7396                 off_diff = dback->disk_bytenr - entry->bytenr;
7397                 offset = btrfs_file_extent_offset(leaf, fi);
7398                 if (dback->disk_bytenr + offset +
7399                     btrfs_file_extent_num_bytes(leaf, fi) >
7400                     entry->bytenr + entry->bytes) {
7401                         fprintf(stderr, "Ref is past the entry end, please "
7402                                 "take a btrfs-image of this file system and "
7403                                 "send it to a btrfs developer, ref %Lu\n",
7404                                 dback->disk_bytenr);
7405                         ret = -EINVAL;
7406                         goto out;
7407                 }
7408                 offset += off_diff;
7409                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7410                 btrfs_set_file_extent_offset(leaf, fi, offset);
7411         } else if (dback->disk_bytenr < entry->bytenr) {
7412                 u64 offset;
7413
7414                 offset = btrfs_file_extent_offset(leaf, fi);
7415                 if (dback->disk_bytenr + offset < entry->bytenr) {
7416                         fprintf(stderr, "Ref is before the entry start, please"
7417                                 " take a btrfs-image of this file system and "
7418                                 "send it to a btrfs developer, ref %Lu\n",
7419                                 dback->disk_bytenr);
7420                         ret = -EINVAL;
7421                         goto out;
7422                 }
7423
7424                 offset += dback->disk_bytenr;
7425                 offset -= entry->bytenr;
7426                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7427                 btrfs_set_file_extent_offset(leaf, fi, offset);
7428         }
7429
7430         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7431
7432         /*
7433          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7434          * only do this if we aren't using compression, otherwise it's a
7435          * trickier case.
7436          */
7437         if (!btrfs_file_extent_compression(leaf, fi))
7438                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7439         else
7440                 printf("ram bytes may be wrong?\n");
7441         btrfs_mark_buffer_dirty(leaf);
7442 out:
7443         err = btrfs_commit_transaction(trans, root);
7444         btrfs_release_path(path);
7445         return ret ? ret : err;
7446 }
7447
7448 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7449                            struct extent_record *rec)
7450 {
7451         struct extent_backref *back;
7452         struct data_backref *dback;
7453         struct extent_entry *entry, *best = NULL;
7454         LIST_HEAD(entries);
7455         int nr_entries = 0;
7456         int broken_entries = 0;
7457         int ret = 0;
7458         short mismatch = 0;
7459
7460         /*
7461          * Metadata is easy and the backrefs should always agree on bytenr and
7462          * size, if not we've got bigger issues.
7463          */
7464         if (rec->metadata)
7465                 return 0;
7466
7467         list_for_each_entry(back, &rec->backrefs, list) {
7468                 if (back->full_backref || !back->is_data)
7469                         continue;
7470
7471                 dback = to_data_backref(back);
7472
7473                 /*
7474                  * We only pay attention to backrefs that we found a real
7475                  * backref for.
7476                  */
7477                 if (dback->found_ref == 0)
7478                         continue;
7479
7480                 /*
7481                  * For now we only catch when the bytes don't match, not the
7482                  * bytenr.  We can easily do this at the same time, but I want
7483                  * to have a fs image to test on before we just add repair
7484                  * functionality willy-nilly so we know we won't screw up the
7485                  * repair.
7486                  */
7487
7488                 entry = find_entry(&entries, dback->disk_bytenr,
7489                                    dback->bytes);
7490                 if (!entry) {
7491                         entry = malloc(sizeof(struct extent_entry));
7492                         if (!entry) {
7493                                 ret = -ENOMEM;
7494                                 goto out;
7495                         }
7496                         memset(entry, 0, sizeof(*entry));
7497                         entry->bytenr = dback->disk_bytenr;
7498                         entry->bytes = dback->bytes;
7499                         list_add_tail(&entry->list, &entries);
7500                         nr_entries++;
7501                 }
7502
7503                 /*
7504                  * If we only have on entry we may think the entries agree when
7505                  * in reality they don't so we have to do some extra checking.
7506                  */
7507                 if (dback->disk_bytenr != rec->start ||
7508                     dback->bytes != rec->nr || back->broken)
7509                         mismatch = 1;
7510
7511                 if (back->broken) {
7512                         entry->broken++;
7513                         broken_entries++;
7514                 }
7515
7516                 entry->count++;
7517         }
7518
7519         /* Yay all the backrefs agree, carry on good sir */
7520         if (nr_entries <= 1 && !mismatch)
7521                 goto out;
7522
7523         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7524                 "%Lu\n", rec->start);
7525
7526         /*
7527          * First we want to see if the backrefs can agree amongst themselves who
7528          * is right, so figure out which one of the entries has the highest
7529          * count.
7530          */
7531         best = find_most_right_entry(&entries);
7532
7533         /*
7534          * Ok so we may have an even split between what the backrefs think, so
7535          * this is where we use the extent ref to see what it thinks.
7536          */
7537         if (!best) {
7538                 entry = find_entry(&entries, rec->start, rec->nr);
7539                 if (!entry && (!broken_entries || !rec->found_rec)) {
7540                         fprintf(stderr, "Backrefs don't agree with each other "
7541                                 "and extent record doesn't agree with anybody,"
7542                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7543                                 rec->start, rec->nr);
7544                         ret = -EINVAL;
7545                         goto out;
7546                 } else if (!entry) {
7547                         /*
7548                          * Ok our backrefs were broken, we'll assume this is the
7549                          * correct value and add an entry for this range.
7550                          */
7551                         entry = malloc(sizeof(struct extent_entry));
7552                         if (!entry) {
7553                                 ret = -ENOMEM;
7554                                 goto out;
7555                         }
7556                         memset(entry, 0, sizeof(*entry));
7557                         entry->bytenr = rec->start;
7558                         entry->bytes = rec->nr;
7559                         list_add_tail(&entry->list, &entries);
7560                         nr_entries++;
7561                 }
7562                 entry->count++;
7563                 best = find_most_right_entry(&entries);
7564                 if (!best) {
7565                         fprintf(stderr, "Backrefs and extent record evenly "
7566                                 "split on who is right, this is going to "
7567                                 "require user input to fix bytenr %Lu bytes "
7568                                 "%Lu\n", rec->start, rec->nr);
7569                         ret = -EINVAL;
7570                         goto out;
7571                 }
7572         }
7573
7574         /*
7575          * I don't think this can happen currently as we'll abort() if we catch
7576          * this case higher up, but in case somebody removes that we still can't
7577          * deal with it properly here yet, so just bail out of that's the case.
7578          */
7579         if (best->bytenr != rec->start) {
7580                 fprintf(stderr, "Extent start and backref starts don't match, "
7581                         "please use btrfs-image on this file system and send "
7582                         "it to a btrfs developer so they can make fsck fix "
7583                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7584                         rec->start, rec->nr);
7585                 ret = -EINVAL;
7586                 goto out;
7587         }
7588
7589         /*
7590          * Ok great we all agreed on an extent record, let's go find the real
7591          * references and fix up the ones that don't match.
7592          */
7593         list_for_each_entry(back, &rec->backrefs, list) {
7594                 if (back->full_backref || !back->is_data)
7595                         continue;
7596
7597                 dback = to_data_backref(back);
7598
7599                 /*
7600                  * Still ignoring backrefs that don't have a real ref attached
7601                  * to them.
7602                  */
7603                 if (dback->found_ref == 0)
7604                         continue;
7605
7606                 if (dback->bytes == best->bytes &&
7607                     dback->disk_bytenr == best->bytenr)
7608                         continue;
7609
7610                 ret = repair_ref(info, path, dback, best);
7611                 if (ret)
7612                         goto out;
7613         }
7614
7615         /*
7616          * Ok we messed with the actual refs, which means we need to drop our
7617          * entire cache and go back and rescan.  I know this is a huge pain and
7618          * adds a lot of extra work, but it's the only way to be safe.  Once all
7619          * the backrefs agree we may not need to do anything to the extent
7620          * record itself.
7621          */
7622         ret = -EAGAIN;
7623 out:
7624         while (!list_empty(&entries)) {
7625                 entry = list_entry(entries.next, struct extent_entry, list);
7626                 list_del_init(&entry->list);
7627                 free(entry);
7628         }
7629         return ret;
7630 }
7631
7632 static int process_duplicates(struct btrfs_root *root,
7633                               struct cache_tree *extent_cache,
7634                               struct extent_record *rec)
7635 {
7636         struct extent_record *good, *tmp;
7637         struct cache_extent *cache;
7638         int ret;
7639
7640         /*
7641          * If we found a extent record for this extent then return, or if we
7642          * have more than one duplicate we are likely going to need to delete
7643          * something.
7644          */
7645         if (rec->found_rec || rec->num_duplicates > 1)
7646                 return 0;
7647
7648         /* Shouldn't happen but just in case */
7649         BUG_ON(!rec->num_duplicates);
7650
7651         /*
7652          * So this happens if we end up with a backref that doesn't match the
7653          * actual extent entry.  So either the backref is bad or the extent
7654          * entry is bad.  Either way we want to have the extent_record actually
7655          * reflect what we found in the extent_tree, so we need to take the
7656          * duplicate out and use that as the extent_record since the only way we
7657          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7658          */
7659         remove_cache_extent(extent_cache, &rec->cache);
7660
7661         good = to_extent_record(rec->dups.next);
7662         list_del_init(&good->list);
7663         INIT_LIST_HEAD(&good->backrefs);
7664         INIT_LIST_HEAD(&good->dups);
7665         good->cache.start = good->start;
7666         good->cache.size = good->nr;
7667         good->content_checked = 0;
7668         good->owner_ref_checked = 0;
7669         good->num_duplicates = 0;
7670         good->refs = rec->refs;
7671         list_splice_init(&rec->backrefs, &good->backrefs);
7672         while (1) {
7673                 cache = lookup_cache_extent(extent_cache, good->start,
7674                                             good->nr);
7675                 if (!cache)
7676                         break;
7677                 tmp = container_of(cache, struct extent_record, cache);
7678
7679                 /*
7680                  * If we find another overlapping extent and it's found_rec is
7681                  * set then it's a duplicate and we need to try and delete
7682                  * something.
7683                  */
7684                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7685                         if (list_empty(&good->list))
7686                                 list_add_tail(&good->list,
7687                                               &duplicate_extents);
7688                         good->num_duplicates += tmp->num_duplicates + 1;
7689                         list_splice_init(&tmp->dups, &good->dups);
7690                         list_del_init(&tmp->list);
7691                         list_add_tail(&tmp->list, &good->dups);
7692                         remove_cache_extent(extent_cache, &tmp->cache);
7693                         continue;
7694                 }
7695
7696                 /*
7697                  * Ok we have another non extent item backed extent rec, so lets
7698                  * just add it to this extent and carry on like we did above.
7699                  */
7700                 good->refs += tmp->refs;
7701                 list_splice_init(&tmp->backrefs, &good->backrefs);
7702                 remove_cache_extent(extent_cache, &tmp->cache);
7703                 free(tmp);
7704         }
7705         ret = insert_cache_extent(extent_cache, &good->cache);
7706         BUG_ON(ret);
7707         free(rec);
7708         return good->num_duplicates ? 0 : 1;
7709 }
7710
7711 static int delete_duplicate_records(struct btrfs_root *root,
7712                                     struct extent_record *rec)
7713 {
7714         struct btrfs_trans_handle *trans;
7715         LIST_HEAD(delete_list);
7716         struct btrfs_path path;
7717         struct extent_record *tmp, *good, *n;
7718         int nr_del = 0;
7719         int ret = 0, err;
7720         struct btrfs_key key;
7721
7722         btrfs_init_path(&path);
7723
7724         good = rec;
7725         /* Find the record that covers all of the duplicates. */
7726         list_for_each_entry(tmp, &rec->dups, list) {
7727                 if (good->start < tmp->start)
7728                         continue;
7729                 if (good->nr > tmp->nr)
7730                         continue;
7731
7732                 if (tmp->start + tmp->nr < good->start + good->nr) {
7733                         fprintf(stderr, "Ok we have overlapping extents that "
7734                                 "aren't completely covered by each other, this "
7735                                 "is going to require more careful thought.  "
7736                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7737                                 tmp->start, tmp->nr, good->start, good->nr);
7738                         abort();
7739                 }
7740                 good = tmp;
7741         }
7742
7743         if (good != rec)
7744                 list_add_tail(&rec->list, &delete_list);
7745
7746         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7747                 if (tmp == good)
7748                         continue;
7749                 list_move_tail(&tmp->list, &delete_list);
7750         }
7751
7752         root = root->fs_info->extent_root;
7753         trans = btrfs_start_transaction(root, 1);
7754         if (IS_ERR(trans)) {
7755                 ret = PTR_ERR(trans);
7756                 goto out;
7757         }
7758
7759         list_for_each_entry(tmp, &delete_list, list) {
7760                 if (tmp->found_rec == 0)
7761                         continue;
7762                 key.objectid = tmp->start;
7763                 key.type = BTRFS_EXTENT_ITEM_KEY;
7764                 key.offset = tmp->nr;
7765
7766                 /* Shouldn't happen but just in case */
7767                 if (tmp->metadata) {
7768                         fprintf(stderr, "Well this shouldn't happen, extent "
7769                                 "record overlaps but is metadata? "
7770                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7771                         abort();
7772                 }
7773
7774                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7775                 if (ret) {
7776                         if (ret > 0)
7777                                 ret = -EINVAL;
7778                         break;
7779                 }
7780                 ret = btrfs_del_item(trans, root, &path);
7781                 if (ret)
7782                         break;
7783                 btrfs_release_path(&path);
7784                 nr_del++;
7785         }
7786         err = btrfs_commit_transaction(trans, root);
7787         if (err && !ret)
7788                 ret = err;
7789 out:
7790         while (!list_empty(&delete_list)) {
7791                 tmp = to_extent_record(delete_list.next);
7792                 list_del_init(&tmp->list);
7793                 if (tmp == rec)
7794                         continue;
7795                 free(tmp);
7796         }
7797
7798         while (!list_empty(&rec->dups)) {
7799                 tmp = to_extent_record(rec->dups.next);
7800                 list_del_init(&tmp->list);
7801                 free(tmp);
7802         }
7803
7804         btrfs_release_path(&path);
7805
7806         if (!ret && !nr_del)
7807                 rec->num_duplicates = 0;
7808
7809         return ret ? ret : nr_del;
7810 }
7811
7812 static int find_possible_backrefs(struct btrfs_fs_info *info,
7813                                   struct btrfs_path *path,
7814                                   struct cache_tree *extent_cache,
7815                                   struct extent_record *rec)
7816 {
7817         struct btrfs_root *root;
7818         struct extent_backref *back;
7819         struct data_backref *dback;
7820         struct cache_extent *cache;
7821         struct btrfs_file_extent_item *fi;
7822         struct btrfs_key key;
7823         u64 bytenr, bytes;
7824         int ret;
7825
7826         list_for_each_entry(back, &rec->backrefs, list) {
7827                 /* Don't care about full backrefs (poor unloved backrefs) */
7828                 if (back->full_backref || !back->is_data)
7829                         continue;
7830
7831                 dback = to_data_backref(back);
7832
7833                 /* We found this one, we don't need to do a lookup */
7834                 if (dback->found_ref)
7835                         continue;
7836
7837                 key.objectid = dback->root;
7838                 key.type = BTRFS_ROOT_ITEM_KEY;
7839                 key.offset = (u64)-1;
7840
7841                 root = btrfs_read_fs_root(info, &key);
7842
7843                 /* No root, definitely a bad ref, skip */
7844                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7845                         continue;
7846                 /* Other err, exit */
7847                 if (IS_ERR(root))
7848                         return PTR_ERR(root);
7849
7850                 key.objectid = dback->owner;
7851                 key.type = BTRFS_EXTENT_DATA_KEY;
7852                 key.offset = dback->offset;
7853                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7854                 if (ret) {
7855                         btrfs_release_path(path);
7856                         if (ret < 0)
7857                                 return ret;
7858                         /* Didn't find it, we can carry on */
7859                         ret = 0;
7860                         continue;
7861                 }
7862
7863                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7864                                     struct btrfs_file_extent_item);
7865                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7866                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7867                 btrfs_release_path(path);
7868                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7869                 if (cache) {
7870                         struct extent_record *tmp;
7871                         tmp = container_of(cache, struct extent_record, cache);
7872
7873                         /*
7874                          * If we found an extent record for the bytenr for this
7875                          * particular backref then we can't add it to our
7876                          * current extent record.  We only want to add backrefs
7877                          * that don't have a corresponding extent item in the
7878                          * extent tree since they likely belong to this record
7879                          * and we need to fix it if it doesn't match bytenrs.
7880                          */
7881                         if  (tmp->found_rec)
7882                                 continue;
7883                 }
7884
7885                 dback->found_ref += 1;
7886                 dback->disk_bytenr = bytenr;
7887                 dback->bytes = bytes;
7888
7889                 /*
7890                  * Set this so the verify backref code knows not to trust the
7891                  * values in this backref.
7892                  */
7893                 back->broken = 1;
7894         }
7895
7896         return 0;
7897 }
7898
7899 /*
7900  * Record orphan data ref into corresponding root.
7901  *
7902  * Return 0 if the extent item contains data ref and recorded.
7903  * Return 1 if the extent item contains no useful data ref
7904  *   On that case, it may contains only shared_dataref or metadata backref
7905  *   or the file extent exists(this should be handled by the extent bytenr
7906  *   recovery routine)
7907  * Return <0 if something goes wrong.
7908  */
7909 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7910                                       struct extent_record *rec)
7911 {
7912         struct btrfs_key key;
7913         struct btrfs_root *dest_root;
7914         struct extent_backref *back;
7915         struct data_backref *dback;
7916         struct orphan_data_extent *orphan;
7917         struct btrfs_path path;
7918         int recorded_data_ref = 0;
7919         int ret = 0;
7920
7921         if (rec->metadata)
7922                 return 1;
7923         btrfs_init_path(&path);
7924         list_for_each_entry(back, &rec->backrefs, list) {
7925                 if (back->full_backref || !back->is_data ||
7926                     !back->found_extent_tree)
7927                         continue;
7928                 dback = to_data_backref(back);
7929                 if (dback->found_ref)
7930                         continue;
7931                 key.objectid = dback->root;
7932                 key.type = BTRFS_ROOT_ITEM_KEY;
7933                 key.offset = (u64)-1;
7934
7935                 dest_root = btrfs_read_fs_root(fs_info, &key);
7936
7937                 /* For non-exist root we just skip it */
7938                 if (IS_ERR(dest_root) || !dest_root)
7939                         continue;
7940
7941                 key.objectid = dback->owner;
7942                 key.type = BTRFS_EXTENT_DATA_KEY;
7943                 key.offset = dback->offset;
7944
7945                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7946                 btrfs_release_path(&path);
7947                 /*
7948                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7949                  * we need to record it for inode/file extent rebuild.
7950                  * For ret > 0, we record it only for file extent rebuild.
7951                  * For ret == 0, the file extent exists but only bytenr
7952                  * mismatch, let the original bytenr fix routine to handle,
7953                  * don't record it.
7954                  */
7955                 if (ret == 0)
7956                         continue;
7957                 ret = 0;
7958                 orphan = malloc(sizeof(*orphan));
7959                 if (!orphan) {
7960                         ret = -ENOMEM;
7961                         goto out;
7962                 }
7963                 INIT_LIST_HEAD(&orphan->list);
7964                 orphan->root = dback->root;
7965                 orphan->objectid = dback->owner;
7966                 orphan->offset = dback->offset;
7967                 orphan->disk_bytenr = rec->cache.start;
7968                 orphan->disk_len = rec->cache.size;
7969                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7970                 recorded_data_ref = 1;
7971         }
7972 out:
7973         btrfs_release_path(&path);
7974         if (!ret)
7975                 return !recorded_data_ref;
7976         else
7977                 return ret;
7978 }
7979
7980 /*
7981  * when an incorrect extent item is found, this will delete
7982  * all of the existing entries for it and recreate them
7983  * based on what the tree scan found.
7984  */
7985 static int fixup_extent_refs(struct btrfs_fs_info *info,
7986                              struct cache_tree *extent_cache,
7987                              struct extent_record *rec)
7988 {
7989         struct btrfs_trans_handle *trans = NULL;
7990         int ret;
7991         struct btrfs_path path;
7992         struct list_head *cur = rec->backrefs.next;
7993         struct cache_extent *cache;
7994         struct extent_backref *back;
7995         int allocated = 0;
7996         u64 flags = 0;
7997
7998         if (rec->flag_block_full_backref)
7999                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8000
8001         btrfs_init_path(&path);
8002         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8003                 /*
8004                  * Sometimes the backrefs themselves are so broken they don't
8005                  * get attached to any meaningful rec, so first go back and
8006                  * check any of our backrefs that we couldn't find and throw
8007                  * them into the list if we find the backref so that
8008                  * verify_backrefs can figure out what to do.
8009                  */
8010                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8011                 if (ret < 0)
8012                         goto out;
8013         }
8014
8015         /* step one, make sure all of the backrefs agree */
8016         ret = verify_backrefs(info, &path, rec);
8017         if (ret < 0)
8018                 goto out;
8019
8020         trans = btrfs_start_transaction(info->extent_root, 1);
8021         if (IS_ERR(trans)) {
8022                 ret = PTR_ERR(trans);
8023                 goto out;
8024         }
8025
8026         /* step two, delete all the existing records */
8027         ret = delete_extent_records(trans, info->extent_root, &path,
8028                                     rec->start, rec->max_size);
8029
8030         if (ret < 0)
8031                 goto out;
8032
8033         /* was this block corrupt?  If so, don't add references to it */
8034         cache = lookup_cache_extent(info->corrupt_blocks,
8035                                     rec->start, rec->max_size);
8036         if (cache) {
8037                 ret = 0;
8038                 goto out;
8039         }
8040
8041         /* step three, recreate all the refs we did find */
8042         while(cur != &rec->backrefs) {
8043                 back = to_extent_backref(cur);
8044                 cur = cur->next;
8045
8046                 /*
8047                  * if we didn't find any references, don't create a
8048                  * new extent record
8049                  */
8050                 if (!back->found_ref)
8051                         continue;
8052
8053                 rec->bad_full_backref = 0;
8054                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8055                 allocated = 1;
8056
8057                 if (ret)
8058                         goto out;
8059         }
8060 out:
8061         if (trans) {
8062                 int err = btrfs_commit_transaction(trans, info->extent_root);
8063                 if (!ret)
8064                         ret = err;
8065         }
8066
8067         btrfs_release_path(&path);
8068         return ret;
8069 }
8070
8071 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8072                               struct extent_record *rec)
8073 {
8074         struct btrfs_trans_handle *trans;
8075         struct btrfs_root *root = fs_info->extent_root;
8076         struct btrfs_path path;
8077         struct btrfs_extent_item *ei;
8078         struct btrfs_key key;
8079         u64 flags;
8080         int ret = 0;
8081
8082         key.objectid = rec->start;
8083         if (rec->metadata) {
8084                 key.type = BTRFS_METADATA_ITEM_KEY;
8085                 key.offset = rec->info_level;
8086         } else {
8087                 key.type = BTRFS_EXTENT_ITEM_KEY;
8088                 key.offset = rec->max_size;
8089         }
8090
8091         trans = btrfs_start_transaction(root, 0);
8092         if (IS_ERR(trans))
8093                 return PTR_ERR(trans);
8094
8095         btrfs_init_path(&path);
8096         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8097         if (ret < 0) {
8098                 btrfs_release_path(&path);
8099                 btrfs_commit_transaction(trans, root);
8100                 return ret;
8101         } else if (ret) {
8102                 fprintf(stderr, "Didn't find extent for %llu\n",
8103                         (unsigned long long)rec->start);
8104                 btrfs_release_path(&path);
8105                 btrfs_commit_transaction(trans, root);
8106                 return -ENOENT;
8107         }
8108
8109         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8110                             struct btrfs_extent_item);
8111         flags = btrfs_extent_flags(path.nodes[0], ei);
8112         if (rec->flag_block_full_backref) {
8113                 fprintf(stderr, "setting full backref on %llu\n",
8114                         (unsigned long long)key.objectid);
8115                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8116         } else {
8117                 fprintf(stderr, "clearing full backref on %llu\n",
8118                         (unsigned long long)key.objectid);
8119                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8120         }
8121         btrfs_set_extent_flags(path.nodes[0], ei, flags);
8122         btrfs_mark_buffer_dirty(path.nodes[0]);
8123         btrfs_release_path(&path);
8124         return btrfs_commit_transaction(trans, root);
8125 }
8126
8127 /* right now we only prune from the extent allocation tree */
8128 static int prune_one_block(struct btrfs_trans_handle *trans,
8129                            struct btrfs_fs_info *info,
8130                            struct btrfs_corrupt_block *corrupt)
8131 {
8132         int ret;
8133         struct btrfs_path path;
8134         struct extent_buffer *eb;
8135         u64 found;
8136         int slot;
8137         int nritems;
8138         int level = corrupt->level + 1;
8139
8140         btrfs_init_path(&path);
8141 again:
8142         /* we want to stop at the parent to our busted block */
8143         path.lowest_level = level;
8144
8145         ret = btrfs_search_slot(trans, info->extent_root,
8146                                 &corrupt->key, &path, -1, 1);
8147
8148         if (ret < 0)
8149                 goto out;
8150
8151         eb = path.nodes[level];
8152         if (!eb) {
8153                 ret = -ENOENT;
8154                 goto out;
8155         }
8156
8157         /*
8158          * hopefully the search gave us the block we want to prune,
8159          * lets try that first
8160          */
8161         slot = path.slots[level];
8162         found =  btrfs_node_blockptr(eb, slot);
8163         if (found == corrupt->cache.start)
8164                 goto del_ptr;
8165
8166         nritems = btrfs_header_nritems(eb);
8167
8168         /* the search failed, lets scan this node and hope we find it */
8169         for (slot = 0; slot < nritems; slot++) {
8170                 found =  btrfs_node_blockptr(eb, slot);
8171                 if (found == corrupt->cache.start)
8172                         goto del_ptr;
8173         }
8174         /*
8175          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8176          * to this block
8177          */
8178         if (eb == info->extent_root->node) {
8179                 ret = -ENOENT;
8180                 goto out;
8181         } else {
8182                 level++;
8183                 btrfs_release_path(&path);
8184                 goto again;
8185         }
8186
8187 del_ptr:
8188         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8189         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8190
8191 out:
8192         btrfs_release_path(&path);
8193         return ret;
8194 }
8195
8196 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8197 {
8198         struct btrfs_trans_handle *trans = NULL;
8199         struct cache_extent *cache;
8200         struct btrfs_corrupt_block *corrupt;
8201
8202         while (1) {
8203                 cache = search_cache_extent(info->corrupt_blocks, 0);
8204                 if (!cache)
8205                         break;
8206                 if (!trans) {
8207                         trans = btrfs_start_transaction(info->extent_root, 1);
8208                         if (IS_ERR(trans))
8209                                 return PTR_ERR(trans);
8210                 }
8211                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8212                 prune_one_block(trans, info, corrupt);
8213                 remove_cache_extent(info->corrupt_blocks, cache);
8214         }
8215         if (trans)
8216                 return btrfs_commit_transaction(trans, info->extent_root);
8217         return 0;
8218 }
8219
8220 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8221 {
8222         struct btrfs_block_group_cache *cache;
8223         u64 start, end;
8224         int ret;
8225
8226         while (1) {
8227                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8228                                             &start, &end, EXTENT_DIRTY);
8229                 if (ret)
8230                         break;
8231                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8232                                    GFP_NOFS);
8233         }
8234
8235         start = 0;
8236         while (1) {
8237                 cache = btrfs_lookup_first_block_group(fs_info, start);
8238                 if (!cache)
8239                         break;
8240                 if (cache->cached)
8241                         cache->cached = 0;
8242                 start = cache->key.objectid + cache->key.offset;
8243         }
8244 }
8245
8246 static int check_extent_refs(struct btrfs_root *root,
8247                              struct cache_tree *extent_cache)
8248 {
8249         struct extent_record *rec;
8250         struct cache_extent *cache;
8251         int err = 0;
8252         int ret = 0;
8253         int fixed = 0;
8254         int had_dups = 0;
8255         int recorded = 0;
8256
8257         if (repair) {
8258                 /*
8259                  * if we're doing a repair, we have to make sure
8260                  * we don't allocate from the problem extents.
8261                  * In the worst case, this will be all the
8262                  * extents in the FS
8263                  */
8264                 cache = search_cache_extent(extent_cache, 0);
8265                 while(cache) {
8266                         rec = container_of(cache, struct extent_record, cache);
8267                         set_extent_dirty(root->fs_info->excluded_extents,
8268                                          rec->start,
8269                                          rec->start + rec->max_size - 1,
8270                                          GFP_NOFS);
8271                         cache = next_cache_extent(cache);
8272                 }
8273
8274                 /* pin down all the corrupted blocks too */
8275                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8276                 while(cache) {
8277                         set_extent_dirty(root->fs_info->excluded_extents,
8278                                          cache->start,
8279                                          cache->start + cache->size - 1,
8280                                          GFP_NOFS);
8281                         cache = next_cache_extent(cache);
8282                 }
8283                 prune_corrupt_blocks(root->fs_info);
8284                 reset_cached_block_groups(root->fs_info);
8285         }
8286
8287         reset_cached_block_groups(root->fs_info);
8288
8289         /*
8290          * We need to delete any duplicate entries we find first otherwise we
8291          * could mess up the extent tree when we have backrefs that actually
8292          * belong to a different extent item and not the weird duplicate one.
8293          */
8294         while (repair && !list_empty(&duplicate_extents)) {
8295                 rec = to_extent_record(duplicate_extents.next);
8296                 list_del_init(&rec->list);
8297
                /*
                 * Sometimes we can find a backref before we find an actual
                 * extent, so we need to process it a little bit to see if
                 * there truly are multiple EXTENT_ITEM_KEYs for the same
                 * range, or if this is a backref screwup.  If we need to
                 * delete stuff process_duplicates() will return 0, otherwise
                 * it will return 1 and we skip to the next record.
                 */
8305                 if (process_duplicates(root, extent_cache, rec))
8306                         continue;
8307                 ret = delete_duplicate_records(root, rec);
8308                 if (ret < 0)
8309                         return ret;
                /*
                 * delete_duplicate_records will return the number of entries
                 * deleted, so if it's greater than 0 then we know we actually
                 * did something; remember that so we return -EAGAIN once all
                 * duplicates have been handled.
                 */
8315                 if (ret)
8316                         had_dups = 1;
8317         }
8318
8319         if (had_dups)
8320                 return -EAGAIN;
8321
8322         while(1) {
8323                 int cur_err = 0;
8324
8325                 fixed = 0;
8326                 recorded = 0;
8327                 cache = search_cache_extent(extent_cache, 0);
8328                 if (!cache)
8329                         break;
8330                 rec = container_of(cache, struct extent_record, cache);
8331                 if (rec->num_duplicates) {
8332                         fprintf(stderr, "extent item %llu has multiple extent "
8333                                 "items\n", (unsigned long long)rec->start);
8334                         err = 1;
8335                         cur_err = 1;
8336                 }
8337
8338                 if (rec->refs != rec->extent_item_refs) {
8339                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8340                                 (unsigned long long)rec->start,
8341                                 (unsigned long long)rec->nr);
8342                         fprintf(stderr, "extent item %llu, found %llu\n",
8343                                 (unsigned long long)rec->extent_item_refs,
8344                                 (unsigned long long)rec->refs);
8345                         ret = record_orphan_data_extents(root->fs_info, rec);
8346                         if (ret < 0)
8347                                 goto repair_abort;
8348                         if (ret == 0) {
8349                                 recorded = 1;
8350                         } else {
8351                                 /*
8352                                  * we can't use the extent to repair file
8353                                  * extent, let the fallback method handle it.
8354                                  */
8355                                 if (!fixed && repair) {
8356                                         ret = fixup_extent_refs(
8357                                                         root->fs_info,
8358                                                         extent_cache, rec);
8359                                         if (ret)
8360                                                 goto repair_abort;
8361                                         fixed = 1;
8362                                 }
8363                         }
8364                         err = 1;
8365                         cur_err = 1;
8366                 }
8367                 if (all_backpointers_checked(rec, 1)) {
8368                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8369                                 (unsigned long long)rec->start,
8370                                 (unsigned long long)rec->nr);
8371
8372                         if (!fixed && !recorded && repair) {
8373                                 ret = fixup_extent_refs(root->fs_info,
8374                                                         extent_cache, rec);
8375                                 if (ret)
8376                                         goto repair_abort;
8377                                 fixed = 1;
8378                         }
8379                         cur_err = 1;
8380                         err = 1;
8381                 }
8382                 if (!rec->owner_ref_checked) {
8383                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8384                                 (unsigned long long)rec->start,
8385                                 (unsigned long long)rec->nr);
8386                         if (!fixed && !recorded && repair) {
8387                                 ret = fixup_extent_refs(root->fs_info,
8388                                                         extent_cache, rec);
8389                                 if (ret)
8390                                         goto repair_abort;
8391                                 fixed = 1;
8392                         }
8393                         err = 1;
8394                         cur_err = 1;
8395                 }
8396                 if (rec->bad_full_backref) {
8397                         fprintf(stderr, "bad full backref, on [%llu]\n",
8398                                 (unsigned long long)rec->start);
8399                         if (repair) {
8400                                 ret = fixup_extent_flags(root->fs_info, rec);
8401                                 if (ret)
8402                                         goto repair_abort;
8403                                 fixed = 1;
8404                         }
8405                         err = 1;
8406                         cur_err = 1;
8407                 }
8408                 /*
8409                  * Although it's not a extent ref's problem, we reuse this
8410                  * routine for error reporting.
8411                  * No repair function yet.
8412                  */
8413                 if (rec->crossing_stripes) {
8414                         fprintf(stderr,
8415                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8416                                 rec->start, rec->start + rec->max_size);
8417                         err = 1;
8418                         cur_err = 1;
8419                 }
8420
8421                 if (rec->wrong_chunk_type) {
8422                         fprintf(stderr,
8423                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8424                                 rec->start, rec->start + rec->max_size);
8425                         err = 1;
8426                         cur_err = 1;
8427                 }
8428
8429                 remove_cache_extent(extent_cache, cache);
8430                 free_all_extent_backrefs(rec);
8431                 if (!init_extent_tree && repair && (!cur_err || fixed))
8432                         clear_extent_dirty(root->fs_info->excluded_extents,
8433                                            rec->start,
8434                                            rec->start + rec->max_size - 1,
8435                                            GFP_NOFS);
8436                 free(rec);
8437         }
8438 repair_abort:
8439         if (repair) {
8440                 if (ret && ret != -EAGAIN) {
8441                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8442                         exit(1);
8443                 } else if (!ret) {
8444                         struct btrfs_trans_handle *trans;
8445
8446                         root = root->fs_info->extent_root;
8447                         trans = btrfs_start_transaction(root, 1);
8448                         if (IS_ERR(trans)) {
8449                                 ret = PTR_ERR(trans);
8450                                 goto repair_abort;
8451                         }
8452
8453                         btrfs_fix_block_accounting(trans, root);
8454                         ret = btrfs_commit_transaction(trans, root);
8455                         if (ret)
8456                                 goto repair_abort;
8457                 }
8458                 if (err)
8459                         fprintf(stderr, "repaired damaged extent references\n");
8460                 return ret;
8461         }
8462         return err;
8463 }
8464
8465 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8466 {
8467         u64 stripe_size;
8468
8469         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8470                 stripe_size = length;
8471                 stripe_size /= num_stripes;
8472         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8473                 stripe_size = length * 2;
8474                 stripe_size /= num_stripes;
8475         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8476                 stripe_size = length;
8477                 stripe_size /= (num_stripes - 1);
8478         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8479                 stripe_size = length;
8480                 stripe_size /= (num_stripes - 2);
8481         } else {
8482                 stripe_size = length;
8483         }
8484         return stripe_size;
8485 }
8486
8487 /*
8488  * Check the chunk with its block group/dev list ref:
8489  * Return 0 if all refs seems valid.
8490  * Return 1 if part of refs seems valid, need later check for rebuild ref
8491  * like missing block group and needs to search extent tree to rebuild them.
8492  * Return -1 if essential refs are missing and unable to rebuild.
8493  */
8494 static int check_chunk_refs(struct chunk_record *chunk_rec,
8495                             struct block_group_tree *block_group_cache,
8496                             struct device_extent_tree *dev_extent_cache,
8497                             int silent)
8498 {
8499         struct cache_extent *block_group_item;
8500         struct block_group_record *block_group_rec;
8501         struct cache_extent *dev_extent_item;
8502         struct device_extent_record *dev_extent_rec;
8503         u64 devid;
8504         u64 offset;
8505         u64 length;
8506         int metadump_v2 = 0;
8507         int i;
8508         int ret = 0;
8509
8510         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8511                                                chunk_rec->offset,
8512                                                chunk_rec->length);
8513         if (block_group_item) {
8514                 block_group_rec = container_of(block_group_item,
8515                                                struct block_group_record,
8516                                                cache);
8517                 if (chunk_rec->length != block_group_rec->offset ||
8518                     chunk_rec->offset != block_group_rec->objectid ||
8519                     (!metadump_v2 &&
8520                      chunk_rec->type_flags != block_group_rec->flags)) {
8521                         if (!silent)
8522                                 fprintf(stderr,
8523                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8524                                         chunk_rec->objectid,
8525                                         chunk_rec->type,
8526                                         chunk_rec->offset,
8527                                         chunk_rec->length,
8528                                         chunk_rec->offset,
8529                                         chunk_rec->type_flags,
8530                                         block_group_rec->objectid,
8531                                         block_group_rec->type,
8532                                         block_group_rec->offset,
8533                                         block_group_rec->offset,
8534                                         block_group_rec->objectid,
8535                                         block_group_rec->flags);
8536                         ret = -1;
8537                 } else {
8538                         list_del_init(&block_group_rec->list);
8539                         chunk_rec->bg_rec = block_group_rec;
8540                 }
8541         } else {
8542                 if (!silent)
8543                         fprintf(stderr,
8544                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8545                                 chunk_rec->objectid,
8546                                 chunk_rec->type,
8547                                 chunk_rec->offset,
8548                                 chunk_rec->length,
8549                                 chunk_rec->offset,
8550                                 chunk_rec->type_flags);
8551                 ret = 1;
8552         }
8553
8554         if (metadump_v2)
8555                 return ret;
8556
8557         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8558                                     chunk_rec->num_stripes);
8559         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8560                 devid = chunk_rec->stripes[i].devid;
8561                 offset = chunk_rec->stripes[i].offset;
8562                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8563                                                        devid, offset, length);
8564                 if (dev_extent_item) {
8565                         dev_extent_rec = container_of(dev_extent_item,
8566                                                 struct device_extent_record,
8567                                                 cache);
8568                         if (dev_extent_rec->objectid != devid ||
8569                             dev_extent_rec->offset != offset ||
8570                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8571                             dev_extent_rec->length != length) {
8572                                 if (!silent)
8573                                         fprintf(stderr,
8574                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8575                                                 chunk_rec->objectid,
8576                                                 chunk_rec->type,
8577                                                 chunk_rec->offset,
8578                                                 chunk_rec->stripes[i].devid,
8579                                                 chunk_rec->stripes[i].offset,
8580                                                 dev_extent_rec->objectid,
8581                                                 dev_extent_rec->offset,
8582                                                 dev_extent_rec->length);
8583                                 ret = -1;
8584                         } else {
8585                                 list_move(&dev_extent_rec->chunk_list,
8586                                           &chunk_rec->dextents);
8587                         }
8588                 } else {
8589                         if (!silent)
8590                                 fprintf(stderr,
8591                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8592                                         chunk_rec->objectid,
8593                                         chunk_rec->type,
8594                                         chunk_rec->offset,
8595                                         chunk_rec->stripes[i].devid,
8596                                         chunk_rec->stripes[i].offset);
8597                         ret = -1;
8598                 }
8599         }
8600         return ret;
8601 }
8602
8603 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8604 int check_chunks(struct cache_tree *chunk_cache,
8605                  struct block_group_tree *block_group_cache,
8606                  struct device_extent_tree *dev_extent_cache,
8607                  struct list_head *good, struct list_head *bad,
8608                  struct list_head *rebuild, int silent)
8609 {
8610         struct cache_extent *chunk_item;
8611         struct chunk_record *chunk_rec;
8612         struct block_group_record *bg_rec;
8613         struct device_extent_record *dext_rec;
8614         int err;
8615         int ret = 0;
8616
8617         chunk_item = first_cache_extent(chunk_cache);
8618         while (chunk_item) {
8619                 chunk_rec = container_of(chunk_item, struct chunk_record,
8620                                          cache);
8621                 err = check_chunk_refs(chunk_rec, block_group_cache,
8622                                        dev_extent_cache, silent);
8623                 if (err < 0)
8624                         ret = err;
8625                 if (err == 0 && good)
8626                         list_add_tail(&chunk_rec->list, good);
8627                 if (err > 0 && rebuild)
8628                         list_add_tail(&chunk_rec->list, rebuild);
8629                 if (err < 0 && bad)
8630                         list_add_tail(&chunk_rec->list, bad);
8631                 chunk_item = next_cache_extent(chunk_item);
8632         }
8633
8634         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8635                 if (!silent)
8636                         fprintf(stderr,
8637                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8638                                 bg_rec->objectid,
8639                                 bg_rec->offset,
8640                                 bg_rec->flags);
8641                 if (!ret)
8642                         ret = 1;
8643         }
8644
8645         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8646                             chunk_list) {
8647                 if (!silent)
8648                         fprintf(stderr,
8649                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8650                                 dext_rec->objectid,
8651                                 dext_rec->offset,
8652                                 dext_rec->length);
8653                 if (!ret)
8654                         ret = 1;
8655         }
8656         return ret;
8657 }
8658
8659
8660 static int check_device_used(struct device_record *dev_rec,
8661                              struct device_extent_tree *dext_cache)
8662 {
8663         struct cache_extent *cache;
8664         struct device_extent_record *dev_extent_rec;
8665         u64 total_byte = 0;
8666
8667         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8668         while (cache) {
8669                 dev_extent_rec = container_of(cache,
8670                                               struct device_extent_record,
8671                                               cache);
8672                 if (dev_extent_rec->objectid != dev_rec->devid)
8673                         break;
8674
8675                 list_del_init(&dev_extent_rec->device_list);
8676                 total_byte += dev_extent_rec->length;
8677                 cache = next_cache_extent(cache);
8678         }
8679
8680         if (total_byte != dev_rec->byte_used) {
8681                 fprintf(stderr,
8682                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8683                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8684                         dev_rec->type, dev_rec->offset);
8685                 return -1;
8686         } else {
8687                 return 0;
8688         }
8689 }
8690
8691 /* check btrfs_dev_item -> btrfs_dev_extent */
8692 static int check_devices(struct rb_root *dev_cache,
8693                          struct device_extent_tree *dev_extent_cache)
8694 {
8695         struct rb_node *dev_node;
8696         struct device_record *dev_rec;
8697         struct device_extent_record *dext_rec;
8698         int err;
8699         int ret = 0;
8700
8701         dev_node = rb_first(dev_cache);
8702         while (dev_node) {
8703                 dev_rec = container_of(dev_node, struct device_record, node);
8704                 err = check_device_used(dev_rec, dev_extent_cache);
8705                 if (err)
8706                         ret = err;
8707
8708                 dev_node = rb_next(dev_node);
8709         }
8710         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8711                             device_list) {
8712                 fprintf(stderr,
8713                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8714                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8715                 if (!ret)
8716                         ret = 1;
8717         }
8718         return ret;
8719 }
8720
8721 static int add_root_item_to_list(struct list_head *head,
8722                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8723                                   u8 level, u8 drop_level,
8724                                   int level_size, struct btrfs_key *drop_key)
8725 {
8726
8727         struct root_item_record *ri_rec;
8728         ri_rec = malloc(sizeof(*ri_rec));
8729         if (!ri_rec)
8730                 return -ENOMEM;
8731         ri_rec->bytenr = bytenr;
8732         ri_rec->objectid = objectid;
8733         ri_rec->level = level;
8734         ri_rec->level_size = level_size;
8735         ri_rec->drop_level = drop_level;
8736         ri_rec->last_snapshot = last_snapshot;
8737         if (drop_key)
8738                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8739         list_add_tail(&ri_rec->list, head);
8740
8741         return 0;
8742 }
8743
8744 static void free_root_item_list(struct list_head *list)
8745 {
8746         struct root_item_record *ri_rec;
8747
8748         while (!list_empty(list)) {
8749                 ri_rec = list_first_entry(list, struct root_item_record,
8750                                           list);
8751                 list_del_init(&ri_rec->list);
8752                 free(ri_rec);
8753         }
8754 }
8755
8756 static int deal_root_from_list(struct list_head *list,
8757                                struct btrfs_root *root,
8758                                struct block_info *bits,
8759                                int bits_nr,
8760                                struct cache_tree *pending,
8761                                struct cache_tree *seen,
8762                                struct cache_tree *reada,
8763                                struct cache_tree *nodes,
8764                                struct cache_tree *extent_cache,
8765                                struct cache_tree *chunk_cache,
8766                                struct rb_root *dev_cache,
8767                                struct block_group_tree *block_group_cache,
8768                                struct device_extent_tree *dev_extent_cache)
8769 {
8770         int ret = 0;
8771         u64 last;
8772
8773         while (!list_empty(list)) {
8774                 struct root_item_record *rec;
8775                 struct extent_buffer *buf;
8776                 rec = list_entry(list->next,
8777                                  struct root_item_record, list);
8778                 last = 0;
8779                 buf = read_tree_block(root->fs_info->tree_root,
8780                                       rec->bytenr, rec->level_size, 0);
8781                 if (!extent_buffer_uptodate(buf)) {
8782                         free_extent_buffer(buf);
8783                         ret = -EIO;
8784                         break;
8785                 }
8786                 ret = add_root_to_pending(buf, extent_cache, pending,
8787                                     seen, nodes, rec->objectid);
8788                 if (ret < 0)
8789                         break;
8790                 /*
8791                  * To rebuild extent tree, we need deal with snapshot
8792                  * one by one, otherwise we deal with node firstly which
8793                  * can maximize readahead.
8794                  */
8795                 while (1) {
8796                         ret = run_next_block(root, bits, bits_nr, &last,
8797                                              pending, seen, reada, nodes,
8798                                              extent_cache, chunk_cache,
8799                                              dev_cache, block_group_cache,
8800                                              dev_extent_cache, rec);
8801                         if (ret != 0)
8802                                 break;
8803                 }
8804                 free_extent_buffer(buf);
8805                 list_del(&rec->list);
8806                 free(rec);
8807                 if (ret < 0)
8808                         break;
8809         }
8810         while (ret >= 0) {
8811                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8812                                      reada, nodes, extent_cache, chunk_cache,
8813                                      dev_cache, block_group_cache,
8814                                      dev_extent_cache, NULL);
8815                 if (ret != 0) {
8816                         if (ret > 0)
8817                                 ret = 0;
8818                         break;
8819                 }
8820         }
8821         return ret;
8822 }
8823
8824 static int check_chunks_and_extents(struct btrfs_root *root)
8825 {
8826         struct rb_root dev_cache;
8827         struct cache_tree chunk_cache;
8828         struct block_group_tree block_group_cache;
8829         struct device_extent_tree dev_extent_cache;
8830         struct cache_tree extent_cache;
8831         struct cache_tree seen;
8832         struct cache_tree pending;
8833         struct cache_tree reada;
8834         struct cache_tree nodes;
8835         struct extent_io_tree excluded_extents;
8836         struct cache_tree corrupt_blocks;
8837         struct btrfs_path path;
8838         struct btrfs_key key;
8839         struct btrfs_key found_key;
8840         int ret, err = 0;
8841         struct block_info *bits;
8842         int bits_nr;
8843         struct extent_buffer *leaf;
8844         int slot;
8845         struct btrfs_root_item ri;
8846         struct list_head dropping_trees;
8847         struct list_head normal_trees;
8848         struct btrfs_root *root1;
8849         u64 objectid;
8850         u32 level_size;
8851         u8 level;
8852
8853         dev_cache = RB_ROOT;
8854         cache_tree_init(&chunk_cache);
8855         block_group_tree_init(&block_group_cache);
8856         device_extent_tree_init(&dev_extent_cache);
8857
8858         cache_tree_init(&extent_cache);
8859         cache_tree_init(&seen);
8860         cache_tree_init(&pending);
8861         cache_tree_init(&nodes);
8862         cache_tree_init(&reada);
8863         cache_tree_init(&corrupt_blocks);
8864         extent_io_tree_init(&excluded_extents);
8865         INIT_LIST_HEAD(&dropping_trees);
8866         INIT_LIST_HEAD(&normal_trees);
8867
8868         if (repair) {
8869                 root->fs_info->excluded_extents = &excluded_extents;
8870                 root->fs_info->fsck_extent_cache = &extent_cache;
8871                 root->fs_info->free_extent_hook = free_extent_hook;
8872                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8873         }
8874
8875         bits_nr = 1024;
8876         bits = malloc(bits_nr * sizeof(struct block_info));
8877         if (!bits) {
8878                 perror("malloc");
8879                 exit(1);
8880         }
8881
8882         if (ctx.progress_enabled) {
8883                 ctx.tp = TASK_EXTENTS;
8884                 task_start(ctx.info);
8885         }
8886
8887 again:
8888         root1 = root->fs_info->tree_root;
8889         level = btrfs_header_level(root1->node);
8890         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8891                                     root1->node->start, 0, level, 0,
8892                                     root1->nodesize, NULL);
8893         if (ret < 0)
8894                 goto out;
8895         root1 = root->fs_info->chunk_root;
8896         level = btrfs_header_level(root1->node);
8897         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8898                                     root1->node->start, 0, level, 0,
8899                                     root1->nodesize, NULL);
8900         if (ret < 0)
8901                 goto out;
8902         btrfs_init_path(&path);
8903         key.offset = 0;
8904         key.objectid = 0;
8905         key.type = BTRFS_ROOT_ITEM_KEY;
8906         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8907                                         &key, &path, 0, 0);
8908         if (ret < 0)
8909                 goto out;
8910         while(1) {
8911                 leaf = path.nodes[0];
8912                 slot = path.slots[0];
8913                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8914                         ret = btrfs_next_leaf(root, &path);
8915                         if (ret != 0)
8916                                 break;
8917                         leaf = path.nodes[0];
8918                         slot = path.slots[0];
8919                 }
8920                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8921                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8922                         unsigned long offset;
8923                         u64 last_snapshot;
8924
8925                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8926                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8927                         last_snapshot = btrfs_root_last_snapshot(&ri);
8928                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8929                                 level = btrfs_root_level(&ri);
8930                                 level_size = root->nodesize;
8931                                 ret = add_root_item_to_list(&normal_trees,
8932                                                 found_key.objectid,
8933                                                 btrfs_root_bytenr(&ri),
8934                                                 last_snapshot, level,
8935                                                 0, level_size, NULL);
8936                                 if (ret < 0)
8937                                         goto out;
8938                         } else {
8939                                 level = btrfs_root_level(&ri);
8940                                 level_size = root->nodesize;
8941                                 objectid = found_key.objectid;
8942                                 btrfs_disk_key_to_cpu(&found_key,
8943                                                       &ri.drop_progress);
8944                                 ret = add_root_item_to_list(&dropping_trees,
8945                                                 objectid,
8946                                                 btrfs_root_bytenr(&ri),
8947                                                 last_snapshot, level,
8948                                                 ri.drop_level,
8949                                                 level_size, &found_key);
8950                                 if (ret < 0)
8951                                         goto out;
8952                         }
8953                 }
8954                 path.slots[0]++;
8955         }
8956         btrfs_release_path(&path);
8957
8958         /*
8959          * check_block can return -EAGAIN if it fixes something, please keep
8960          * this in mind when dealing with return values from these functions, if
8961          * we get -EAGAIN we want to fall through and restart the loop.
8962          */
8963         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8964                                   &seen, &reada, &nodes, &extent_cache,
8965                                   &chunk_cache, &dev_cache, &block_group_cache,
8966                                   &dev_extent_cache);
8967         if (ret < 0) {
8968                 if (ret == -EAGAIN)
8969                         goto loop;
8970                 goto out;
8971         }
8972         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8973                                   &pending, &seen, &reada, &nodes,
8974                                   &extent_cache, &chunk_cache, &dev_cache,
8975                                   &block_group_cache, &dev_extent_cache);
8976         if (ret < 0) {
8977                 if (ret == -EAGAIN)
8978                         goto loop;
8979                 goto out;
8980         }
8981
8982         ret = check_chunks(&chunk_cache, &block_group_cache,
8983                            &dev_extent_cache, NULL, NULL, NULL, 0);
8984         if (ret) {
8985                 if (ret == -EAGAIN)
8986                         goto loop;
8987                 err = ret;
8988         }
8989
8990         ret = check_extent_refs(root, &extent_cache);
8991         if (ret < 0) {
8992                 if (ret == -EAGAIN)
8993                         goto loop;
8994                 goto out;
8995         }
8996
8997         ret = check_devices(&dev_cache, &dev_extent_cache);
8998         if (ret && err)
8999                 ret = err;
9000
9001 out:
9002         task_stop(ctx.info);
9003         if (repair) {
9004                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9005                 extent_io_tree_cleanup(&excluded_extents);
9006                 root->fs_info->fsck_extent_cache = NULL;
9007                 root->fs_info->free_extent_hook = NULL;
9008                 root->fs_info->corrupt_blocks = NULL;
9009                 root->fs_info->excluded_extents = NULL;
9010         }
9011         free(bits);
9012         free_chunk_cache_tree(&chunk_cache);
9013         free_device_cache_tree(&dev_cache);
9014         free_block_group_tree(&block_group_cache);
9015         free_device_extent_tree(&dev_extent_cache);
9016         free_extent_cache_tree(&seen);
9017         free_extent_cache_tree(&pending);
9018         free_extent_cache_tree(&reada);
9019         free_extent_cache_tree(&nodes);
9020         return ret;
9021 loop:
9022         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9023         free_extent_cache_tree(&seen);
9024         free_extent_cache_tree(&pending);
9025         free_extent_cache_tree(&reada);
9026         free_extent_cache_tree(&nodes);
9027         free_chunk_cache_tree(&chunk_cache);
9028         free_block_group_tree(&block_group_cache);
9029         free_device_cache_tree(&dev_cache);
9030         free_device_extent_tree(&dev_extent_cache);
9031         free_extent_record_cache(root->fs_info, &extent_cache);
9032         free_root_item_list(&normal_trees);
9033         free_root_item_list(&dropping_trees);
9034         extent_io_tree_cleanup(&excluded_extents);
9035         goto again;
9036 }
9037
9038 /*
9039  * Check backrefs of a tree block given by @bytenr or @eb.
9040  *
9041  * @root:       the root containing the @bytenr or @eb
9042  * @eb:         tree block extent buffer, can be NULL
9043  * @bytenr:     bytenr of the tree block to search
9044  * @level:      tree level of the tree block
9045  * @owner:      owner of the tree block
9046  *
9047  * Return >0 for any error found and output error message
9048  * Return 0 for no error found
9049  */
9050 static int check_tree_block_ref(struct btrfs_root *root,
9051                                 struct extent_buffer *eb, u64 bytenr,
9052                                 int level, u64 owner)
9053 {
9054         struct btrfs_key key;
9055         struct btrfs_root *extent_root = root->fs_info->extent_root;
9056         struct btrfs_path path;
9057         struct btrfs_extent_item *ei;
9058         struct btrfs_extent_inline_ref *iref;
9059         struct extent_buffer *leaf;
9060         unsigned long end;
9061         unsigned long ptr;
9062         int slot;
9063         int skinny_level;
9064         int type;
9065         u32 nodesize = root->nodesize;
9066         u32 item_size;
9067         u64 offset;
9068         int found_ref = 0;
9069         int err = 0;
9070         int ret;
9071
9072         btrfs_init_path(&path);
9073         key.objectid = bytenr;
9074         if (btrfs_fs_incompat(root->fs_info,
9075                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9076                 key.type = BTRFS_METADATA_ITEM_KEY;
9077         else
9078                 key.type = BTRFS_EXTENT_ITEM_KEY;
9079         key.offset = (u64)-1;
9080
9081         /* Search for the backref in extent tree */
9082         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9083         if (ret < 0) {
9084                 err |= BACKREF_MISSING;
9085                 goto out;
9086         }
9087         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9088         if (ret) {
9089                 err |= BACKREF_MISSING;
9090                 goto out;
9091         }
9092
9093         leaf = path.nodes[0];
9094         slot = path.slots[0];
9095         btrfs_item_key_to_cpu(leaf, &key, slot);
9096
9097         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9098
9099         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9100                 skinny_level = (int)key.offset;
9101                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9102         } else {
9103                 struct btrfs_tree_block_info *info;
9104
9105                 info = (struct btrfs_tree_block_info *)(ei + 1);
9106                 skinny_level = btrfs_tree_block_level(leaf, info);
9107                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9108         }
9109
9110         if (eb) {
9111                 u64 header_gen;
9112                 u64 extent_gen;
9113
9114                 if (!(btrfs_extent_flags(leaf, ei) &
9115                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9116                         error(
9117                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9118                                 key.objectid, nodesize,
9119                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9120                         err = BACKREF_MISMATCH;
9121                 }
9122                 header_gen = btrfs_header_generation(eb);
9123                 extent_gen = btrfs_extent_generation(leaf, ei);
9124                 if (header_gen != extent_gen) {
9125                         error(
9126         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9127                                 key.objectid, nodesize, header_gen,
9128                                 extent_gen);
9129                         err = BACKREF_MISMATCH;
9130                 }
9131                 if (level != skinny_level) {
9132                         error(
9133                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9134                                 key.objectid, nodesize, level, skinny_level);
9135                         err = BACKREF_MISMATCH;
9136                 }
9137                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9138                         error(
9139                         "extent[%llu %u] is referred by other roots than %llu",
9140                                 key.objectid, nodesize, root->objectid);
9141                         err = BACKREF_MISMATCH;
9142                 }
9143         }
9144
9145         /*
9146          * Iterate the extent/metadata item to find the exact backref
9147          */
9148         item_size = btrfs_item_size_nr(leaf, slot);
9149         ptr = (unsigned long)iref;
9150         end = (unsigned long)ei + item_size;
9151         while (ptr < end) {
9152                 iref = (struct btrfs_extent_inline_ref *)ptr;
9153                 type = btrfs_extent_inline_ref_type(leaf, iref);
9154                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9155
9156                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9157                         (offset == root->objectid || offset == owner)) {
9158                         found_ref = 1;
9159                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9160                         /* Check if the backref points to valid referencer */
9161                         found_ref = !check_tree_block_ref(root, NULL, offset,
9162                                                           level + 1, owner);
9163                 }
9164
9165                 if (found_ref)
9166                         break;
9167                 ptr += btrfs_extent_inline_ref_size(type);
9168         }
9169
9170         /*
9171          * Inlined extent item doesn't have what we need, check
9172          * TREE_BLOCK_REF_KEY
9173          */
9174         if (!found_ref) {
9175                 btrfs_release_path(&path);
9176                 key.objectid = bytenr;
9177                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9178                 key.offset = root->objectid;
9179
9180                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9181                 if (!ret)
9182                         found_ref = 1;
9183         }
9184         if (!found_ref)
9185                 err |= BACKREF_MISSING;
9186 out:
9187         btrfs_release_path(&path);
9188         if (eb && (err & BACKREF_MISSING))
9189                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9190                         bytenr, nodesize, owner, level);
9191         return err;
9192 }
9193
9194 /*
9195  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9196  *
9197  * Return >0 any error found and output error message
9198  * Return 0 for no error found
9199  */
9200 static int check_extent_data_item(struct btrfs_root *root,
9201                                   struct extent_buffer *eb, int slot)
9202 {
9203         struct btrfs_file_extent_item *fi;
9204         struct btrfs_path path;
9205         struct btrfs_root *extent_root = root->fs_info->extent_root;
9206         struct btrfs_key fi_key;
9207         struct btrfs_key dbref_key;
9208         struct extent_buffer *leaf;
9209         struct btrfs_extent_item *ei;
9210         struct btrfs_extent_inline_ref *iref;
9211         struct btrfs_extent_data_ref *dref;
9212         u64 owner;
9213         u64 file_extent_gen;
9214         u64 disk_bytenr;
9215         u64 disk_num_bytes;
9216         u64 extent_num_bytes;
9217         u64 extent_flags;
9218         u64 extent_gen;
9219         u32 item_size;
9220         unsigned long end;
9221         unsigned long ptr;
9222         int type;
9223         u64 ref_root;
9224         int found_dbackref = 0;
9225         int err = 0;
9226         int ret;
9227
9228         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9229         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9230         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9231
9232         /* Nothing to check for hole and inline data extents */
9233         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9234             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9235                 return 0;
9236
9237         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9238         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9239         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9240
9241         /* Check unaligned disk_num_bytes and num_bytes */
9242         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9243                 error(
9244 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9245                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9246                         root->sectorsize);
9247                 err |= BYTES_UNALIGNED;
9248         } else {
9249                 data_bytes_allocated += disk_num_bytes;
9250         }
9251         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9252                 error(
9253 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9254                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9255                         root->sectorsize);
9256                 err |= BYTES_UNALIGNED;
9257         } else {
9258                 data_bytes_referenced += extent_num_bytes;
9259         }
9260         owner = btrfs_header_owner(eb);
9261
9262         /* Check the extent item of the file extent in extent tree */
9263         btrfs_init_path(&path);
9264         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9265         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9266         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9267
9268         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9269         if (ret) {
9270                 err |= BACKREF_MISSING;
9271                 goto error;
9272         }
9273
9274         leaf = path.nodes[0];
9275         slot = path.slots[0];
9276         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9277
9278         extent_flags = btrfs_extent_flags(leaf, ei);
9279         extent_gen = btrfs_extent_generation(leaf, ei);
9280
9281         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9282                 error(
9283                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9284                     disk_bytenr, disk_num_bytes,
9285                     BTRFS_EXTENT_FLAG_DATA);
9286                 err |= BACKREF_MISMATCH;
9287         }
9288
9289         if (file_extent_gen < extent_gen) {
9290                 error(
9291 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9292                         disk_bytenr, disk_num_bytes, file_extent_gen,
9293                         extent_gen);
9294                 err |= BACKREF_MISMATCH;
9295         }
9296
9297         /* Check data backref inside that extent item */
9298         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9299         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9300         ptr = (unsigned long)iref;
9301         end = (unsigned long)ei + item_size;
9302         while (ptr < end) {
9303                 iref = (struct btrfs_extent_inline_ref *)ptr;
9304                 type = btrfs_extent_inline_ref_type(leaf, iref);
9305                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9306
9307                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9308                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9309                         if (ref_root == owner || ref_root == root->objectid)
9310                                 found_dbackref = 1;
9311                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9312                         found_dbackref = !check_tree_block_ref(root, NULL,
9313                                 btrfs_extent_inline_ref_offset(leaf, iref),
9314                                 0, owner);
9315                 }
9316
9317                 if (found_dbackref)
9318                         break;
9319                 ptr += btrfs_extent_inline_ref_size(type);
9320         }
9321
9322         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9323         if (!found_dbackref) {
9324                 btrfs_release_path(&path);
9325
9326                 btrfs_init_path(&path);
9327                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9328                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9329                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9330                                 fi_key.objectid, fi_key.offset);
9331
9332                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9333                                         &dbref_key, &path, 0, 0);
9334                 if (!ret)
9335                         found_dbackref = 1;
9336         }
9337
9338         if (!found_dbackref)
9339                 err |= BACKREF_MISSING;
9340 error:
9341         btrfs_release_path(&path);
9342         if (err & BACKREF_MISSING) {
9343                 error("data extent[%llu %llu] backref lost",
9344                       disk_bytenr, disk_num_bytes);
9345         }
9346         return err;
9347 }
9348
9349 /*
9350  * Get real tree block level for the case like shared block
9351  * Return >= 0 as tree level
9352  * Return <0 for error
9353  */
9354 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9355 {
9356         struct extent_buffer *eb;
9357         struct btrfs_path path;
9358         struct btrfs_key key;
9359         struct btrfs_extent_item *ei;
9360         u64 flags;
9361         u64 transid;
9362         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9363         u8 backref_level;
9364         u8 header_level;
9365         int ret;
9366
9367         /* Search extent tree for extent generation and level */
9368         key.objectid = bytenr;
9369         key.type = BTRFS_METADATA_ITEM_KEY;
9370         key.offset = (u64)-1;
9371
9372         btrfs_init_path(&path);
9373         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9374         if (ret < 0)
9375                 goto release_out;
9376         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9377         if (ret < 0)
9378                 goto release_out;
9379         if (ret > 0) {
9380                 ret = -ENOENT;
9381                 goto release_out;
9382         }
9383
9384         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9385         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9386                             struct btrfs_extent_item);
9387         flags = btrfs_extent_flags(path.nodes[0], ei);
9388         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9389                 ret = -ENOENT;
9390                 goto release_out;
9391         }
9392
9393         /* Get transid for later read_tree_block() check */
9394         transid = btrfs_extent_generation(path.nodes[0], ei);
9395
9396         /* Get backref level as one source */
9397         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9398                 backref_level = key.offset;
9399         } else {
9400                 struct btrfs_tree_block_info *info;
9401
9402                 info = (struct btrfs_tree_block_info *)(ei + 1);
9403                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9404         }
9405         btrfs_release_path(&path);
9406
9407         /* Get level from tree block as an alternative source */
9408         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9409         if (!extent_buffer_uptodate(eb)) {
9410                 free_extent_buffer(eb);
9411                 return -EIO;
9412         }
9413         header_level = btrfs_header_level(eb);
9414         free_extent_buffer(eb);
9415
9416         if (header_level != backref_level)
9417                 return -EIO;
9418         return header_level;
9419
9420 release_out:
9421         btrfs_release_path(&path);
9422         return ret;
9423 }
9424
9425 /*
9426  * Check if a tree block backref is valid (points to a valid tree block)
9427  * if level == -1, level will be resolved
9428  * Return >0 for any error found and print error message
9429  */
9430 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9431                                     u64 bytenr, int level)
9432 {
9433         struct btrfs_root *root;
9434         struct btrfs_key key;
9435         struct btrfs_path path;
9436         struct extent_buffer *eb;
9437         struct extent_buffer *node;
9438         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9439         int err = 0;
9440         int ret;
9441
9442         /* Query level for level == -1 special case */
9443         if (level == -1)
9444                 level = query_tree_block_level(fs_info, bytenr);
9445         if (level < 0) {
9446                 err |= REFERENCER_MISSING;
9447                 goto out;
9448         }
9449
9450         key.objectid = root_id;
9451         key.type = BTRFS_ROOT_ITEM_KEY;
9452         key.offset = (u64)-1;
9453
9454         root = btrfs_read_fs_root(fs_info, &key);
9455         if (IS_ERR(root)) {
9456                 err |= REFERENCER_MISSING;
9457                 goto out;
9458         }
9459
9460         /* Read out the tree block to get item/node key */
9461         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9462         if (!extent_buffer_uptodate(eb)) {
9463                 err |= REFERENCER_MISSING;
9464                 free_extent_buffer(eb);
9465                 goto out;
9466         }
9467
9468         /* Empty tree, no need to check key */
9469         if (!btrfs_header_nritems(eb) && !level) {
9470                 free_extent_buffer(eb);
9471                 goto out;
9472         }
9473
9474         if (level)
9475                 btrfs_node_key_to_cpu(eb, &key, 0);
9476         else
9477                 btrfs_item_key_to_cpu(eb, &key, 0);
9478
9479         free_extent_buffer(eb);
9480
9481         btrfs_init_path(&path);
9482         path.lowest_level = level;
9483         /* Search with the first key, to ensure we can reach it */
9484         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9485         if (ret < 0) {
9486                 err |= REFERENCER_MISSING;
9487                 goto release_out;
9488         }
9489
9490         node = path.nodes[level];
9491         if (btrfs_header_bytenr(node) != bytenr) {
9492                 error(
9493         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9494                         bytenr, nodesize, bytenr,
9495                         btrfs_header_bytenr(node));
9496                 err |= REFERENCER_MISMATCH;
9497         }
9498         if (btrfs_header_level(node) != level) {
9499                 error(
9500         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9501                         bytenr, nodesize, level,
9502                         btrfs_header_level(node));
9503                 err |= REFERENCER_MISMATCH;
9504         }
9505
9506 release_out:
9507         btrfs_release_path(&path);
9508 out:
9509         if (err & REFERENCER_MISSING) {
9510                 if (level < 0)
9511                         error("extent [%llu %d] lost referencer (owner: %llu)",
9512                                 bytenr, nodesize, root_id);
9513                 else
9514                         error(
9515                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9516                                 bytenr, nodesize, root_id, level);
9517         }
9518
9519         return err;
9520 }
9521
9522 /*
9523  * Check referencer for shared block backref
9524  * If level == -1, this function will resolve the level.
9525  */
9526 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9527                                      u64 parent, u64 bytenr, int level)
9528 {
9529         struct extent_buffer *eb;
9530         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9531         u32 nr;
9532         int found_parent = 0;
9533         int i;
9534
9535         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9536         if (!extent_buffer_uptodate(eb))
9537                 goto out;
9538
9539         if (level == -1)
9540                 level = query_tree_block_level(fs_info, bytenr);
9541         if (level < 0)
9542                 goto out;
9543
9544         if (level + 1 != btrfs_header_level(eb))
9545                 goto out;
9546
9547         nr = btrfs_header_nritems(eb);
9548         for (i = 0; i < nr; i++) {
9549                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9550                         found_parent = 1;
9551                         break;
9552                 }
9553         }
9554 out:
9555         free_extent_buffer(eb);
9556         if (!found_parent) {
9557                 error(
9558         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9559                         bytenr, nodesize, parent, level);
9560                 return REFERENCER_MISSING;
9561         }
9562         return 0;
9563 }
9564
9565 /*
9566  * Check referencer for normal (inlined) data ref
9567  * If len == 0, it will be resolved by searching in extent tree
9568  */
9569 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9570                                      u64 root_id, u64 objectid, u64 offset,
9571                                      u64 bytenr, u64 len, u32 count)
9572 {
9573         struct btrfs_root *root;
9574         struct btrfs_root *extent_root = fs_info->extent_root;
9575         struct btrfs_key key;
9576         struct btrfs_path path;
9577         struct extent_buffer *leaf;
9578         struct btrfs_file_extent_item *fi;
9579         u32 found_count = 0;
9580         int slot;
9581         int ret = 0;
9582
9583         if (!len) {
9584                 key.objectid = bytenr;
9585                 key.type = BTRFS_EXTENT_ITEM_KEY;
9586                 key.offset = (u64)-1;
9587
9588                 btrfs_init_path(&path);
9589                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9590                 if (ret < 0)
9591                         goto out;
9592                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9593                 if (ret)
9594                         goto out;
9595                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9596                 if (key.objectid != bytenr ||
9597                     key.type != BTRFS_EXTENT_ITEM_KEY)
9598                         goto out;
9599                 len = key.offset;
9600                 btrfs_release_path(&path);
9601         }
9602         key.objectid = root_id;
9603         key.type = BTRFS_ROOT_ITEM_KEY;
9604         key.offset = (u64)-1;
9605         btrfs_init_path(&path);
9606
9607         root = btrfs_read_fs_root(fs_info, &key);
9608         if (IS_ERR(root))
9609                 goto out;
9610
9611         key.objectid = objectid;
9612         key.type = BTRFS_EXTENT_DATA_KEY;
9613         /*
9614          * It can be nasty as data backref offset is
9615          * file offset - file extent offset, which is smaller or
9616          * equal to original backref offset.  The only special case is
9617          * overflow.  So we need to special check and do further search.
9618          */
9619         key.offset = offset & (1ULL << 63) ? 0 : offset;
9620
9621         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9622         if (ret < 0)
9623                 goto out;
9624
9625         /*
9626          * Search afterwards to get correct one
9627          * NOTE: As we must do a comprehensive check on the data backref to
9628          * make sure the dref count also matches, we must iterate all file
9629          * extents for that inode.
9630          */
9631         while (1) {
9632                 leaf = path.nodes[0];
9633                 slot = path.slots[0];
9634
9635                 btrfs_item_key_to_cpu(leaf, &key, slot);
9636                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9637                         break;
9638                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9639                 /*
9640                  * Except normal disk bytenr and disk num bytes, we still
9641                  * need to do extra check on dbackref offset as
9642                  * dbackref offset = file_offset - file_extent_offset
9643                  */
9644                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9645                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9646                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9647                     offset)
9648                         found_count++;
9649
9650                 ret = btrfs_next_item(root, &path);
9651                 if (ret)
9652                         break;
9653         }
9654 out:
9655         btrfs_release_path(&path);
9656         if (found_count != count) {
9657                 error(
9658 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9659                         bytenr, len, root_id, objectid, offset, count, found_count);
9660                 return REFERENCER_MISSING;
9661         }
9662         return 0;
9663 }
9664
9665 /*
9666  * Check if the referencer of a shared data backref exists
9667  */
9668 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9669                                      u64 parent, u64 bytenr)
9670 {
9671         struct extent_buffer *eb;
9672         struct btrfs_key key;
9673         struct btrfs_file_extent_item *fi;
9674         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9675         u32 nr;
9676         int found_parent = 0;
9677         int i;
9678
9679         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9680         if (!extent_buffer_uptodate(eb))
9681                 goto out;
9682
9683         nr = btrfs_header_nritems(eb);
9684         for (i = 0; i < nr; i++) {
9685                 btrfs_item_key_to_cpu(eb, &key, i);
9686                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9687                         continue;
9688
9689                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9690                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9691                         continue;
9692
9693                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9694                         found_parent = 1;
9695                         break;
9696                 }
9697         }
9698
9699 out:
9700         free_extent_buffer(eb);
9701         if (!found_parent) {
9702                 error("shared extent %llu referencer lost (parent: %llu)",
9703                         bytenr, parent);
9704                 return REFERENCER_MISSING;
9705         }
9706         return 0;
9707 }
9708
9709 /*
9710  * This function will check a given extent item, including its backref and
9711  * itself (like crossing stripe boundary and type)
9712  *
9713  * Since we don't use extent_record anymore, introduce new error bit
9714  */
9715 static int check_extent_item(struct btrfs_fs_info *fs_info,
9716                              struct extent_buffer *eb, int slot)
9717 {
9718         struct btrfs_extent_item *ei;
9719         struct btrfs_extent_inline_ref *iref;
9720         struct btrfs_extent_data_ref *dref;
9721         unsigned long end;
9722         unsigned long ptr;
9723         int type;
9724         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9725         u32 item_size = btrfs_item_size_nr(eb, slot);
9726         u64 flags;
9727         u64 offset;
9728         int metadata = 0;
9729         int level;
9730         struct btrfs_key key;
9731         int ret;
9732         int err = 0;
9733
9734         btrfs_item_key_to_cpu(eb, &key, slot);
9735         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9736                 bytes_used += key.offset;
9737         else
9738                 bytes_used += nodesize;
9739
9740         if (item_size < sizeof(*ei)) {
9741                 /*
9742                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9743                  * old thing when on disk format is still un-determined.
9744                  * No need to care about it anymore
9745                  */
9746                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9747                 return -ENOTTY;
9748         }
9749
9750         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9751         flags = btrfs_extent_flags(eb, ei);
9752
9753         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9754                 metadata = 1;
9755         if (metadata && check_crossing_stripes(global_info, key.objectid,
9756                                                eb->len)) {
9757                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9758                       key.objectid, key.objectid + nodesize);
9759                 err |= CROSSING_STRIPE_BOUNDARY;
9760         }
9761
9762         ptr = (unsigned long)(ei + 1);
9763
9764         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9765                 /* Old EXTENT_ITEM metadata */
9766                 struct btrfs_tree_block_info *info;
9767
9768                 info = (struct btrfs_tree_block_info *)ptr;
9769                 level = btrfs_tree_block_level(eb, info);
9770                 ptr += sizeof(struct btrfs_tree_block_info);
9771         } else {
9772                 /* New METADATA_ITEM */
9773                 level = key.offset;
9774         }
9775         end = (unsigned long)ei + item_size;
9776
9777         if (ptr >= end) {
9778                 err |= ITEM_SIZE_MISMATCH;
9779                 goto out;
9780         }
9781
9782         /* Now check every backref in this extent item */
9783 next:
9784         iref = (struct btrfs_extent_inline_ref *)ptr;
9785         type = btrfs_extent_inline_ref_type(eb, iref);
9786         offset = btrfs_extent_inline_ref_offset(eb, iref);
9787         switch (type) {
9788         case BTRFS_TREE_BLOCK_REF_KEY:
9789                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9790                                                level);
9791                 err |= ret;
9792                 break;
9793         case BTRFS_SHARED_BLOCK_REF_KEY:
9794                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9795                                                  level);
9796                 err |= ret;
9797                 break;
9798         case BTRFS_EXTENT_DATA_REF_KEY:
9799                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9800                 ret = check_extent_data_backref(fs_info,
9801                                 btrfs_extent_data_ref_root(eb, dref),
9802                                 btrfs_extent_data_ref_objectid(eb, dref),
9803                                 btrfs_extent_data_ref_offset(eb, dref),
9804                                 key.objectid, key.offset,
9805                                 btrfs_extent_data_ref_count(eb, dref));
9806                 err |= ret;
9807                 break;
9808         case BTRFS_SHARED_DATA_REF_KEY:
9809                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9810                 err |= ret;
9811                 break;
9812         default:
9813                 error("extent[%llu %d %llu] has unknown ref type: %d",
9814                         key.objectid, key.type, key.offset, type);
9815                 err |= UNKNOWN_TYPE;
9816                 goto out;
9817         }
9818
9819         ptr += btrfs_extent_inline_ref_size(type);
9820         if (ptr < end)
9821                 goto next;
9822
9823 out:
9824         return err;
9825 }
9826
9827 /*
9828  * Check if a dev extent item is referred correctly by its chunk
9829  */
9830 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9831                                  struct extent_buffer *eb, int slot)
9832 {
9833         struct btrfs_root *chunk_root = fs_info->chunk_root;
9834         struct btrfs_dev_extent *ptr;
9835         struct btrfs_path path;
9836         struct btrfs_key chunk_key;
9837         struct btrfs_key devext_key;
9838         struct btrfs_chunk *chunk;
9839         struct extent_buffer *l;
9840         int num_stripes;
9841         u64 length;
9842         int i;
9843         int found_chunk = 0;
9844         int ret;
9845
9846         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9847         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9848         length = btrfs_dev_extent_length(eb, ptr);
9849
9850         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9851         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9852         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9853
9854         btrfs_init_path(&path);
9855         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9856         if (ret)
9857                 goto out;
9858
9859         l = path.nodes[0];
9860         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9861         if (btrfs_chunk_length(l, chunk) != length)
9862                 goto out;
9863
9864         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9865         for (i = 0; i < num_stripes; i++) {
9866                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9867                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9868
9869                 if (devid == devext_key.objectid &&
9870                     offset == devext_key.offset) {
9871                         found_chunk = 1;
9872                         break;
9873                 }
9874         }
9875 out:
9876         btrfs_release_path(&path);
9877         if (!found_chunk) {
9878                 error(
9879                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9880                         devext_key.objectid, devext_key.offset, length);
9881                 return REFERENCER_MISSING;
9882         }
9883         return 0;
9884 }
9885
9886 /*
9887  * Check if the used space is correct with the dev item
9888  */
9889 static int check_dev_item(struct btrfs_fs_info *fs_info,
9890                           struct extent_buffer *eb, int slot)
9891 {
9892         struct btrfs_root *dev_root = fs_info->dev_root;
9893         struct btrfs_dev_item *dev_item;
9894         struct btrfs_path path;
9895         struct btrfs_key key;
9896         struct btrfs_dev_extent *ptr;
9897         u64 dev_id;
9898         u64 used;
9899         u64 total = 0;
9900         int ret;
9901
9902         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9903         dev_id = btrfs_device_id(eb, dev_item);
9904         used = btrfs_device_bytes_used(eb, dev_item);
9905
9906         key.objectid = dev_id;
9907         key.type = BTRFS_DEV_EXTENT_KEY;
9908         key.offset = 0;
9909
9910         btrfs_init_path(&path);
9911         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9912         if (ret < 0) {
9913                 btrfs_item_key_to_cpu(eb, &key, slot);
9914                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9915                         key.objectid, key.type, key.offset);
9916                 btrfs_release_path(&path);
9917                 return REFERENCER_MISSING;
9918         }
9919
9920         /* Iterate dev_extents to calculate the used space of a device */
9921         while (1) {
9922                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9923
9924                 if (key.objectid > dev_id)
9925                         break;
9926                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9927                         goto next;
9928
9929                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9930                                      struct btrfs_dev_extent);
9931                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9932 next:
9933                 ret = btrfs_next_item(dev_root, &path);
9934                 if (ret)
9935                         break;
9936         }
9937         btrfs_release_path(&path);
9938
9939         if (used != total) {
9940                 btrfs_item_key_to_cpu(eb, &key, slot);
9941                 error(
9942 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9943                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9944                         BTRFS_DEV_EXTENT_KEY, dev_id);
9945                 return ACCOUNTING_MISMATCH;
9946         }
9947         return 0;
9948 }
9949
9950 /*
9951  * Check a block group item with its referener (chunk) and its used space
9952  * with extent/metadata item
9953  */
9954 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9955                                   struct extent_buffer *eb, int slot)
9956 {
9957         struct btrfs_root *extent_root = fs_info->extent_root;
9958         struct btrfs_root *chunk_root = fs_info->chunk_root;
9959         struct btrfs_block_group_item *bi;
9960         struct btrfs_block_group_item bg_item;
9961         struct btrfs_path path;
9962         struct btrfs_key bg_key;
9963         struct btrfs_key chunk_key;
9964         struct btrfs_key extent_key;
9965         struct btrfs_chunk *chunk;
9966         struct extent_buffer *leaf;
9967         struct btrfs_extent_item *ei;
9968         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9969         u64 flags;
9970         u64 bg_flags;
9971         u64 used;
9972         u64 total = 0;
9973         int ret;
9974         int err = 0;
9975
9976         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9977         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9978         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9979         used = btrfs_block_group_used(&bg_item);
9980         bg_flags = btrfs_block_group_flags(&bg_item);
9981
9982         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9983         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9984         chunk_key.offset = bg_key.objectid;
9985
9986         btrfs_init_path(&path);
9987         /* Search for the referencer chunk */
9988         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9989         if (ret) {
9990                 error(
9991                 "block group[%llu %llu] did not find the related chunk item",
9992                         bg_key.objectid, bg_key.offset);
9993                 err |= REFERENCER_MISSING;
9994         } else {
9995                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9996                                         struct btrfs_chunk);
9997                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9998                                                 bg_key.offset) {
9999                         error(
10000         "block group[%llu %llu] related chunk item length does not match",
10001                                 bg_key.objectid, bg_key.offset);
10002                         err |= REFERENCER_MISMATCH;
10003                 }
10004         }
10005         btrfs_release_path(&path);
10006
10007         /* Search from the block group bytenr */
10008         extent_key.objectid = bg_key.objectid;
10009         extent_key.type = 0;
10010         extent_key.offset = 0;
10011
10012         btrfs_init_path(&path);
10013         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10014         if (ret < 0)
10015                 goto out;
10016
10017         /* Iterate extent tree to account used space */
10018         while (1) {
10019                 leaf = path.nodes[0];
10020                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10021                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10022                         break;
10023
10024                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10025                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10026                         goto next;
10027                 if (extent_key.objectid < bg_key.objectid)
10028                         goto next;
10029
10030                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10031                         total += nodesize;
10032                 else
10033                         total += extent_key.offset;
10034
10035                 ei = btrfs_item_ptr(leaf, path.slots[0],
10036                                     struct btrfs_extent_item);
10037                 flags = btrfs_extent_flags(leaf, ei);
10038                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10039                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10040                                 error(
10041                         "bad extent[%llu, %llu) type mismatch with chunk",
10042                                         extent_key.objectid,
10043                                         extent_key.objectid + extent_key.offset);
10044                                 err |= CHUNK_TYPE_MISMATCH;
10045                         }
10046                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10047                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10048                                     BTRFS_BLOCK_GROUP_METADATA))) {
10049                                 error(
10050                         "bad extent[%llu, %llu) type mismatch with chunk",
10051                                         extent_key.objectid,
10052                                         extent_key.objectid + nodesize);
10053                                 err |= CHUNK_TYPE_MISMATCH;
10054                         }
10055                 }
10056 next:
10057                 ret = btrfs_next_item(extent_root, &path);
10058                 if (ret)
10059                         break;
10060         }
10061
10062 out:
10063         btrfs_release_path(&path);
10064
10065         if (total != used) {
10066                 error(
10067                 "block group[%llu %llu] used %llu but extent items used %llu",
10068                         bg_key.objectid, bg_key.offset, used, total);
10069                 err |= ACCOUNTING_MISMATCH;
10070         }
10071         return err;
10072 }
10073
10074 /*
10075  * Check a chunk item.
10076  * Including checking all referred dev_extents and block group
10077  */
10078 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10079                             struct extent_buffer *eb, int slot)
10080 {
10081         struct btrfs_root *extent_root = fs_info->extent_root;
10082         struct btrfs_root *dev_root = fs_info->dev_root;
10083         struct btrfs_path path;
10084         struct btrfs_key chunk_key;
10085         struct btrfs_key bg_key;
10086         struct btrfs_key devext_key;
10087         struct btrfs_chunk *chunk;
10088         struct extent_buffer *leaf;
10089         struct btrfs_block_group_item *bi;
10090         struct btrfs_block_group_item bg_item;
10091         struct btrfs_dev_extent *ptr;
10092         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10093         u64 length;
10094         u64 chunk_end;
10095         u64 type;
10096         u64 profile;
10097         int num_stripes;
10098         u64 offset;
10099         u64 objectid;
10100         int i;
10101         int ret;
10102         int err = 0;
10103
10104         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10105         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10106         length = btrfs_chunk_length(eb, chunk);
10107         chunk_end = chunk_key.offset + length;
10108         if (!IS_ALIGNED(length, sectorsize)) {
10109                 error("chunk[%llu %llu) not aligned to %u",
10110                         chunk_key.offset, chunk_end, sectorsize);
10111                 err |= BYTES_UNALIGNED;
10112                 goto out;
10113         }
10114
10115         type = btrfs_chunk_type(eb, chunk);
10116         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10117         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10118                 error("chunk[%llu %llu) has no chunk type",
10119                         chunk_key.offset, chunk_end);
10120                 err |= UNKNOWN_TYPE;
10121         }
10122         if (profile && (profile & (profile - 1))) {
10123                 error("chunk[%llu %llu) multiple profiles detected: %llx",
10124                         chunk_key.offset, chunk_end, profile);
10125                 err |= UNKNOWN_TYPE;
10126         }
10127
10128         bg_key.objectid = chunk_key.offset;
10129         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10130         bg_key.offset = length;
10131
10132         btrfs_init_path(&path);
10133         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10134         if (ret) {
10135                 error(
10136                 "chunk[%llu %llu) did not find the related block group item",
10137                         chunk_key.offset, chunk_end);
10138                 err |= REFERENCER_MISSING;
10139         } else{
10140                 leaf = path.nodes[0];
10141                 bi = btrfs_item_ptr(leaf, path.slots[0],
10142                                     struct btrfs_block_group_item);
10143                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10144                                    sizeof(bg_item));
10145                 if (btrfs_block_group_flags(&bg_item) != type) {
10146                         error(
10147 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10148                                 chunk_key.offset, chunk_end, type,
10149                                 btrfs_block_group_flags(&bg_item));
10150                         err |= REFERENCER_MISSING;
10151                 }
10152         }
10153
10154         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10155         for (i = 0; i < num_stripes; i++) {
10156                 btrfs_release_path(&path);
10157                 btrfs_init_path(&path);
10158                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10159                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10160                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10161
10162                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10163                                         0, 0);
10164                 if (ret)
10165                         goto not_match_dev;
10166
10167                 leaf = path.nodes[0];
10168                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10169                                      struct btrfs_dev_extent);
10170                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10171                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10172                 if (objectid != chunk_key.objectid ||
10173                     offset != chunk_key.offset ||
10174                     btrfs_dev_extent_length(leaf, ptr) != length)
10175                         goto not_match_dev;
10176                 continue;
10177 not_match_dev:
10178                 err |= BACKREF_MISSING;
10179                 error(
10180                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10181                         chunk_key.objectid, chunk_end, i);
10182                 continue;
10183         }
10184         btrfs_release_path(&path);
10185 out:
10186         return err;
10187 }
10188
10189 /*
10190  * Main entry function to check known items and update related accounting info
10191  */
10192 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10193 {
10194         struct btrfs_fs_info *fs_info = root->fs_info;
10195         struct btrfs_key key;
10196         int slot = 0;
10197         int type;
10198         struct btrfs_extent_data_ref *dref;
10199         int ret;
10200         int err = 0;
10201
10202 next:
10203         btrfs_item_key_to_cpu(eb, &key, slot);
10204         type = key.type;
10205
10206         switch (type) {
10207         case BTRFS_EXTENT_DATA_KEY:
10208                 ret = check_extent_data_item(root, eb, slot);
10209                 err |= ret;
10210                 break;
10211         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10212                 ret = check_block_group_item(fs_info, eb, slot);
10213                 err |= ret;
10214                 break;
10215         case BTRFS_DEV_ITEM_KEY:
10216                 ret = check_dev_item(fs_info, eb, slot);
10217                 err |= ret;
10218                 break;
10219         case BTRFS_CHUNK_ITEM_KEY:
10220                 ret = check_chunk_item(fs_info, eb, slot);
10221                 err |= ret;
10222                 break;
10223         case BTRFS_DEV_EXTENT_KEY:
10224                 ret = check_dev_extent_item(fs_info, eb, slot);
10225                 err |= ret;
10226                 break;
10227         case BTRFS_EXTENT_ITEM_KEY:
10228         case BTRFS_METADATA_ITEM_KEY:
10229                 ret = check_extent_item(fs_info, eb, slot);
10230                 err |= ret;
10231                 break;
10232         case BTRFS_EXTENT_CSUM_KEY:
10233                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10234                 break;
10235         case BTRFS_TREE_BLOCK_REF_KEY:
10236                 ret = check_tree_block_backref(fs_info, key.offset,
10237                                                key.objectid, -1);
10238                 err |= ret;
10239                 break;
10240         case BTRFS_EXTENT_DATA_REF_KEY:
10241                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10242                 ret = check_extent_data_backref(fs_info,
10243                                 btrfs_extent_data_ref_root(eb, dref),
10244                                 btrfs_extent_data_ref_objectid(eb, dref),
10245                                 btrfs_extent_data_ref_offset(eb, dref),
10246                                 key.objectid, 0,
10247                                 btrfs_extent_data_ref_count(eb, dref));
10248                 err |= ret;
10249                 break;
10250         case BTRFS_SHARED_BLOCK_REF_KEY:
10251                 ret = check_shared_block_backref(fs_info, key.offset,
10252                                                  key.objectid, -1);
10253                 err |= ret;
10254                 break;
10255         case BTRFS_SHARED_DATA_REF_KEY:
10256                 ret = check_shared_data_backref(fs_info, key.offset,
10257                                                 key.objectid);
10258                 err |= ret;
10259                 break;
10260         default:
10261                 break;
10262         }
10263
10264         if (++slot < btrfs_header_nritems(eb))
10265                 goto next;
10266
10267         return err;
10268 }
10269
10270 /*
10271  * Helper function for later fs/subvol tree check.  To determine if a tree
10272  * block should be checked.
10273  * This function will ensure only the direct referencer with lowest rootid to
10274  * check a fs/subvolume tree block.
10275  *
10276  * Backref check at extent tree would detect errors like missing subvolume
10277  * tree, so we can do aggressive check to reduce duplicated checks.
10278  */
10279 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10280 {
10281         struct btrfs_root *extent_root = root->fs_info->extent_root;
10282         struct btrfs_key key;
10283         struct btrfs_path path;
10284         struct extent_buffer *leaf;
10285         int slot;
10286         struct btrfs_extent_item *ei;
10287         unsigned long ptr;
10288         unsigned long end;
10289         int type;
10290         u32 item_size;
10291         u64 offset;
10292         struct btrfs_extent_inline_ref *iref;
10293         int ret;
10294
10295         btrfs_init_path(&path);
10296         key.objectid = btrfs_header_bytenr(eb);
10297         key.type = BTRFS_METADATA_ITEM_KEY;
10298         key.offset = (u64)-1;
10299
10300         /*
10301          * Any failure in backref resolving means we can't determine
10302          * whom the tree block belongs to.
10303          * So in that case, we need to check that tree block
10304          */
10305         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10306         if (ret < 0)
10307                 goto need_check;
10308
10309         ret = btrfs_previous_extent_item(extent_root, &path,
10310                                          btrfs_header_bytenr(eb));
10311         if (ret)
10312                 goto need_check;
10313
10314         leaf = path.nodes[0];
10315         slot = path.slots[0];
10316         btrfs_item_key_to_cpu(leaf, &key, slot);
10317         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10318
10319         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10320                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10321         } else {
10322                 struct btrfs_tree_block_info *info;
10323
10324                 info = (struct btrfs_tree_block_info *)(ei + 1);
10325                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10326         }
10327
10328         item_size = btrfs_item_size_nr(leaf, slot);
10329         ptr = (unsigned long)iref;
10330         end = (unsigned long)ei + item_size;
10331         while (ptr < end) {
10332                 iref = (struct btrfs_extent_inline_ref *)ptr;
10333                 type = btrfs_extent_inline_ref_type(leaf, iref);
10334                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10335
10336                 /*
10337                  * We only check the tree block if current root is
10338                  * the lowest referencer of it.
10339                  */
10340                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10341                     offset < root->objectid) {
10342                         btrfs_release_path(&path);
10343                         return 0;
10344                 }
10345
10346                 ptr += btrfs_extent_inline_ref_size(type);
10347         }
10348         /*
10349          * Normally we should also check keyed tree block ref, but that may be
10350          * very time consuming.  Inlined ref should already make us skip a lot
10351          * of refs now.  So skip search keyed tree block ref.
10352          */
10353
10354 need_check:
10355         btrfs_release_path(&path);
10356         return 1;
10357 }
10358
10359 /*
10360  * Traversal function for tree block. We will do:
10361  * 1) Skip shared fs/subvolume tree blocks
10362  * 2) Update related bytes accounting
10363  * 3) Pre-order traversal
10364  */
10365 static int traverse_tree_block(struct btrfs_root *root,
10366                                 struct extent_buffer *node)
10367 {
10368         struct extent_buffer *eb;
10369         struct btrfs_key key;
10370         struct btrfs_key drop_key;
10371         int level;
10372         u64 nr;
10373         int i;
10374         int err = 0;
10375         int ret;
10376
10377         /*
10378          * Skip shared fs/subvolume tree block, in that case they will
10379          * be checked by referencer with lowest rootid
10380          */
10381         if (is_fstree(root->objectid) && !should_check(root, node))
10382                 return 0;
10383
10384         /* Update bytes accounting */
10385         total_btree_bytes += node->len;
10386         if (fs_root_objectid(btrfs_header_owner(node)))
10387                 total_fs_tree_bytes += node->len;
10388         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10389                 total_extent_tree_bytes += node->len;
10390         if (!found_old_backref &&
10391             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10392             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10393             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10394                 found_old_backref = 1;
10395
10396         /* pre-order tranversal, check itself first */
10397         level = btrfs_header_level(node);
10398         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10399                                    btrfs_header_level(node),
10400                                    btrfs_header_owner(node));
10401         err |= ret;
10402         if (err)
10403                 error(
10404         "check %s failed root %llu bytenr %llu level %d, force continue check",
10405                         level ? "node":"leaf", root->objectid,
10406                         btrfs_header_bytenr(node), btrfs_header_level(node));
10407
10408         if (!level) {
10409                 btree_space_waste += btrfs_leaf_free_space(root, node);
10410                 ret = check_leaf_items(root, node);
10411                 err |= ret;
10412                 return err;
10413         }
10414
10415         nr = btrfs_header_nritems(node);
10416         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10417         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10418                 sizeof(struct btrfs_key_ptr);
10419
10420         /* Then check all its children */
10421         for (i = 0; i < nr; i++) {
10422                 u64 blocknr = btrfs_node_blockptr(node, i);
10423
10424                 btrfs_node_key_to_cpu(node, &key, i);
10425                 if (level == root->root_item.drop_level &&
10426                     is_dropped_key(&key, &drop_key))
10427                         continue;
10428
10429                 /*
10430                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10431                  * to call the function itself.
10432                  */
10433                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10434                 if (extent_buffer_uptodate(eb)) {
10435                         ret = traverse_tree_block(root, eb);
10436                         err |= ret;
10437                 }
10438                 free_extent_buffer(eb);
10439         }
10440
10441         return err;
10442 }
10443
10444 /*
10445  * Low memory usage version check_chunks_and_extents.
10446  */
10447 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10448 {
10449         struct btrfs_path path;
10450         struct btrfs_key key;
10451         struct btrfs_root *root1;
10452         struct btrfs_root *cur_root;
10453         int err = 0;
10454         int ret;
10455
10456         root1 = root->fs_info->chunk_root;
10457         ret = traverse_tree_block(root1, root1->node);
10458         err |= ret;
10459
10460         root1 = root->fs_info->tree_root;
10461         ret = traverse_tree_block(root1, root1->node);
10462         err |= ret;
10463
10464         btrfs_init_path(&path);
10465         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10466         key.offset = 0;
10467         key.type = BTRFS_ROOT_ITEM_KEY;
10468
10469         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10470         if (ret) {
10471                 error("cannot find extent treet in tree_root");
10472                 goto out;
10473         }
10474
10475         while (1) {
10476                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10477                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10478                         goto next;
10479                 key.offset = (u64)-1;
10480
10481                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10482                 if (IS_ERR(cur_root) || !cur_root) {
10483                         error("failed to read tree: %lld", key.objectid);
10484                         goto next;
10485                 }
10486
10487                 ret = traverse_tree_block(cur_root, cur_root->node);
10488                 err |= ret;
10489
10490 next:
10491                 ret = btrfs_next_item(root1, &path);
10492                 if (ret)
10493                         goto out;
10494         }
10495
10496 out:
10497         btrfs_release_path(&path);
10498         return err;
10499 }
10500
10501 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10502                            struct btrfs_root *root, int overwrite)
10503 {
10504         struct extent_buffer *c;
10505         struct extent_buffer *old = root->node;
10506         int level;
10507         int ret;
10508         struct btrfs_disk_key disk_key = {0,0,0};
10509
10510         level = 0;
10511
10512         if (overwrite) {
10513                 c = old;
10514                 extent_buffer_get(c);
10515                 goto init;
10516         }
10517         c = btrfs_alloc_free_block(trans, root,
10518                                    root->nodesize,
10519                                    root->root_key.objectid,
10520                                    &disk_key, level, 0, 0);
10521         if (IS_ERR(c)) {
10522                 c = old;
10523                 extent_buffer_get(c);
10524                 overwrite = 1;
10525         }
10526 init:
10527         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10528         btrfs_set_header_level(c, level);
10529         btrfs_set_header_bytenr(c, c->start);
10530         btrfs_set_header_generation(c, trans->transid);
10531         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10532         btrfs_set_header_owner(c, root->root_key.objectid);
10533
10534         write_extent_buffer(c, root->fs_info->fsid,
10535                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10536
10537         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10538                             btrfs_header_chunk_tree_uuid(c),
10539                             BTRFS_UUID_SIZE);
10540
10541         btrfs_mark_buffer_dirty(c);
10542         /*
10543          * this case can happen in the following case:
10544          *
10545          * 1.overwrite previous root.
10546          *
10547          * 2.reinit reloc data root, this is because we skip pin
10548          * down reloc data tree before which means we can allocate
10549          * same block bytenr here.
10550          */
10551         if (old->start == c->start) {
10552                 btrfs_set_root_generation(&root->root_item,
10553                                           trans->transid);
10554                 root->root_item.level = btrfs_header_level(root->node);
10555                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10556                                         &root->root_key, &root->root_item);
10557                 if (ret) {
10558                         free_extent_buffer(c);
10559                         return ret;
10560                 }
10561         }
10562         free_extent_buffer(old);
10563         root->node = c;
10564         add_root_to_dirty_list(root);
10565         return 0;
10566 }
10567
10568 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10569                                 struct extent_buffer *eb, int tree_root)
10570 {
10571         struct extent_buffer *tmp;
10572         struct btrfs_root_item *ri;
10573         struct btrfs_key key;
10574         u64 bytenr;
10575         u32 nodesize;
10576         int level = btrfs_header_level(eb);
10577         int nritems;
10578         int ret;
10579         int i;
10580
10581         /*
10582          * If we have pinned this block before, don't pin it again.
10583          * This can not only avoid forever loop with broken filesystem
10584          * but also give us some speedups.
10585          */
10586         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10587                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10588                 return 0;
10589
10590         btrfs_pin_extent(fs_info, eb->start, eb->len);
10591
10592         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10593         nritems = btrfs_header_nritems(eb);
10594         for (i = 0; i < nritems; i++) {
10595                 if (level == 0) {
10596                         btrfs_item_key_to_cpu(eb, &key, i);
10597                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10598                                 continue;
10599                         /* Skip the extent root and reloc roots */
10600                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10601                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10602                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10603                                 continue;
10604                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10605                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10606
10607                         /*
10608                          * If at any point we start needing the real root we
10609                          * will have to build a stump root for the root we are
10610                          * in, but for now this doesn't actually use the root so
10611                          * just pass in extent_root.
10612                          */
10613                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10614                                               nodesize, 0);
10615                         if (!extent_buffer_uptodate(tmp)) {
10616                                 fprintf(stderr, "Error reading root block\n");
10617                                 return -EIO;
10618                         }
10619                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10620                         free_extent_buffer(tmp);
10621                         if (ret)
10622                                 return ret;
10623                 } else {
10624                         bytenr = btrfs_node_blockptr(eb, i);
10625
10626                         /* If we aren't the tree root don't read the block */
10627                         if (level == 1 && !tree_root) {
10628                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10629                                 continue;
10630                         }
10631
10632                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10633                                               nodesize, 0);
10634                         if (!extent_buffer_uptodate(tmp)) {
10635                                 fprintf(stderr, "Error reading tree block\n");
10636                                 return -EIO;
10637                         }
10638                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10639                         free_extent_buffer(tmp);
10640                         if (ret)
10641                                 return ret;
10642                 }
10643         }
10644
10645         return 0;
10646 }
10647
10648 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10649 {
10650         int ret;
10651
10652         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10653         if (ret)
10654                 return ret;
10655
10656         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10657 }
10658
10659 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10660 {
10661         struct btrfs_block_group_cache *cache;
10662         struct btrfs_path path;
10663         struct extent_buffer *leaf;
10664         struct btrfs_chunk *chunk;
10665         struct btrfs_key key;
10666         int ret;
10667         u64 start;
10668
10669         btrfs_init_path(&path);
10670         key.objectid = 0;
10671         key.type = BTRFS_CHUNK_ITEM_KEY;
10672         key.offset = 0;
10673         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10674         if (ret < 0) {
10675                 btrfs_release_path(&path);
10676                 return ret;
10677         }
10678
10679         /*
10680          * We do this in case the block groups were screwed up and had alloc
10681          * bits that aren't actually set on the chunks.  This happens with
10682          * restored images every time and could happen in real life I guess.
10683          */
10684         fs_info->avail_data_alloc_bits = 0;
10685         fs_info->avail_metadata_alloc_bits = 0;
10686         fs_info->avail_system_alloc_bits = 0;
10687
10688         /* First we need to create the in-memory block groups */
10689         while (1) {
10690                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10691                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10692                         if (ret < 0) {
10693                                 btrfs_release_path(&path);
10694                                 return ret;
10695                         }
10696                         if (ret) {
10697                                 ret = 0;
10698                                 break;
10699                         }
10700                 }
10701                 leaf = path.nodes[0];
10702                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10703                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10704                         path.slots[0]++;
10705                         continue;
10706                 }
10707
10708                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10709                 btrfs_add_block_group(fs_info, 0,
10710                                       btrfs_chunk_type(leaf, chunk),
10711                                       key.objectid, key.offset,
10712                                       btrfs_chunk_length(leaf, chunk));
10713                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10714                                  key.offset + btrfs_chunk_length(leaf, chunk),
10715                                  GFP_NOFS);
10716                 path.slots[0]++;
10717         }
10718         start = 0;
10719         while (1) {
10720                 cache = btrfs_lookup_first_block_group(fs_info, start);
10721                 if (!cache)
10722                         break;
10723                 cache->cached = 1;
10724                 start = cache->key.objectid + cache->key.offset;
10725         }
10726
10727         btrfs_release_path(&path);
10728         return 0;
10729 }
10730
10731 static int reset_balance(struct btrfs_trans_handle *trans,
10732                          struct btrfs_fs_info *fs_info)
10733 {
10734         struct btrfs_root *root = fs_info->tree_root;
10735         struct btrfs_path path;
10736         struct extent_buffer *leaf;
10737         struct btrfs_key key;
10738         int del_slot, del_nr = 0;
10739         int ret;
10740         int found = 0;
10741
10742         btrfs_init_path(&path);
10743         key.objectid = BTRFS_BALANCE_OBJECTID;
10744         key.type = BTRFS_BALANCE_ITEM_KEY;
10745         key.offset = 0;
10746         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10747         if (ret) {
10748                 if (ret > 0)
10749                         ret = 0;
10750                 if (!ret)
10751                         goto reinit_data_reloc;
10752                 else
10753                         goto out;
10754         }
10755
10756         ret = btrfs_del_item(trans, root, &path);
10757         if (ret)
10758                 goto out;
10759         btrfs_release_path(&path);
10760
10761         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10762         key.type = BTRFS_ROOT_ITEM_KEY;
10763         key.offset = 0;
10764         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10765         if (ret < 0)
10766                 goto out;
10767         while (1) {
10768                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10769                         if (!found)
10770                                 break;
10771
10772                         if (del_nr) {
10773                                 ret = btrfs_del_items(trans, root, &path,
10774                                                       del_slot, del_nr);
10775                                 del_nr = 0;
10776                                 if (ret)
10777                                         goto out;
10778                         }
10779                         key.offset++;
10780                         btrfs_release_path(&path);
10781
10782                         found = 0;
10783                         ret = btrfs_search_slot(trans, root, &key, &path,
10784                                                 -1, 1);
10785                         if (ret < 0)
10786                                 goto out;
10787                         continue;
10788                 }
10789                 found = 1;
10790                 leaf = path.nodes[0];
10791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10792                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10793                         break;
10794                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10795                         path.slots[0]++;
10796                         continue;
10797                 }
10798                 if (!del_nr) {
10799                         del_slot = path.slots[0];
10800                         del_nr = 1;
10801                 } else {
10802                         del_nr++;
10803                 }
10804                 path.slots[0]++;
10805         }
10806
10807         if (del_nr) {
10808                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10809                 if (ret)
10810                         goto out;
10811         }
10812         btrfs_release_path(&path);
10813
10814 reinit_data_reloc:
10815         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10816         key.type = BTRFS_ROOT_ITEM_KEY;
10817         key.offset = (u64)-1;
10818         root = btrfs_read_fs_root(fs_info, &key);
10819         if (IS_ERR(root)) {
10820                 fprintf(stderr, "Error reading data reloc tree\n");
10821                 ret = PTR_ERR(root);
10822                 goto out;
10823         }
10824         record_root_in_trans(trans, root);
10825         ret = btrfs_fsck_reinit_root(trans, root, 0);
10826         if (ret)
10827                 goto out;
10828         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10829 out:
10830         btrfs_release_path(&path);
10831         return ret;
10832 }
10833
10834 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10835                               struct btrfs_fs_info *fs_info)
10836 {
10837         u64 start = 0;
10838         int ret;
10839
10840         /*
10841          * The only reason we don't do this is because right now we're just
10842          * walking the trees we find and pinning down their bytes, we don't look
10843          * at any of the leaves.  In order to do mixed groups we'd have to check
10844          * the leaves of any fs roots and pin down the bytes for any file
10845          * extents we find.  Not hard but why do it if we don't have to?
10846          */
10847         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10848                 fprintf(stderr, "We don't support re-initing the extent tree "
10849                         "for mixed block groups yet, please notify a btrfs "
10850                         "developer you want to do this so they can add this "
10851                         "functionality.\n");
10852                 return -EINVAL;
10853         }
10854
10855         /*
10856          * first we need to walk all of the trees except the extent tree and pin
10857          * down the bytes that are in use so we don't overwrite any existing
10858          * metadata.
10859          */
10860         ret = pin_metadata_blocks(fs_info);
10861         if (ret) {
10862                 fprintf(stderr, "error pinning down used bytes\n");
10863                 return ret;
10864         }
10865
10866         /*
10867          * Need to drop all the block groups since we're going to recreate all
10868          * of them again.
10869          */
10870         btrfs_free_block_groups(fs_info);
10871         ret = reset_block_groups(fs_info);
10872         if (ret) {
10873                 fprintf(stderr, "error resetting the block groups\n");
10874                 return ret;
10875         }
10876
10877         /* Ok we can allocate now, reinit the extent root */
10878         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10879         if (ret) {
10880                 fprintf(stderr, "extent root initialization failed\n");
10881                 /*
10882                  * When the transaction code is updated we should end the
10883                  * transaction, but for now progs only knows about commit so
10884                  * just return an error.
10885                  */
10886                 return ret;
10887         }
10888
10889         /*
10890          * Now we have all the in-memory block groups setup so we can make
10891          * allocations properly, and the metadata we care about is safe since we
10892          * pinned all of it above.
10893          */
10894         while (1) {
10895                 struct btrfs_block_group_cache *cache;
10896
10897                 cache = btrfs_lookup_first_block_group(fs_info, start);
10898                 if (!cache)
10899                         break;
10900                 start = cache->key.objectid + cache->key.offset;
10901                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10902                                         &cache->key, &cache->item,
10903                                         sizeof(cache->item));
10904                 if (ret) {
10905                         fprintf(stderr, "Error adding block group\n");
10906                         return ret;
10907                 }
10908                 btrfs_extent_post_op(trans, fs_info->extent_root);
10909         }
10910
10911         ret = reset_balance(trans, fs_info);
10912         if (ret)
10913                 fprintf(stderr, "error resetting the pending balance\n");
10914
10915         return ret;
10916 }
10917
10918 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10919 {
10920         struct btrfs_path path;
10921         struct btrfs_trans_handle *trans;
10922         struct btrfs_key key;
10923         int ret;
10924
10925         printf("Recowing metadata block %llu\n", eb->start);
10926         key.objectid = btrfs_header_owner(eb);
10927         key.type = BTRFS_ROOT_ITEM_KEY;
10928         key.offset = (u64)-1;
10929
10930         root = btrfs_read_fs_root(root->fs_info, &key);
10931         if (IS_ERR(root)) {
10932                 fprintf(stderr, "Couldn't find owner root %llu\n",
10933                         key.objectid);
10934                 return PTR_ERR(root);
10935         }
10936
10937         trans = btrfs_start_transaction(root, 1);
10938         if (IS_ERR(trans))
10939                 return PTR_ERR(trans);
10940
10941         btrfs_init_path(&path);
10942         path.lowest_level = btrfs_header_level(eb);
10943         if (path.lowest_level)
10944                 btrfs_node_key_to_cpu(eb, &key, 0);
10945         else
10946                 btrfs_item_key_to_cpu(eb, &key, 0);
10947
10948         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10949         btrfs_commit_transaction(trans, root);
10950         btrfs_release_path(&path);
10951         return ret;
10952 }
10953
10954 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10955 {
10956         struct btrfs_path path;
10957         struct btrfs_trans_handle *trans;
10958         struct btrfs_key key;
10959         int ret;
10960
10961         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10962                bad->key.type, bad->key.offset);
10963         key.objectid = bad->root_id;
10964         key.type = BTRFS_ROOT_ITEM_KEY;
10965         key.offset = (u64)-1;
10966
10967         root = btrfs_read_fs_root(root->fs_info, &key);
10968         if (IS_ERR(root)) {
10969                 fprintf(stderr, "Couldn't find owner root %llu\n",
10970                         key.objectid);
10971                 return PTR_ERR(root);
10972         }
10973
10974         trans = btrfs_start_transaction(root, 1);
10975         if (IS_ERR(trans))
10976                 return PTR_ERR(trans);
10977
10978         btrfs_init_path(&path);
10979         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10980         if (ret) {
10981                 if (ret > 0)
10982                         ret = 0;
10983                 goto out;
10984         }
10985         ret = btrfs_del_item(trans, root, &path);
10986 out:
10987         btrfs_commit_transaction(trans, root);
10988         btrfs_release_path(&path);
10989         return ret;
10990 }
10991
10992 static int zero_log_tree(struct btrfs_root *root)
10993 {
10994         struct btrfs_trans_handle *trans;
10995         int ret;
10996
10997         trans = btrfs_start_transaction(root, 1);
10998         if (IS_ERR(trans)) {
10999                 ret = PTR_ERR(trans);
11000                 return ret;
11001         }
11002         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11003         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11004         ret = btrfs_commit_transaction(trans, root);
11005         return ret;
11006 }
11007
11008 static int populate_csum(struct btrfs_trans_handle *trans,
11009                          struct btrfs_root *csum_root, char *buf, u64 start,
11010                          u64 len)
11011 {
11012         u64 offset = 0;
11013         u64 sectorsize;
11014         int ret = 0;
11015
11016         while (offset < len) {
11017                 sectorsize = csum_root->sectorsize;
11018                 ret = read_extent_data(csum_root, buf, start + offset,
11019                                        &sectorsize, 0);
11020                 if (ret)
11021                         break;
11022                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11023                                             start + offset, buf, sectorsize);
11024                 if (ret)
11025                         break;
11026                 offset += sectorsize;
11027         }
11028         return ret;
11029 }
11030
11031 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11032                                       struct btrfs_root *csum_root,
11033                                       struct btrfs_root *cur_root)
11034 {
11035         struct btrfs_path path;
11036         struct btrfs_key key;
11037         struct extent_buffer *node;
11038         struct btrfs_file_extent_item *fi;
11039         char *buf = NULL;
11040         u64 start = 0;
11041         u64 len = 0;
11042         int slot = 0;
11043         int ret = 0;
11044
11045         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11046         if (!buf)
11047                 return -ENOMEM;
11048
11049         btrfs_init_path(&path);
11050         key.objectid = 0;
11051         key.offset = 0;
11052         key.type = 0;
11053         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11054         if (ret < 0)
11055                 goto out;
11056         /* Iterate all regular file extents and fill its csum */
11057         while (1) {
11058                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11059
11060                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11061                         goto next;
11062                 node = path.nodes[0];
11063                 slot = path.slots[0];
11064                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11065                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11066                         goto next;
11067                 start = btrfs_file_extent_disk_bytenr(node, fi);
11068                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11069
11070                 ret = populate_csum(trans, csum_root, buf, start, len);
11071                 if (ret == -EEXIST)
11072                         ret = 0;
11073                 if (ret < 0)
11074                         goto out;
11075 next:
11076                 /*
11077                  * TODO: if next leaf is corrupted, jump to nearest next valid
11078                  * leaf.
11079                  */
11080                 ret = btrfs_next_item(cur_root, &path);
11081                 if (ret < 0)
11082                         goto out;
11083                 if (ret > 0) {
11084                         ret = 0;
11085                         goto out;
11086                 }
11087         }
11088
11089 out:
11090         btrfs_release_path(&path);
11091         free(buf);
11092         return ret;
11093 }
11094
11095 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11096                                   struct btrfs_root *csum_root)
11097 {
11098         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11099         struct btrfs_path path;
11100         struct btrfs_root *tree_root = fs_info->tree_root;
11101         struct btrfs_root *cur_root;
11102         struct extent_buffer *node;
11103         struct btrfs_key key;
11104         int slot = 0;
11105         int ret = 0;
11106
11107         btrfs_init_path(&path);
11108         key.objectid = BTRFS_FS_TREE_OBJECTID;
11109         key.offset = 0;
11110         key.type = BTRFS_ROOT_ITEM_KEY;
11111         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11112         if (ret < 0)
11113                 goto out;
11114         if (ret > 0) {
11115                 ret = -ENOENT;
11116                 goto out;
11117         }
11118
11119         while (1) {
11120                 node = path.nodes[0];
11121                 slot = path.slots[0];
11122                 btrfs_item_key_to_cpu(node, &key, slot);
11123                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11124                         goto out;
11125                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11126                         goto next;
11127                 if (!is_fstree(key.objectid))
11128                         goto next;
11129                 key.offset = (u64)-1;
11130
11131                 cur_root = btrfs_read_fs_root(fs_info, &key);
11132                 if (IS_ERR(cur_root) || !cur_root) {
11133                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11134                                 key.objectid);
11135                         goto out;
11136                 }
11137                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11138                                 cur_root);
11139                 if (ret < 0)
11140                         goto out;
11141 next:
11142                 ret = btrfs_next_item(tree_root, &path);
11143                 if (ret > 0) {
11144                         ret = 0;
11145                         goto out;
11146                 }
11147                 if (ret < 0)
11148                         goto out;
11149         }
11150
11151 out:
11152         btrfs_release_path(&path);
11153         return ret;
11154 }
11155
11156 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11157                                       struct btrfs_root *csum_root)
11158 {
11159         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11160         struct btrfs_path path;
11161         struct btrfs_extent_item *ei;
11162         struct extent_buffer *leaf;
11163         char *buf;
11164         struct btrfs_key key;
11165         int ret;
11166
11167         btrfs_init_path(&path);
11168         key.objectid = 0;
11169         key.type = BTRFS_EXTENT_ITEM_KEY;
11170         key.offset = 0;
11171         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11172         if (ret < 0) {
11173                 btrfs_release_path(&path);
11174                 return ret;
11175         }
11176
11177         buf = malloc(csum_root->sectorsize);
11178         if (!buf) {
11179                 btrfs_release_path(&path);
11180                 return -ENOMEM;
11181         }
11182
11183         while (1) {
11184                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11185                         ret = btrfs_next_leaf(extent_root, &path);
11186                         if (ret < 0)
11187                                 break;
11188                         if (ret) {
11189                                 ret = 0;
11190                                 break;
11191                         }
11192                 }
11193                 leaf = path.nodes[0];
11194
11195                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11196                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11197                         path.slots[0]++;
11198                         continue;
11199                 }
11200
11201                 ei = btrfs_item_ptr(leaf, path.slots[0],
11202                                     struct btrfs_extent_item);
11203                 if (!(btrfs_extent_flags(leaf, ei) &
11204                       BTRFS_EXTENT_FLAG_DATA)) {
11205                         path.slots[0]++;
11206                         continue;
11207                 }
11208
11209                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11210                                     key.offset);
11211                 if (ret)
11212                         break;
11213                 path.slots[0]++;
11214         }
11215
11216         btrfs_release_path(&path);
11217         free(buf);
11218         return ret;
11219 }
11220
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * @trans:          transaction handle passed on to the chosen helper
 * @csum_root:      root the checksums are added to
 * @search_fs_tree: non-zero to collect the data extents from the fs/subvol
 *                  trees, zero to collect them from the extent tree
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree cannot be relied on and the fs/subvol trees have to be used instead.
 *
 * Returns whatever the chosen helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
11237
11238 static void free_roots_info_cache(void)
11239 {
11240         if (!roots_info_cache)
11241                 return;
11242
11243         while (!cache_tree_empty(roots_info_cache)) {
11244                 struct cache_extent *entry;
11245                 struct root_item_info *rii;
11246
11247                 entry = first_cache_extent(roots_info_cache);
11248                 if (!entry)
11249                         break;
11250                 remove_cache_extent(roots_info_cache, entry);
11251                 rii = container_of(entry, struct root_item_info, cache_extent);
11252                 free(rii);
11253         }
11254
11255         free(roots_info_cache);
11256         roots_info_cache = NULL;
11257 }
11258
11259 static int build_roots_info_cache(struct btrfs_fs_info *info)
11260 {
11261         int ret = 0;
11262         struct btrfs_key key;
11263         struct extent_buffer *leaf;
11264         struct btrfs_path path;
11265
11266         if (!roots_info_cache) {
11267                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11268                 if (!roots_info_cache)
11269                         return -ENOMEM;
11270                 cache_tree_init(roots_info_cache);
11271         }
11272
11273         btrfs_init_path(&path);
11274         key.objectid = 0;
11275         key.type = BTRFS_EXTENT_ITEM_KEY;
11276         key.offset = 0;
11277         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11278         if (ret < 0)
11279                 goto out;
11280         leaf = path.nodes[0];
11281
11282         while (1) {
11283                 struct btrfs_key found_key;
11284                 struct btrfs_extent_item *ei;
11285                 struct btrfs_extent_inline_ref *iref;
11286                 int slot = path.slots[0];
11287                 int type;
11288                 u64 flags;
11289                 u64 root_id;
11290                 u8 level;
11291                 struct cache_extent *entry;
11292                 struct root_item_info *rii;
11293
11294                 if (slot >= btrfs_header_nritems(leaf)) {
11295                         ret = btrfs_next_leaf(info->extent_root, &path);
11296                         if (ret < 0) {
11297                                 break;
11298                         } else if (ret) {
11299                                 ret = 0;
11300                                 break;
11301                         }
11302                         leaf = path.nodes[0];
11303                         slot = path.slots[0];
11304                 }
11305
11306                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11307
11308                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11309                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11310                         goto next;
11311
11312                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11313                 flags = btrfs_extent_flags(leaf, ei);
11314
11315                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11316                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11317                         goto next;
11318
11319                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11320                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11321                         level = found_key.offset;
11322                 } else {
11323                         struct btrfs_tree_block_info *binfo;
11324
11325                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11326                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11327                         level = btrfs_tree_block_level(leaf, binfo);
11328                 }
11329
11330                 /*
11331                  * For a root extent, it must be of the following type and the
11332                  * first (and only one) iref in the item.
11333                  */
11334                 type = btrfs_extent_inline_ref_type(leaf, iref);
11335                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11336                         goto next;
11337
11338                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11339                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11340                 if (!entry) {
11341                         rii = malloc(sizeof(struct root_item_info));
11342                         if (!rii) {
11343                                 ret = -ENOMEM;
11344                                 goto out;
11345                         }
11346                         rii->cache_extent.start = root_id;
11347                         rii->cache_extent.size = 1;
11348                         rii->level = (u8)-1;
11349                         entry = &rii->cache_extent;
11350                         ret = insert_cache_extent(roots_info_cache, entry);
11351                         ASSERT(ret == 0);
11352                 } else {
11353                         rii = container_of(entry, struct root_item_info,
11354                                            cache_extent);
11355                 }
11356
11357                 ASSERT(rii->cache_extent.start == root_id);
11358                 ASSERT(rii->cache_extent.size == 1);
11359
11360                 if (level > rii->level || rii->level == (u8)-1) {
11361                         rii->level = level;
11362                         rii->bytenr = found_key.objectid;
11363                         rii->gen = btrfs_extent_generation(leaf, ei);
11364                         rii->node_count = 1;
11365                 } else if (level == rii->level) {
11366                         rii->node_count++;
11367                 }
11368 next:
11369                 path.slots[0]++;
11370         }
11371
11372 out:
11373         btrfs_release_path(&path);
11374
11375         return ret;
11376 }
11377
11378 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11379                                   struct btrfs_path *path,
11380                                   const struct btrfs_key *root_key,
11381                                   const int read_only_mode)
11382 {
11383         const u64 root_id = root_key->objectid;
11384         struct cache_extent *entry;
11385         struct root_item_info *rii;
11386         struct btrfs_root_item ri;
11387         unsigned long offset;
11388
11389         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11390         if (!entry) {
11391                 fprintf(stderr,
11392                         "Error: could not find extent items for root %llu\n",
11393                         root_key->objectid);
11394                 return -ENOENT;
11395         }
11396
11397         rii = container_of(entry, struct root_item_info, cache_extent);
11398         ASSERT(rii->cache_extent.start == root_id);
11399         ASSERT(rii->cache_extent.size == 1);
11400
11401         if (rii->node_count != 1) {
11402                 fprintf(stderr,
11403                         "Error: could not find btree root extent for root %llu\n",
11404                         root_id);
11405                 return -ENOENT;
11406         }
11407
11408         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11409         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11410
11411         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11412             btrfs_root_level(&ri) != rii->level ||
11413             btrfs_root_generation(&ri) != rii->gen) {
11414
11415                 /*
11416                  * If we're in repair mode but our caller told us to not update
11417                  * the root item, i.e. just check if it needs to be updated, don't
11418                  * print this message, since the caller will call us again shortly
11419                  * for the same root item without read only mode (the caller will
11420                  * open a transaction first).
11421                  */
11422                 if (!(read_only_mode && repair))
11423                         fprintf(stderr,
11424                                 "%sroot item for root %llu,"
11425                                 " current bytenr %llu, current gen %llu, current level %u,"
11426                                 " new bytenr %llu, new gen %llu, new level %u\n",
11427                                 (read_only_mode ? "" : "fixing "),
11428                                 root_id,
11429                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11430                                 btrfs_root_level(&ri),
11431                                 rii->bytenr, rii->gen, rii->level);
11432
11433                 if (btrfs_root_generation(&ri) > rii->gen) {
11434                         fprintf(stderr,
11435                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11436                                 root_id, btrfs_root_generation(&ri), rii->gen);
11437                         return -EINVAL;
11438                 }
11439
11440                 if (!read_only_mode) {
11441                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11442                         btrfs_set_root_level(&ri, rii->level);
11443                         btrfs_set_root_generation(&ri, rii->gen);
11444                         write_extent_buffer(path->nodes[0], &ri,
11445                                             offset, sizeof(ri));
11446                 }
11447
11448                 return 1;
11449         }
11450
11451         return 0;
11452 }
11453
11454 /*
11455  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11456  * caused read-only snapshots to be corrupted if they were created at a moment
11457  * when the source subvolume/snapshot had orphan items. The issue was that the
11458  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11459  * node instead of the post orphan cleanup root node.
11460  * So this function, and its callees, just detects and fixes those cases. Even
11461  * though the regression was for read-only snapshots, this function applies to
11462  * any snapshot/subvolume root.
11463  * This must be run before any other repair code - not doing it so, makes other
11464  * repair code delete or modify backrefs in the extent tree for example, which
11465  * will result in an inconsistent fs after repairing the root items.
11466  */
11467 static int repair_root_items(struct btrfs_fs_info *info)
11468 {
11469         struct btrfs_path path;
11470         struct btrfs_key key;
11471         struct extent_buffer *leaf;
11472         struct btrfs_trans_handle *trans = NULL;
11473         int ret = 0;
11474         int bad_roots = 0;
11475         int need_trans = 0;
11476
11477         btrfs_init_path(&path);
11478
11479         ret = build_roots_info_cache(info);
11480         if (ret)
11481                 goto out;
11482
11483         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11484         key.type = BTRFS_ROOT_ITEM_KEY;
11485         key.offset = 0;
11486
11487 again:
11488         /*
11489          * Avoid opening and committing transactions if a leaf doesn't have
11490          * any root items that need to be fixed, so that we avoid rotating
11491          * backup roots unnecessarily.
11492          */
11493         if (need_trans) {
11494                 trans = btrfs_start_transaction(info->tree_root, 1);
11495                 if (IS_ERR(trans)) {
11496                         ret = PTR_ERR(trans);
11497                         goto out;
11498                 }
11499         }
11500
11501         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11502                                 0, trans ? 1 : 0);
11503         if (ret < 0)
11504                 goto out;
11505         leaf = path.nodes[0];
11506
11507         while (1) {
11508                 struct btrfs_key found_key;
11509
11510                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11511                         int no_more_keys = find_next_key(&path, &key);
11512
11513                         btrfs_release_path(&path);
11514                         if (trans) {
11515                                 ret = btrfs_commit_transaction(trans,
11516                                                                info->tree_root);
11517                                 trans = NULL;
11518                                 if (ret < 0)
11519                                         goto out;
11520                         }
11521                         need_trans = 0;
11522                         if (no_more_keys)
11523                                 break;
11524                         goto again;
11525                 }
11526
11527                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11528
11529                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11530                         goto next;
11531                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11532                         goto next;
11533
11534                 ret = maybe_repair_root_item(info, &path, &found_key,
11535                                              trans ? 0 : 1);
11536                 if (ret < 0)
11537                         goto out;
11538                 if (ret) {
11539                         if (!trans && repair) {
11540                                 need_trans = 1;
11541                                 key = found_key;
11542                                 btrfs_release_path(&path);
11543                                 goto again;
11544                         }
11545                         bad_roots++;
11546                 }
11547 next:
11548                 path.slots[0]++;
11549         }
11550         ret = 0;
11551 out:
11552         free_roots_info_cache();
11553         btrfs_release_path(&path);
11554         if (trans)
11555                 btrfs_commit_transaction(trans, info->tree_root);
11556         if (ret < 0)
11557                 return ret;
11558
11559         return bad_roots;
11560 }
11561
11562 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11563 {
11564         struct btrfs_trans_handle *trans;
11565         struct btrfs_block_group_cache *bg_cache;
11566         u64 current = 0;
11567         int ret = 0;
11568
11569         /* Clear all free space cache inodes and its extent data */
11570         while (1) {
11571                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11572                 if (!bg_cache)
11573                         break;
11574                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11575                 if (ret < 0)
11576                         return ret;
11577                 current = bg_cache->key.objectid + bg_cache->key.offset;
11578         }
11579
11580         /* Don't forget to set cache_generation to -1 */
11581         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11582         if (IS_ERR(trans)) {
11583                 error("failed to update super block cache generation");
11584                 return PTR_ERR(trans);
11585         }
11586         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11587         btrfs_commit_transaction(trans, fs_info->tree_root);
11588
11589         return ret;
11590 }
11591
/*
 * Help text for "btrfs check". cmd_check() passes this NULL-terminated array
 * to usage() when the command line is malformed or help is requested.
 *
 * NOTE(review): "-Q" and "-E" are advertised below, but the getopt_long()
 * optstring used by cmd_check() is "as:br:p", so only the long forms
 * --qgroup-report and --subvol-extents appear to be accepted - confirm.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	/* Terminating NULL entry. */
	NULL
};
11622
11623 int cmd_check(int argc, char **argv)
11624 {
11625         struct cache_tree root_cache;
11626         struct btrfs_root *root;
11627         struct btrfs_fs_info *info;
11628         u64 bytenr = 0;
11629         u64 subvolid = 0;
11630         u64 tree_root_bytenr = 0;
11631         u64 chunk_root_bytenr = 0;
11632         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11633         int ret;
11634         u64 num;
11635         int init_csum_tree = 0;
11636         int readonly = 0;
11637         int clear_space_cache = 0;
11638         int qgroup_report = 0;
11639         int qgroups_repaired = 0;
11640         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11641
11642         while(1) {
11643                 int c;
11644                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11645                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11646                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11647                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11648                 static const struct option long_options[] = {
11649                         { "super", required_argument, NULL, 's' },
11650                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11651                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11652                         { "init-csum-tree", no_argument, NULL,
11653                                 GETOPT_VAL_INIT_CSUM },
11654                         { "init-extent-tree", no_argument, NULL,
11655                                 GETOPT_VAL_INIT_EXTENT },
11656                         { "check-data-csum", no_argument, NULL,
11657                                 GETOPT_VAL_CHECK_CSUM },
11658                         { "backup", no_argument, NULL, 'b' },
11659                         { "subvol-extents", required_argument, NULL, 'E' },
11660                         { "qgroup-report", no_argument, NULL, 'Q' },
11661                         { "tree-root", required_argument, NULL, 'r' },
11662                         { "chunk-root", required_argument, NULL,
11663                                 GETOPT_VAL_CHUNK_TREE },
11664                         { "progress", no_argument, NULL, 'p' },
11665                         { "mode", required_argument, NULL,
11666                                 GETOPT_VAL_MODE },
11667                         { "clear-space-cache", required_argument, NULL,
11668                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11669                         { NULL, 0, NULL, 0}
11670                 };
11671
11672                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11673                 if (c < 0)
11674                         break;
11675                 switch(c) {
11676                         case 'a': /* ignored */ break;
11677                         case 'b':
11678                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11679                                 break;
11680                         case 's':
11681                                 num = arg_strtou64(optarg);
11682                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11683                                         error(
11684                                         "super mirror should be less than %d",
11685                                                 BTRFS_SUPER_MIRROR_MAX);
11686                                         exit(1);
11687                                 }
11688                                 bytenr = btrfs_sb_offset(((int)num));
11689                                 printf("using SB copy %llu, bytenr %llu\n", num,
11690                                        (unsigned long long)bytenr);
11691                                 break;
11692                         case 'Q':
11693                                 qgroup_report = 1;
11694                                 break;
11695                         case 'E':
11696                                 subvolid = arg_strtou64(optarg);
11697                                 break;
11698                         case 'r':
11699                                 tree_root_bytenr = arg_strtou64(optarg);
11700                                 break;
11701                         case GETOPT_VAL_CHUNK_TREE:
11702                                 chunk_root_bytenr = arg_strtou64(optarg);
11703                                 break;
11704                         case 'p':
11705                                 ctx.progress_enabled = true;
11706                                 break;
11707                         case '?':
11708                         case 'h':
11709                                 usage(cmd_check_usage);
11710                         case GETOPT_VAL_REPAIR:
11711                                 printf("enabling repair mode\n");
11712                                 repair = 1;
11713                                 ctree_flags |= OPEN_CTREE_WRITES;
11714                                 break;
11715                         case GETOPT_VAL_READONLY:
11716                                 readonly = 1;
11717                                 break;
11718                         case GETOPT_VAL_INIT_CSUM:
11719                                 printf("Creating a new CRC tree\n");
11720                                 init_csum_tree = 1;
11721                                 repair = 1;
11722                                 ctree_flags |= OPEN_CTREE_WRITES;
11723                                 break;
11724                         case GETOPT_VAL_INIT_EXTENT:
11725                                 init_extent_tree = 1;
11726                                 ctree_flags |= (OPEN_CTREE_WRITES |
11727                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11728                                 repair = 1;
11729                                 break;
11730                         case GETOPT_VAL_CHECK_CSUM:
11731                                 check_data_csum = 1;
11732                                 break;
11733                         case GETOPT_VAL_MODE:
11734                                 check_mode = parse_check_mode(optarg);
11735                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11736                                         error("unknown mode: %s", optarg);
11737                                         exit(1);
11738                                 }
11739                                 break;
11740                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11741                                 if (strcmp(optarg, "v1") == 0) {
11742                                         clear_space_cache = 1;
11743                                 } else if (strcmp(optarg, "v2") == 0) {
11744                                         clear_space_cache = 2;
11745                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11746                                 } else {
11747                                         error(
11748                 "invalid argument to --clear-space-cache, must be v1 or v2");
11749                                         exit(1);
11750                                 }
11751                                 ctree_flags |= OPEN_CTREE_WRITES;
11752                                 break;
11753                 }
11754         }
11755
11756         if (check_argc_exact(argc - optind, 1))
11757                 usage(cmd_check_usage);
11758
11759         if (ctx.progress_enabled) {
11760                 ctx.tp = TASK_NOTHING;
11761                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11762         }
11763
11764         /* This check is the only reason for --readonly to exist */
11765         if (readonly && repair) {
11766                 error("repair options are not compatible with --readonly");
11767                 exit(1);
11768         }
11769
11770         /*
11771          * Not supported yet
11772          */
11773         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11774                 error("low memory mode doesn't support repair yet");
11775                 exit(1);
11776         }
11777
11778         radix_tree_init();
11779         cache_tree_init(&root_cache);
11780
11781         if((ret = check_mounted(argv[optind])) < 0) {
11782                 error("could not check mount status: %s", strerror(-ret));
11783                 goto err_out;
11784         } else if(ret) {
11785                 error("%s is currently mounted, aborting", argv[optind]);
11786                 ret = -EBUSY;
11787                 goto err_out;
11788         }
11789
11790         /* only allow partial opening under repair mode */
11791         if (repair)
11792                 ctree_flags |= OPEN_CTREE_PARTIAL;
11793
11794         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11795                                   chunk_root_bytenr, ctree_flags);
11796         if (!info) {
11797                 error("cannot open file system");
11798                 ret = -EIO;
11799                 goto err_out;
11800         }
11801
11802         global_info = info;
11803         root = info->fs_root;
11804         if (clear_space_cache == 1) {
11805                 if (btrfs_fs_compat_ro(info,
11806                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11807                         error(
11808                 "free space cache v2 detected, use --clear-space-cache v2");
11809                         ret = 1;
11810                         goto close_out;
11811                 }
11812                 printf("Clearing free space cache\n");
11813                 ret = clear_free_space_cache(info);
11814                 if (ret) {
11815                         error("failed to clear free space cache");
11816                         ret = 1;
11817                 } else {
11818                         printf("Free space cache cleared\n");
11819                 }
11820                 goto close_out;
11821         } else if (clear_space_cache == 2) {
11822                 if (!btrfs_fs_compat_ro(info,
11823                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11824                         printf("no free space cache v2 to clear\n");
11825                         ret = 0;
11826                         goto close_out;
11827                 }
11828                 printf("Clear free space cache v2\n");
11829                 ret = btrfs_clear_free_space_tree(info);
11830                 if (ret) {
11831                         error("failed to clear free space cache v2: %d", ret);
11832                         ret = 1;
11833                 } else {
11834                         printf("free space cache v2 cleared\n");
11835                 }
11836                 goto close_out;
11837         }
11838
11839         /*
11840          * repair mode will force us to commit transaction which
11841          * will make us fail to load log tree when mounting.
11842          */
11843         if (repair && btrfs_super_log_root(info->super_copy)) {
11844                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11845                 if (!ret) {
11846                         ret = 1;
11847                         goto close_out;
11848                 }
11849                 ret = zero_log_tree(root);
11850                 if (ret) {
11851                         error("failed to zero log tree: %d", ret);
11852                         goto close_out;
11853                 }
11854         }
11855
11856         uuid_unparse(info->super_copy->fsid, uuidbuf);
11857         if (qgroup_report) {
11858                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11859                        uuidbuf);
11860                 ret = qgroup_verify_all(info);
11861                 if (ret == 0)
11862                         report_qgroups(1);
11863                 goto close_out;
11864         }
11865         if (subvolid) {
11866                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11867                        subvolid, argv[optind], uuidbuf);
11868                 ret = print_extent_state(info, subvolid);
11869                 goto close_out;
11870         }
11871         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11872
11873         if (!extent_buffer_uptodate(info->tree_root->node) ||
11874             !extent_buffer_uptodate(info->dev_root->node) ||
11875             !extent_buffer_uptodate(info->chunk_root->node)) {
11876                 error("critical roots corrupted, unable to check the filesystem");
11877                 ret = -EIO;
11878                 goto close_out;
11879         }
11880
11881         if (init_extent_tree || init_csum_tree) {
11882                 struct btrfs_trans_handle *trans;
11883
11884                 trans = btrfs_start_transaction(info->extent_root, 0);
11885                 if (IS_ERR(trans)) {
11886                         error("error starting transaction");
11887                         ret = PTR_ERR(trans);
11888                         goto close_out;
11889                 }
11890
11891                 if (init_extent_tree) {
11892                         printf("Creating a new extent tree\n");
11893                         ret = reinit_extent_tree(trans, info);
11894                         if (ret)
11895                                 goto close_out;
11896                 }
11897
11898                 if (init_csum_tree) {
11899                         printf("Reinitialize checksum tree\n");
11900                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11901                         if (ret) {
11902                                 error("checksum tree initialization failed: %d",
11903                                                 ret);
11904                                 ret = -EIO;
11905                                 goto close_out;
11906                         }
11907
11908                         ret = fill_csum_tree(trans, info->csum_root,
11909                                              init_extent_tree);
11910                         if (ret) {
11911                                 error("checksum tree refilling failed: %d", ret);
11912                                 return -EIO;
11913                         }
11914                 }
11915                 /*
11916                  * Ok now we commit and run the normal fsck, which will add
11917                  * extent entries for all of the items it finds.
11918                  */
11919                 ret = btrfs_commit_transaction(trans, info->extent_root);
11920                 if (ret)
11921                         goto close_out;
11922         }
11923         if (!extent_buffer_uptodate(info->extent_root->node)) {
11924                 error("critical: extent_root, unable to check the filesystem");
11925                 ret = -EIO;
11926                 goto close_out;
11927         }
11928         if (!extent_buffer_uptodate(info->csum_root->node)) {
11929                 error("critical: csum_root, unable to check the filesystem");
11930                 ret = -EIO;
11931                 goto close_out;
11932         }
11933
11934         if (!ctx.progress_enabled)
11935                 fprintf(stderr, "checking extents\n");
11936         if (check_mode == CHECK_MODE_LOWMEM)
11937                 ret = check_chunks_and_extents_v2(root);
11938         else
11939                 ret = check_chunks_and_extents(root);
11940         if (ret)
11941                 error(
11942                 "errors found in extent allocation tree or chunk allocation");
11943
11944         ret = repair_root_items(info);
11945         if (ret < 0)
11946                 goto close_out;
11947         if (repair) {
11948                 fprintf(stderr, "Fixed %d roots.\n", ret);
11949                 ret = 0;
11950         } else if (ret > 0) {
11951                 fprintf(stderr,
11952                        "Found %d roots with an outdated root item.\n",
11953                        ret);
11954                 fprintf(stderr,
11955                         "Please run a filesystem check with the option --repair to fix them.\n");
11956                 ret = 1;
11957                 goto close_out;
11958         }
11959
11960         if (!ctx.progress_enabled) {
11961                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11962                         fprintf(stderr, "checking free space tree\n");
11963                 else
11964                         fprintf(stderr, "checking free space cache\n");
11965         }
11966         ret = check_space_cache(root);
11967         if (ret)
11968                 goto out;
11969
11970         /*
11971          * We used to have to have these hole extents in between our real
11972          * extents so if we don't have this flag set we need to make sure there
11973          * are no gaps in the file extents for inodes, otherwise we can just
11974          * ignore it when this happens.
11975          */
11976         no_holes = btrfs_fs_incompat(root->fs_info,
11977                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11978         if (!ctx.progress_enabled)
11979                 fprintf(stderr, "checking fs roots\n");
11980         ret = check_fs_roots(root, &root_cache);
11981         if (ret)
11982                 goto out;
11983
11984         fprintf(stderr, "checking csums\n");
11985         ret = check_csums(root);
11986         if (ret)
11987                 goto out;
11988
11989         fprintf(stderr, "checking root refs\n");
11990         ret = check_root_refs(root, &root_cache);
11991         if (ret)
11992                 goto out;
11993
11994         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11995                 struct extent_buffer *eb;
11996
11997                 eb = list_first_entry(&root->fs_info->recow_ebs,
11998                                       struct extent_buffer, recow);
11999                 list_del_init(&eb->recow);
12000                 ret = recow_extent_buffer(root, eb);
12001                 if (ret)
12002                         break;
12003         }
12004
12005         while (!list_empty(&delete_items)) {
12006                 struct bad_item *bad;
12007
12008                 bad = list_first_entry(&delete_items, struct bad_item, list);
12009                 list_del_init(&bad->list);
12010                 if (repair)
12011                         ret = delete_bad_item(root, bad);
12012                 free(bad);
12013         }
12014
12015         if (info->quota_enabled) {
12016                 int err;
12017                 fprintf(stderr, "checking quota groups\n");
12018                 err = qgroup_verify_all(info);
12019                 if (err)
12020                         goto out;
12021                 report_qgroups(0);
12022                 err = repair_qgroups(info, &qgroups_repaired);
12023                 if (err)
12024                         goto out;
12025         }
12026
12027         if (!list_empty(&root->fs_info->recow_ebs)) {
12028                 error("transid errors in file system");
12029                 ret = 1;
12030         }
12031 out:
12032         /* Don't override original ret */
12033         if (!ret && qgroups_repaired)
12034                 ret = qgroups_repaired;
12035
12036         if (found_old_backref) { /*
12037                  * there was a disk format change when mixed
12038                  * backref was in testing tree. The old format
12039                  * existed about one week.
12040                  */
12041                 printf("\n * Found old mixed backref format. "
12042                        "The old format is not supported! *"
12043                        "\n * Please mount the FS in readonly mode, "
12044                        "backup data and re-format the FS. *\n\n");
12045                 ret = 1;
12046         }
12047         printf("found %llu bytes used err is %d\n",
12048                (unsigned long long)bytes_used, ret);
12049         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12050         printf("total tree bytes: %llu\n",
12051                (unsigned long long)total_btree_bytes);
12052         printf("total fs tree bytes: %llu\n",
12053                (unsigned long long)total_fs_tree_bytes);
12054         printf("total extent tree bytes: %llu\n",
12055                (unsigned long long)total_extent_tree_bytes);
12056         printf("btree space waste bytes: %llu\n",
12057                (unsigned long long)btree_space_waste);
12058         printf("file data blocks allocated: %llu\n referenced %llu\n",
12059                 (unsigned long long)data_bytes_allocated,
12060                 (unsigned long long)data_bytes_referenced);
12061
12062         free_qgroup_counts();
12063         free_root_recs_tree(&root_cache);
12064 close_out:
12065         close_ctree(root);
12066 err_out:
12067         if (ctx.progress_enabled)
12068                 task_deinit(ctx.info);
12069
12070         return ret;
12071 }